git.pld-linux.org Git - packages/kernel.git/blame - kernel-aufs4.patch
- up to 4.1.15
[packages/kernel.git] / kernel-aufs4.patch
CommitLineData
ab036dbd 1aufs4.1 kbuild patch
7f207e10
AM
2
3diff --git a/fs/Kconfig b/fs/Kconfig
5527c038 4index 011f433..b1083f6 100644
7f207e10
AM
5--- a/fs/Kconfig
6+++ b/fs/Kconfig
5527c038
JR
7@@ -218,6 +218,7 @@ source "fs/pstore/Kconfig"
8 source "fs/sysv/Kconfig"
7e9cd9fe 9 source "fs/ufs/Kconfig"
7f207e10
AM
10 source "fs/exofs/Kconfig"
11+source "fs/aufs/Kconfig"
12
13 endif # MISC_FILESYSTEMS
14
15diff --git a/fs/Makefile b/fs/Makefile
5527c038 16index cb92fd4..8c2df12 100644
7f207e10
AM
17--- a/fs/Makefile
18+++ b/fs/Makefile
5527c038 19@@ -127,3 +127,4 @@ obj-y += exofs/ # Multiple modules
7f207e10 20 obj-$(CONFIG_CEPH_FS) += ceph/
bf0370f2 21 obj-$(CONFIG_PSTORE) += pstore/
c06a8ce3 22 obj-$(CONFIG_EFIVAR_FS) += efivarfs/
86dc4139 23+obj-$(CONFIG_AUFS_FS) += aufs/
c06a8ce3 24diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
5527c038 25index 1a0006a..ddad01a 100644
c06a8ce3
AM
26--- a/include/uapi/linux/Kbuild
27+++ b/include/uapi/linux/Kbuild
5527c038 28@@ -59,6 +59,7 @@ header-y += atmsvc.h
03673fb0
JR
29 header-y += atm_tcp.h
30 header-y += atm_zatm.h
c06a8ce3
AM
31 header-y += audit.h
32+header-y += aufs_type.h
c06a8ce3 33 header-y += auto_fs4.h
03673fb0 34 header-y += auto_fs.h
c06a8ce3 35 header-y += auxvec.h
ab036dbd 36aufs4.1 base patch
7f207e10 37
c1595e42 38diff --git a/MAINTAINERS b/MAINTAINERS
5527c038 39index d8afd29..feac5ea 100644
c1595e42
JR
40--- a/MAINTAINERS
41+++ b/MAINTAINERS
5527c038 42@@ -1880,6 +1880,19 @@ F: include/linux/audit.h
c1595e42
JR
43 F: include/uapi/linux/audit.h
44 F: kernel/audit*
45
46+AUFS (advanced multi layered unification filesystem) FILESYSTEM
47+M: "J. R. Okajima" <hooanon05g@gmail.com>
48+L: linux-unionfs@vger.kernel.org
49+L: aufs-users@lists.sourceforge.net (members only)
50+W: http://aufs.sourceforge.net
5527c038 51+T: git://github.com/sfjro/aufs4-linux.git
c1595e42
JR
52+S: Supported
53+F: Documentation/filesystems/aufs/
54+F: Documentation/ABI/testing/debugfs-aufs
55+F: Documentation/ABI/testing/sysfs-aufs
56+F: fs/aufs/
57+F: include/uapi/linux/aufs_type.h
58+
59 AUXILIARY DISPLAY DRIVERS
60 M: Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
61 W: http://miguelojeda.es/auxdisplay.htm
392086de 62diff --git a/drivers/block/loop.c b/drivers/block/loop.c
5527c038 63index d7173cb..0160952 100644
392086de
AM
64--- a/drivers/block/loop.c
65+++ b/drivers/block/loop.c
5527c038 66@@ -540,6 +540,24 @@ static inline int is_loop_device(struct file *file)
392086de
AM
67 return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
68 }
69
70+/*
71+ * for AUFS
72+ * no get/put for file.
73+ */
74+struct file *loop_backing_file(struct super_block *sb)
75+{
76+ struct file *ret;
77+ struct loop_device *l;
78+
79+ ret = NULL;
80+ if (MAJOR(sb->s_dev) == LOOP_MAJOR) {
81+ l = sb->s_bdev->bd_disk->private_data;
82+ ret = l->lo_backing_file;
83+ }
84+ return ret;
85+}
86+EXPORT_SYMBOL(loop_backing_file);
87+
88 /* loop sysfs attributes */
89
90 static ssize_t loop_attr_show(struct device *dev, char *page,
c1595e42 91diff --git a/fs/dcache.c b/fs/dcache.c
5527c038 92index 37b5afd..bc261e2 100644
c1595e42
JR
93--- a/fs/dcache.c
94+++ b/fs/dcache.c
5527c038 95@@ -1164,7 +1164,7 @@ enum d_walk_ret {
c1595e42
JR
96 *
97 * The @enter() and @finish() callbacks are called with d_lock held.
98 */
99-static void d_walk(struct dentry *parent, void *data,
100+void d_walk(struct dentry *parent, void *data,
101 enum d_walk_ret (*enter)(void *, struct dentry *),
102 void (*finish)(void *))
103 {
5527c038
JR
104diff --git a/fs/read_write.c b/fs/read_write.c
105index 819ef3f..fd0414e 100644
106--- a/fs/read_write.c
107+++ b/fs/read_write.c
108@@ -494,6 +494,28 @@ ssize_t __vfs_write(struct file *file, const char __user *p, size_t count,
109 }
110 EXPORT_SYMBOL(__vfs_write);
111
112+vfs_readf_t vfs_readf(struct file *file)
113+{
114+ const struct file_operations *fop = file->f_op;
115+
116+ if (fop->read)
117+ return fop->read;
118+ if (fop->read_iter)
119+ return new_sync_read;
120+ return ERR_PTR(-ENOSYS);
121+}
122+
123+vfs_writef_t vfs_writef(struct file *file)
124+{
125+ const struct file_operations *fop = file->f_op;
126+
127+ if (fop->write)
128+ return fop->write;
129+ if (fop->write_iter)
130+ return new_sync_write;
131+ return ERR_PTR(-ENOSYS);
132+}
133+
134 ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
135 {
136 mm_segment_t old_fs;
7f207e10 137diff --git a/fs/splice.c b/fs/splice.c
5527c038 138index bfe62ae..fa5eee5 100644
7f207e10
AM
139--- a/fs/splice.c
140+++ b/fs/splice.c
5527c038 141@@ -1101,8 +1101,8 @@ EXPORT_SYMBOL(generic_splice_sendpage);
7f207e10
AM
142 /*
143 * Attempt to initiate a splice from pipe to file.
144 */
145-static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
146- loff_t *ppos, size_t len, unsigned int flags)
147+long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
148+ loff_t *ppos, size_t len, unsigned int flags)
149 {
150 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
151 loff_t *, size_t, unsigned int);
5527c038 152@@ -1118,9 +1118,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
7f207e10
AM
153 /*
154 * Attempt to initiate a splice from a file to a pipe.
155 */
156-static long do_splice_to(struct file *in, loff_t *ppos,
157- struct pipe_inode_info *pipe, size_t len,
158- unsigned int flags)
159+long do_splice_to(struct file *in, loff_t *ppos,
160+ struct pipe_inode_info *pipe, size_t len,
161+ unsigned int flags)
162 {
163 ssize_t (*splice_read)(struct file *, loff_t *,
164 struct pipe_inode_info *, size_t, unsigned int);
b912730e
AM
165diff --git a/include/linux/file.h b/include/linux/file.h
166index f87d308..9a290b3 100644
167--- a/include/linux/file.h
168+++ b/include/linux/file.h
169@@ -19,6 +19,7 @@ struct dentry;
170 struct path;
171 extern struct file *alloc_file(struct path *, fmode_t mode,
172 const struct file_operations *fop);
173+extern struct file *get_empty_filp(void);
174
175 static inline void fput_light(struct file *file, int fput_needed)
176 {
5527c038
JR
177diff --git a/include/linux/fs.h b/include/linux/fs.h
178index 35ec87e..3229f97 100644
179--- a/include/linux/fs.h
180+++ b/include/linux/fs.h
181@@ -1649,6 +1649,12 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
182 struct iovec *fast_pointer,
183 struct iovec **ret_pointer);
184
185+typedef ssize_t (*vfs_readf_t)(struct file *, char __user *, size_t, loff_t *);
186+typedef ssize_t (*vfs_writef_t)(struct file *, const char __user *, size_t,
187+ loff_t *);
188+vfs_readf_t vfs_readf(struct file *file);
189+vfs_writef_t vfs_writef(struct file *file);
190+
191 extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *);
192 extern ssize_t __vfs_write(struct file *, const char __user *, size_t, loff_t *);
193 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
1e00d052 194diff --git a/include/linux/splice.h b/include/linux/splice.h
076b876e 195index da2751d..2e0fca6 100644
1e00d052
AM
196--- a/include/linux/splice.h
197+++ b/include/linux/splice.h
076b876e 198@@ -83,4 +83,10 @@ extern void splice_shrink_spd(struct splice_pipe_desc *);
4b3da204
AM
199 extern void spd_release_page(struct splice_pipe_desc *, unsigned int);
200
201 extern const struct pipe_buf_operations page_cache_pipe_buf_ops;
1e00d052
AM
202+
203+extern long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
204+ loff_t *ppos, size_t len, unsigned int flags);
205+extern long do_splice_to(struct file *in, loff_t *ppos,
206+ struct pipe_inode_info *pipe, size_t len,
207+ unsigned int flags);
208 #endif
ab036dbd 209aufs4.1 mmap patch
fb47a38f
JR
210
211diff --git a/fs/buffer.c b/fs/buffer.c
5527c038 212index c7a5602..8c50a22 100644
fb47a38f
JR
213--- a/fs/buffer.c
214+++ b/fs/buffer.c
c1595e42 215@@ -2450,7 +2450,7 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
fb47a38f
JR
216 * Update file times before taking page lock. We may end up failing the
217 * fault so this update may be superfluous but who really cares...
218 */
219- file_update_time(vma->vm_file);
220+ vma_file_update_time(vma);
221
222 ret = __block_page_mkwrite(vma, vmf, get_block);
223 sb_end_pagefault(sb);
c1595e42 224diff --git a/fs/proc/base.c b/fs/proc/base.c
5527c038 225index 093ca14..fc1ac03 100644
c1595e42
JR
226--- a/fs/proc/base.c
227+++ b/fs/proc/base.c
5527c038 228@@ -1744,7 +1744,7 @@ static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
c1595e42
JR
229 down_read(&mm->mmap_sem);
230 vma = find_exact_vma(mm, vm_start, vm_end);
231 if (vma && vma->vm_file) {
232- *path = vma->vm_file->f_path;
233+ *path = vma_pr_or_file(vma)->f_path;
234 path_get(path);
235 rc = 0;
236 }
fb47a38f 237diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
076b876e 238index d4a3574..1397181 100644
fb47a38f
JR
239--- a/fs/proc/nommu.c
240+++ b/fs/proc/nommu.c
076b876e 241@@ -45,7 +45,10 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region)
fb47a38f
JR
242 file = region->vm_file;
243
244 if (file) {
245- struct inode *inode = file_inode(region->vm_file);
246+ struct inode *inode;
076b876e 247+
fb47a38f
JR
248+ file = vmr_pr_or_file(region);
249+ inode = file_inode(file);
250 dev = inode->i_sb->s_dev;
251 ino = inode->i_ino;
252 }
253diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
7e9cd9fe 254index 6dee68d..9afa35d 100644
fb47a38f
JR
255--- a/fs/proc/task_mmu.c
256+++ b/fs/proc/task_mmu.c
7e9cd9fe 257@@ -279,7 +279,10 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
fb47a38f
JR
258 const char *name = NULL;
259
260 if (file) {
261- struct inode *inode = file_inode(vma->vm_file);
262+ struct inode *inode;
076b876e 263+
fb47a38f
JR
264+ file = vma_pr_or_file(vma);
265+ inode = file_inode(file);
266 dev = inode->i_sb->s_dev;
267 ino = inode->i_ino;
268 pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
7e9cd9fe 269@@ -1479,7 +1482,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
076b876e
AM
270 struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
271 struct vm_area_struct *vma = v;
272 struct numa_maps *md = &numa_priv->md;
273- struct file *file = vma->vm_file;
274+ struct file *file = vma_pr_or_file(vma);
076b876e 275 struct mm_struct *mm = vma->vm_mm;
7e9cd9fe
AM
276 struct mm_walk walk = {
277 .hugetlb_entry = gather_hugetlb_stats,
fb47a38f 278diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
b912730e 279index 599ec2e..1740207 100644
fb47a38f
JR
280--- a/fs/proc/task_nommu.c
281+++ b/fs/proc/task_nommu.c
c1595e42 282@@ -160,7 +160,10 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
fb47a38f
JR
283 file = vma->vm_file;
284
285 if (file) {
286- struct inode *inode = file_inode(vma->vm_file);
287+ struct inode *inode;
076b876e 288+
b912730e 289+ file = vma_pr_or_file(vma);
fb47a38f
JR
290+ inode = file_inode(file);
291 dev = inode->i_sb->s_dev;
292 ino = inode->i_ino;
293 pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
294diff --git a/include/linux/mm.h b/include/linux/mm.h
5527c038 295index 0755b9f..2ee5500 100644
fb47a38f
JR
296--- a/include/linux/mm.h
297+++ b/include/linux/mm.h
5527c038 298@@ -1172,6 +1172,28 @@ static inline int fixup_user_fault(struct task_struct *tsk,
fb47a38f
JR
299 }
300 #endif
301
076b876e
AM
302+extern void vma_do_file_update_time(struct vm_area_struct *, const char[], int);
303+extern struct file *vma_do_pr_or_file(struct vm_area_struct *, const char[],
304+ int);
305+extern void vma_do_get_file(struct vm_area_struct *, const char[], int);
306+extern void vma_do_fput(struct vm_area_struct *, const char[], int);
fb47a38f 307+
fb47a38f
JR
308+#define vma_file_update_time(vma) vma_do_file_update_time(vma, __func__, \
309+ __LINE__)
310+#define vma_pr_or_file(vma) vma_do_pr_or_file(vma, __func__, \
311+ __LINE__)
312+#define vma_get_file(vma) vma_do_get_file(vma, __func__, __LINE__)
313+#define vma_fput(vma) vma_do_fput(vma, __func__, __LINE__)
b912730e
AM
314+
315+#ifndef CONFIG_MMU
076b876e
AM
316+extern struct file *vmr_do_pr_or_file(struct vm_region *, const char[], int);
317+extern void vmr_do_fput(struct vm_region *, const char[], int);
318+
319+#define vmr_pr_or_file(region) vmr_do_pr_or_file(region, __func__, \
320+ __LINE__)
321+#define vmr_fput(region) vmr_do_fput(region, __func__, __LINE__)
b912730e 322+#endif /* !CONFIG_MMU */
fb47a38f
JR
323+
324 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
325 extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
326 void *buf, int len, int write);
327diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
5527c038 328index 8d37e26..ce89d4c 100644
fb47a38f
JR
329--- a/include/linux/mm_types.h
330+++ b/include/linux/mm_types.h
7e9cd9fe 331@@ -241,6 +241,7 @@ struct vm_region {
fb47a38f
JR
332 unsigned long vm_top; /* region allocated to here */
333 unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */
334 struct file *vm_file; /* the backing file or NULL */
335+ struct file *vm_prfile; /* the virtual backing file or NULL */
336
337 int vm_usage; /* region usage count (access under nommu_region_sem) */
338 bool vm_icache_flushed : 1; /* true if the icache has been flushed for
7e9cd9fe 339@@ -305,6 +306,7 @@ struct vm_area_struct {
fb47a38f
JR
340 unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE
341 units, *not* PAGE_CACHE_SIZE */
342 struct file * vm_file; /* File we map to (can be NULL). */
343+ struct file *vm_prfile; /* shadow of vm_file */
344 void * vm_private_data; /* was vm_pte (shared mem) */
345
346 #ifndef CONFIG_MMU
347diff --git a/kernel/fork.c b/kernel/fork.c
5527c038 348index 03c1eaa..7e215ba 100644
fb47a38f
JR
349--- a/kernel/fork.c
350+++ b/kernel/fork.c
5527c038 351@@ -456,7 +456,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
fb47a38f
JR
352 struct inode *inode = file_inode(file);
353 struct address_space *mapping = file->f_mapping;
354
355- get_file(file);
356+ vma_get_file(tmp);
357 if (tmp->vm_flags & VM_DENYWRITE)
358 atomic_dec(&inode->i_writecount);
2000de60 359 i_mmap_lock_write(mapping);
076b876e 360diff --git a/mm/Makefile b/mm/Makefile
5527c038 361index 98c4eae..3f0c9b9 100644
076b876e
AM
362--- a/mm/Makefile
363+++ b/mm/Makefile
7e9cd9fe 364@@ -21,7 +21,7 @@ obj-y := filemap.o mempool.o oom_kill.o \
076b876e 365 mm_init.o mmu_context.o percpu.o slab_common.o \
c1595e42 366 compaction.o vmacache.o \
076b876e 367 interval_tree.o list_lru.o workingset.o \
7e9cd9fe
AM
368- debug.o $(mmu-y)
369+ prfile.o debug.o $(mmu-y)
076b876e
AM
370
371 obj-y += init-mm.o
372
fb47a38f 373diff --git a/mm/filemap.c b/mm/filemap.c
5527c038 374index 6bf5e42..a863d0f 100644
fb47a38f
JR
375--- a/mm/filemap.c
376+++ b/mm/filemap.c
5527c038 377@@ -2062,7 +2062,7 @@ int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
fb47a38f
JR
378 int ret = VM_FAULT_LOCKED;
379
380 sb_start_pagefault(inode->i_sb);
381- file_update_time(vma->vm_file);
382+ vma_file_update_time(vma);
383 lock_page(page);
384 if (page->mapping != inode->i_mapping) {
385 unlock_page(page);
fb47a38f 386diff --git a/mm/memory.c b/mm/memory.c
5527c038 387index 22e037e..62096a2 100644
fb47a38f
JR
388--- a/mm/memory.c
389+++ b/mm/memory.c
5527c038 390@@ -2034,7 +2034,7 @@ static inline int wp_page_reuse(struct mm_struct *mm,
fb47a38f 391 }
7e9cd9fe 392
b912730e
AM
393 if (!page_mkwrite)
394- file_update_time(vma->vm_file);
395+ vma_file_update_time(vma);
396 }
397
398 return VM_FAULT_WRITE;
fb47a38f 399diff --git a/mm/mmap.c b/mm/mmap.c
5527c038 400index bb50cac..1ab5e596 100644
fb47a38f
JR
401--- a/mm/mmap.c
402+++ b/mm/mmap.c
7e9cd9fe 403@@ -274,7 +274,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
fb47a38f
JR
404 if (vma->vm_ops && vma->vm_ops->close)
405 vma->vm_ops->close(vma);
406 if (vma->vm_file)
407- fput(vma->vm_file);
408+ vma_fput(vma);
409 mpol_put(vma_policy(vma));
410 kmem_cache_free(vm_area_cachep, vma);
411 return next;
7e9cd9fe 412@@ -886,7 +886,7 @@ again: remove_next = 1 + (end > next->vm_end);
fb47a38f
JR
413 if (remove_next) {
414 if (file) {
415 uprobe_munmap(next, next->vm_start, next->vm_end);
416- fput(file);
417+ vma_fput(vma);
418 }
419 if (next->anon_vma)
420 anon_vma_merge(vma, next);
7e9cd9fe 421@@ -1671,8 +1671,8 @@ out:
35939ee7
JR
422 return addr;
423
fb47a38f 424 unmap_and_free_vma:
fb47a38f
JR
425+ vma_fput(vma);
426 vma->vm_file = NULL;
427- fput(file);
428
429 /* Undo any partial mapping done by a device driver. */
430 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
7e9cd9fe 431@@ -2473,7 +2473,7 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
fb47a38f
JR
432 goto out_free_mpol;
433
434 if (new->vm_file)
435- get_file(new->vm_file);
436+ vma_get_file(new);
437
438 if (new->vm_ops && new->vm_ops->open)
439 new->vm_ops->open(new);
7e9cd9fe 440@@ -2492,7 +2492,7 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
fb47a38f
JR
441 if (new->vm_ops && new->vm_ops->close)
442 new->vm_ops->close(new);
443 if (new->vm_file)
444- fput(new->vm_file);
445+ vma_fput(new);
446 unlink_anon_vmas(new);
447 out_free_mpol:
448 mpol_put(vma_policy(new));
7e9cd9fe
AM
449@@ -2635,7 +2635,6 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
450 struct vm_area_struct *vma;
451 unsigned long populate = 0;
452 unsigned long ret = -EINVAL;
453- struct file *file;
454
455 pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. "
456 "See Documentation/vm/remap_file_pages.txt.\n",
457@@ -2679,10 +2678,10 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
458 munlock_vma_pages_range(vma, start, start + size);
459 }
460
461- file = get_file(vma->vm_file);
462+ vma_get_file(vma);
463 ret = do_mmap_pgoff(vma->vm_file, start, size,
464 prot, flags, pgoff, &populate);
465- fput(file);
466+ vma_fput(vma);
467 out:
468 up_write(&mm->mmap_sem);
469 if (populate)
5527c038 470@@ -2949,7 +2948,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
fb47a38f
JR
471 if (anon_vma_clone(new_vma, vma))
472 goto out_free_mempol;
473 if (new_vma->vm_file)
474- get_file(new_vma->vm_file);
475+ vma_get_file(new_vma);
476 if (new_vma->vm_ops && new_vma->vm_ops->open)
477 new_vma->vm_ops->open(new_vma);
478 vma_link(mm, new_vma, prev, rb_link, rb_parent);
fb47a38f 479diff --git a/mm/nommu.c b/mm/nommu.c
5527c038 480index e544508..dd6f74a 100644
fb47a38f
JR
481--- a/mm/nommu.c
482+++ b/mm/nommu.c
7e9cd9fe 483@@ -693,7 +693,7 @@ static void __put_nommu_region(struct vm_region *region)
fb47a38f
JR
484 up_write(&nommu_region_sem);
485
486 if (region->vm_file)
487- fput(region->vm_file);
488+ vmr_fput(region);
489
490 /* IO memory and memory shared directly out of the pagecache
491 * from ramfs/tmpfs mustn't be released here */
7e9cd9fe 492@@ -858,7 +858,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
fb47a38f
JR
493 if (vma->vm_ops && vma->vm_ops->close)
494 vma->vm_ops->close(vma);
495 if (vma->vm_file)
496- fput(vma->vm_file);
497+ vma_fput(vma);
498 put_nommu_region(vma->vm_region);
499 kmem_cache_free(vm_area_cachep, vma);
500 }
7e9cd9fe 501@@ -1398,7 +1398,7 @@ unsigned long do_mmap_pgoff(struct file *file,
fb47a38f
JR
502 goto error_just_free;
503 }
504 }
505- fput(region->vm_file);
506+ vmr_fput(region);
507 kmem_cache_free(vm_region_jar, region);
508 region = pregion;
509 result = start;
7e9cd9fe 510@@ -1474,10 +1474,10 @@ error_just_free:
fb47a38f
JR
511 up_write(&nommu_region_sem);
512 error:
513 if (region->vm_file)
514- fput(region->vm_file);
515+ vmr_fput(region);
516 kmem_cache_free(vm_region_jar, region);
517 if (vma->vm_file)
518- fput(vma->vm_file);
519+ vma_fput(vma);
520 kmem_cache_free(vm_area_cachep, vma);
521 kleave(" = %d", ret);
522 return ret;
076b876e
AM
523diff --git a/mm/prfile.c b/mm/prfile.c
524new file mode 100644
ab036dbd 525index 0000000..b323b8a
076b876e
AM
526--- /dev/null
527+++ b/mm/prfile.c
528@@ -0,0 +1,86 @@
529+/*
530+ * Mainly for aufs which mmap(2) different file and wants to print different path
531+ * in /proc/PID/maps.
532+ * Call these functions via macros defined in linux/mm.h.
533+ *
534+ * See Documentation/filesystems/aufs/design/06mmap.txt
535+ *
536+ * Copyright (c) 2014 Junjiro R. Okajima
537+ * Copyright (c) 2014 Ian Campbell
538+ */
539+
540+#include <linux/mm.h>
541+#include <linux/file.h>
542+#include <linux/fs.h>
543+
544+/* #define PRFILE_TRACE */
545+static inline void prfile_trace(struct file *f, struct file *pr,
546+ const char func[], int line, const char func2[])
547+{
548+#ifdef PRFILE_TRACE
549+ if (pr)
ab036dbd 550+ pr_info("%s:%d: %s, %s\n", func, line, func2,
7e9cd9fe 551+ f ? (char *)f->f_path.dentry->d_name.name : "(null)");
076b876e
AM
552+#endif
553+}
554+
076b876e
AM
555+void vma_do_file_update_time(struct vm_area_struct *vma, const char func[],
556+ int line)
557+{
558+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
559+
560+ prfile_trace(f, pr, func, line, __func__);
561+ file_update_time(f);
562+ if (f && pr)
563+ file_update_time(pr);
564+}
565+
566+struct file *vma_do_pr_or_file(struct vm_area_struct *vma, const char func[],
567+ int line)
568+{
569+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
570+
571+ prfile_trace(f, pr, func, line, __func__);
572+ return (f && pr) ? pr : f;
573+}
574+
575+void vma_do_get_file(struct vm_area_struct *vma, const char func[], int line)
576+{
577+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
578+
579+ prfile_trace(f, pr, func, line, __func__);
580+ get_file(f);
581+ if (f && pr)
582+ get_file(pr);
583+}
584+
585+void vma_do_fput(struct vm_area_struct *vma, const char func[], int line)
586+{
587+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
588+
589+ prfile_trace(f, pr, func, line, __func__);
590+ fput(f);
591+ if (f && pr)
592+ fput(pr);
593+}
b912730e
AM
594+
595+#ifndef CONFIG_MMU
076b876e
AM
596+struct file *vmr_do_pr_or_file(struct vm_region *region, const char func[],
597+ int line)
598+{
599+ struct file *f = region->vm_file, *pr = region->vm_prfile;
600+
601+ prfile_trace(f, pr, func, line, __func__);
602+ return (f && pr) ? pr : f;
603+}
604+
605+void vmr_do_fput(struct vm_region *region, const char func[], int line)
606+{
607+ struct file *f = region->vm_file, *pr = region->vm_prfile;
608+
609+ prfile_trace(f, pr, func, line, __func__);
610+ fput(f);
611+ if (f && pr)
612+ fput(pr);
613+}
b912730e 614+#endif /* !CONFIG_MMU */
ab036dbd 615aufs4.1 standalone patch
7f207e10 616
c1595e42 617diff --git a/fs/dcache.c b/fs/dcache.c
5527c038 618index bc261e2..8d7951d 100644
c1595e42
JR
619--- a/fs/dcache.c
620+++ b/fs/dcache.c
5527c038 621@@ -1269,6 +1269,7 @@ rename_retry:
c1595e42
JR
622 seq = 1;
623 goto again;
624 }
625+EXPORT_SYMBOL(d_walk);
626
627 /*
628 * Search for at least 1 mount point in the dentry's subdirs.
b912730e 629diff --git a/fs/file_table.c b/fs/file_table.c
5527c038 630index 294174d..3cea027 100644
b912730e
AM
631--- a/fs/file_table.c
632+++ b/fs/file_table.c
633@@ -147,6 +147,7 @@ over:
634 }
635 return ERR_PTR(-ENFILE);
636 }
637+EXPORT_SYMBOL(get_empty_filp);
638
639 /**
640 * alloc_file - allocate and initialize a 'struct file'
641@@ -308,6 +309,7 @@ void put_filp(struct file *file)
642 file_free(file);
643 }
644 }
645+EXPORT_SYMBOL(put_filp);
646
647 void __init files_init(unsigned long mempages)
648 {
1e00d052 649diff --git a/fs/inode.c b/fs/inode.c
5527c038 650index ea37cd1..58f5f58 100644
1e00d052
AM
651--- a/fs/inode.c
652+++ b/fs/inode.c
7e9cd9fe 653@@ -58,6 +58,7 @@ static struct hlist_head *inode_hashtable __read_mostly;
4b3da204 654 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
2cbb1c4b
JR
655
656 __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
2cbb1c4b 657+EXPORT_SYMBOL(inode_sb_list_lock);
7f207e10
AM
658
659 /*
4b3da204 660 * Empty aops. Can be used for the cases where the user does not
7f207e10 661diff --git a/fs/namespace.c b/fs/namespace.c
5527c038 662index 1b9e111..d45b81b 100644
7f207e10
AM
663--- a/fs/namespace.c
664+++ b/fs/namespace.c
7e9cd9fe 665@@ -463,6 +463,7 @@ void __mnt_drop_write(struct vfsmount *mnt)
c06a8ce3
AM
666 mnt_dec_writers(real_mount(mnt));
667 preempt_enable();
668 }
669+EXPORT_SYMBOL_GPL(__mnt_drop_write);
670
671 /**
672 * mnt_drop_write - give up write access to a mount
5527c038 673@@ -1768,6 +1769,7 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
7f207e10
AM
674 }
675 return 0;
676 }
677+EXPORT_SYMBOL(iterate_mounts);
678
7eafdf33 679 static void cleanup_group_ids(struct mount *mnt, struct mount *end)
7f207e10
AM
680 {
681diff --git a/fs/notify/group.c b/fs/notify/group.c
c1595e42 682index d16b62c..06ca6bc 100644
7f207e10
AM
683--- a/fs/notify/group.c
684+++ b/fs/notify/group.c
685@@ -22,6 +22,7 @@
686 #include <linux/srcu.h>
687 #include <linux/rculist.h>
688 #include <linux/wait.h>
689+#include <linux/module.h>
690
691 #include <linux/fsnotify_backend.h>
692 #include "fsnotify.h"
fb47a38f 693@@ -72,6 +73,7 @@ void fsnotify_get_group(struct fsnotify_group *group)
1716fcea
AM
694 {
695 atomic_inc(&group->refcnt);
696 }
697+EXPORT_SYMBOL(fsnotify_get_group);
698
699 /*
700 * Drop a reference to a group. Free it if it's through.
fb47a38f 701@@ -81,6 +83,7 @@ void fsnotify_put_group(struct fsnotify_group *group)
7f207e10 702 if (atomic_dec_and_test(&group->refcnt))
1716fcea 703 fsnotify_final_destroy_group(group);
7f207e10
AM
704 }
705+EXPORT_SYMBOL(fsnotify_put_group);
706
707 /*
708 * Create a new fsnotify_group and hold a reference for the group returned.
fb47a38f 709@@ -109,6 +112,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
7f207e10
AM
710
711 return group;
712 }
713+EXPORT_SYMBOL(fsnotify_alloc_group);
1716fcea
AM
714
715 int fsnotify_fasync(int fd, struct file *file, int on)
716 {
7f207e10 717diff --git a/fs/notify/mark.c b/fs/notify/mark.c
2000de60 718index 92e48c7..d2c4b68 100644
7f207e10
AM
719--- a/fs/notify/mark.c
720+++ b/fs/notify/mark.c
392086de 721@@ -109,6 +109,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
7f207e10 722 mark->free_mark(mark);
1716fcea 723 }
7f207e10
AM
724 }
725+EXPORT_SYMBOL(fsnotify_put_mark);
726
2000de60
JR
727 /* Calculate mask of events for a list of marks */
728 u32 fsnotify_recalc_mask(struct hlist_head *head)
729@@ -202,6 +203,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
1716fcea
AM
730 fsnotify_destroy_mark_locked(mark, group);
731 mutex_unlock(&group->mark_mutex);
7f207e10
AM
732 }
733+EXPORT_SYMBOL(fsnotify_destroy_mark);
734
2000de60
JR
735 /*
736 * Destroy all marks in the given list. The marks must be already detached from
737@@ -376,6 +378,7 @@ err:
7f207e10
AM
738
739 return ret;
740 }
741+EXPORT_SYMBOL(fsnotify_add_mark);
742
1716fcea
AM
743 int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
744 struct inode *inode, struct vfsmount *mnt, int allow_dups)
2000de60 745@@ -455,6 +458,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
7f207e10
AM
746 atomic_set(&mark->refcnt, 1);
747 mark->free_mark = free_mark;
748 }
749+EXPORT_SYMBOL(fsnotify_init_mark);
750
751 static int fsnotify_mark_destroy(void *ignored)
752 {
753diff --git a/fs/open.c b/fs/open.c
5527c038 754index 98e5a52..a94e2e7 100644
7f207e10
AM
755--- a/fs/open.c
756+++ b/fs/open.c
523b37e3 757@@ -62,6 +62,7 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
7f207e10
AM
758 mutex_unlock(&dentry->d_inode->i_mutex);
759 return ret;
760 }
761+EXPORT_SYMBOL(do_truncate);
762
1716fcea 763 long vfs_truncate(struct path *path, loff_t length)
7f207e10 764 {
5527c038 765@@ -676,6 +677,7 @@ int open_check_o_direct(struct file *f)
b912730e
AM
766 }
767 return 0;
768 }
769+EXPORT_SYMBOL(open_check_o_direct);
770
771 static int do_dentry_open(struct file *f,
772 int (*open)(struct inode *, struct file *),
5527c038
JR
773diff --git a/fs/read_write.c b/fs/read_write.c
774index fd0414e..8ace6ec 100644
775--- a/fs/read_write.c
776+++ b/fs/read_write.c
777@@ -504,6 +504,7 @@ vfs_readf_t vfs_readf(struct file *file)
778 return new_sync_read;
779 return ERR_PTR(-ENOSYS);
780 }
781+EXPORT_SYMBOL(vfs_readf);
782
783 vfs_writef_t vfs_writef(struct file *file)
784 {
785@@ -515,6 +516,7 @@ vfs_writef_t vfs_writef(struct file *file)
786 return new_sync_write;
787 return ERR_PTR(-ENOSYS);
788 }
789+EXPORT_SYMBOL(vfs_writef);
790
791 ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
792 {
7f207e10 793diff --git a/fs/splice.c b/fs/splice.c
5527c038 794index fa5eee5..bfb3324 100644
7f207e10
AM
795--- a/fs/splice.c
796+++ b/fs/splice.c
5527c038 797@@ -1114,6 +1114,7 @@ long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
392086de
AM
798
799 return splice_write(pipe, out, ppos, len, flags);
7f207e10
AM
800 }
801+EXPORT_SYMBOL(do_splice_from);
802
803 /*
804 * Attempt to initiate a splice from a file to a pipe.
5527c038 805@@ -1140,6 +1141,7 @@ long do_splice_to(struct file *in, loff_t *ppos,
7f207e10
AM
806
807 return splice_read(in, ppos, pipe, len, flags);
808 }
809+EXPORT_SYMBOL(do_splice_to);
810
811 /**
812 * splice_direct_to_actor - splices data directly between two non-pipes
c1595e42 813diff --git a/fs/xattr.c b/fs/xattr.c
2000de60 814index 4ef6985..6bb6303 100644
c1595e42
JR
815--- a/fs/xattr.c
816+++ b/fs/xattr.c
817@@ -207,6 +207,7 @@ vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value,
818 *xattr_value = value;
819 return error;
820 }
821+EXPORT_SYMBOL(vfs_getxattr_alloc);
822
823 /* Compare an extended attribute value with the given value */
824 int vfs_xattr_cmp(struct dentry *dentry, const char *xattr_name,
7f207e10 825diff --git a/security/commoncap.c b/security/commoncap.c
5527c038 826index f2875cd..ebf06ec 100644
7f207e10
AM
827--- a/security/commoncap.c
828+++ b/security/commoncap.c
7e9cd9fe 829@@ -975,9 +975,11 @@ int cap_mmap_addr(unsigned long addr)
94337f0d 830 }
7f207e10
AM
831 return ret;
832 }
0c3ec466
AM
833+EXPORT_SYMBOL(cap_mmap_addr);
834
835 int cap_mmap_file(struct file *file, unsigned long reqprot,
836 unsigned long prot, unsigned long flags)
837 {
838 return 0;
839 }
840+EXPORT_SYMBOL(cap_mmap_file);
7f207e10 841diff --git a/security/device_cgroup.c b/security/device_cgroup.c
c1595e42 842index 188c1d2..426d9af 100644
7f207e10
AM
843--- a/security/device_cgroup.c
844+++ b/security/device_cgroup.c
f6c5ef8b
AM
845@@ -7,6 +7,7 @@
846 #include <linux/device_cgroup.h>
847 #include <linux/cgroup.h>
848 #include <linux/ctype.h>
849+#include <linux/export.h>
850 #include <linux/list.h>
851 #include <linux/uaccess.h>
852 #include <linux/seq_file.h>
076b876e 853@@ -849,6 +850,7 @@ int __devcgroup_inode_permission(struct inode *inode, int mask)
537831f9
AM
854 return __devcgroup_check_permission(type, imajor(inode), iminor(inode),
855 access);
7f207e10 856 }
2cbb1c4b 857+EXPORT_SYMBOL(__devcgroup_inode_permission);
7f207e10
AM
858
859 int devcgroup_inode_mknod(int mode, dev_t dev)
860 {
861diff --git a/security/security.c b/security/security.c
5527c038 862index 8e9b1f4..c1c7cd1 100644
7f207e10
AM
863--- a/security/security.c
864+++ b/security/security.c
7e9cd9fe 865@@ -430,6 +430,7 @@ int security_path_rmdir(struct path *dir, struct dentry *dentry)
7f207e10
AM
866 return 0;
867 return security_ops->path_rmdir(dir, dentry);
868 }
869+EXPORT_SYMBOL(security_path_rmdir);
870
871 int security_path_unlink(struct path *dir, struct dentry *dentry)
872 {
7e9cd9fe 873@@ -446,6 +447,7 @@ int security_path_symlink(struct path *dir, struct dentry *dentry,
7f207e10
AM
874 return 0;
875 return security_ops->path_symlink(dir, dentry, old_name);
876 }
877+EXPORT_SYMBOL(security_path_symlink);
878
879 int security_path_link(struct dentry *old_dentry, struct path *new_dir,
880 struct dentry *new_dentry)
7e9cd9fe 881@@ -454,6 +456,7 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir,
7f207e10
AM
882 return 0;
883 return security_ops->path_link(old_dentry, new_dir, new_dentry);
884 }
885+EXPORT_SYMBOL(security_path_link);
886
887 int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
38d290e6 888 struct path *new_dir, struct dentry *new_dentry,
7e9cd9fe 889@@ -481,6 +484,7 @@ int security_path_truncate(struct path *path)
7f207e10
AM
890 return 0;
891 return security_ops->path_truncate(path);
892 }
893+EXPORT_SYMBOL(security_path_truncate);
894
7eafdf33
AM
895 int security_path_chmod(struct path *path, umode_t mode)
896 {
7e9cd9fe 897@@ -488,6 +492,7 @@ int security_path_chmod(struct path *path, umode_t mode)
7f207e10 898 return 0;
7eafdf33 899 return security_ops->path_chmod(path, mode);
7f207e10
AM
900 }
901+EXPORT_SYMBOL(security_path_chmod);
902
537831f9 903 int security_path_chown(struct path *path, kuid_t uid, kgid_t gid)
7f207e10 904 {
7e9cd9fe 905@@ -495,6 +500,7 @@ int security_path_chown(struct path *path, kuid_t uid, kgid_t gid)
7f207e10
AM
906 return 0;
907 return security_ops->path_chown(path, uid, gid);
908 }
909+EXPORT_SYMBOL(security_path_chown);
910
911 int security_path_chroot(struct path *path)
912 {
7e9cd9fe 913@@ -580,6 +586,7 @@ int security_inode_readlink(struct dentry *dentry)
7f207e10
AM
914 return 0;
915 return security_ops->inode_readlink(dentry);
916 }
917+EXPORT_SYMBOL(security_inode_readlink);
918
919 int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd)
920 {
7e9cd9fe 921@@ -594,6 +601,7 @@ int security_inode_permission(struct inode *inode, int mask)
7f207e10 922 return 0;
1e00d052 923 return security_ops->inode_permission(inode, mask);
7f207e10
AM
924 }
925+EXPORT_SYMBOL(security_inode_permission);
926
1e00d052 927 int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
7f207e10 928 {
7e9cd9fe 929@@ -716,6 +724,7 @@ int security_file_permission(struct file *file, int mask)
7f207e10
AM
930
931 return fsnotify_perm(file, mask);
932 }
933+EXPORT_SYMBOL(security_file_permission);
934
935 int security_file_alloc(struct file *file)
936 {
7e9cd9fe 937@@ -775,6 +784,7 @@ int security_mmap_file(struct file *file, unsigned long prot,
7f207e10
AM
938 return ret;
939 return ima_file_mmap(file, prot);
940 }
0c3ec466 941+EXPORT_SYMBOL(security_mmap_file);
7f207e10 942
0c3ec466
AM
943 int security_mmap_addr(unsigned long addr)
944 {
7f207e10
AM
945diff -urN /usr/share/empty/Documentation/ABI/testing/debugfs-aufs linux/Documentation/ABI/testing/debugfs-aufs
946--- /usr/share/empty/Documentation/ABI/testing/debugfs-aufs 1970-01-01 01:00:00.000000000 +0100
ab036dbd 947+++ linux/Documentation/ABI/testing/debugfs-aufs 2015-09-24 10:47:58.244719488 +0200
86dc4139 948@@ -0,0 +1,50 @@
7f207e10
AM
949+What: /debug/aufs/si_<id>/
950+Date: March 2009
f6b6e03d 951+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
952+Description:
953+ Under /debug/aufs, a directory named si_<id> is created
954+ per aufs mount, where <id> is a unique id generated
955+ internally.
1facf9fc 956+
86dc4139
AM
957+What: /debug/aufs/si_<id>/plink
958+Date: Apr 2013
f6b6e03d 959+Contact: J. R. Okajima <hooanon05g@gmail.com>
86dc4139
AM
960+Description:
961+ It has three lines and shows the information about the
962+ pseudo-link. The first line is a single number
963+ representing a number of buckets. The second line is a
964+ number of pseudo-links per buckets (separated by a
965+ blank). The last line is a single number representing a
966+ total number of pseudo-links.
967+ When the aufs mount option 'noplink' is specified, it
968+ will show "1\n0\n0\n".
969+
7f207e10
AM
970+What: /debug/aufs/si_<id>/xib
971+Date: March 2009
f6b6e03d 972+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
973+Description:
974+ It shows the consumed blocks by xib (External Inode Number
975+ Bitmap), its block size and file size.
976+ When the aufs mount option 'noxino' is specified, it
977+ will be empty. About XINO files, see the aufs manual.
978+
979+What: /debug/aufs/si_<id>/xino0, xino1 ... xinoN
980+Date: March 2009
f6b6e03d 981+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
982+Description:
983+ It shows the consumed blocks by xino (External Inode Number
984+ Translation Table), its link count, block size and file
985+ size.
986+ When the aufs mount option 'noxino' is specified, it
987+ will be empty. About XINO files, see the aufs manual.
988+
989+What: /debug/aufs/si_<id>/xigen
990+Date: March 2009
f6b6e03d 991+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
992+Description:
993+ It shows the consumed blocks by xigen (External Inode
994+ Generation Table), its block size and file size.
995+ If CONFIG_AUFS_EXPORT is disabled, this entry will not
996+ be created.
997+ When the aufs mount option 'noxino' is specified, it
998+ will be empty. About XINO files, see the aufs manual.
999diff -urN /usr/share/empty/Documentation/ABI/testing/sysfs-aufs linux/Documentation/ABI/testing/sysfs-aufs
1000--- /usr/share/empty/Documentation/ABI/testing/sysfs-aufs 1970-01-01 01:00:00.000000000 +0100
ab036dbd 1001+++ linux/Documentation/ABI/testing/sysfs-aufs 2015-09-24 10:47:58.244719488 +0200
392086de 1002@@ -0,0 +1,31 @@
7f207e10
AM
1003+What: /sys/fs/aufs/si_<id>/
1004+Date: March 2009
f6b6e03d 1005+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1006+Description:
1007+ Under /sys/fs/aufs, a directory named si_<id> is created
1008+ per aufs mount, where <id> is a unique id generated
1009+ internally.
1010+
1011+What: /sys/fs/aufs/si_<id>/br0, br1 ... brN
1012+Date: March 2009
f6b6e03d 1013+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1014+Description:
1015+ It shows the absolute path of a member directory (which
1016+ is called branch) in aufs, and its permission.
1017+
392086de
AM
1018+What: /sys/fs/aufs/si_<id>/brid0, brid1 ... bridN
1019+Date: July 2013
f6b6e03d 1020+Contact: J. R. Okajima <hooanon05g@gmail.com>
392086de
AM
1021+Description:
1022+ It shows the id of a member directory (which is called
1023+ branch) in aufs.
1024+
7f207e10
AM
1025+What: /sys/fs/aufs/si_<id>/xi_path
1026+Date: March 2009
f6b6e03d 1027+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1028+Description:
1029+ It shows the absolute path of XINO (External Inode Number
1030+ Bitmap, Translation Table and Generation Table) file
1031+ even if it is the default path.
1032+ When the aufs mount option 'noxino' is specified, it
1033+ will be empty. About XINO files, see the aufs manual.
53392da6
AM
1034diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/01intro.txt linux/Documentation/filesystems/aufs/design/01intro.txt
1035--- /usr/share/empty/Documentation/filesystems/aufs/design/01intro.txt 1970-01-01 01:00:00.000000000 +0100
ab036dbd 1036+++ linux/Documentation/filesystems/aufs/design/01intro.txt 2015-09-24 10:47:58.244719488 +0200
7e9cd9fe 1037@@ -0,0 +1,170 @@
53392da6 1038+
2000de60 1039+# Copyright (C) 2005-2015 Junjiro R. Okajima
53392da6
AM
1040+#
1041+# This program is free software; you can redistribute it and/or modify
1042+# it under the terms of the GNU General Public License as published by
1043+# the Free Software Foundation; either version 2 of the License, or
1044+# (at your option) any later version.
1045+#
1046+# This program is distributed in the hope that it will be useful,
1047+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1048+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1049+# GNU General Public License for more details.
1050+#
1051+# You should have received a copy of the GNU General Public License
523b37e3 1052+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1053+
1054+Introduction
1055+----------------------------------------
1056+
1057+aufs [ei ju: ef es] | [a u f s]
1058+1. abbrev. for "advanced multi-layered unification filesystem".
1059+2. abbrev. for "another unionfs".
1060+3. abbrev. for "auf das" in German which means "on the" in English.
1061+ Ex. "Butter aufs Brot"(G) means "butter onto bread"(E).
1062+ But "Filesystem aufs Filesystem" is hard to understand.
1063+
1064+AUFS is a filesystem with features:
1065+- multi layered stackable unification filesystem, the member directory
1066+ is called as a branch.
1067+- branch permission and attribute, 'readonly', 'real-readonly',
7e9cd9fe 1068+ 'readwrite', 'whiteout-able', 'link-able whiteout', etc. and their
53392da6
AM
1069+ combination.
1070+- internal "file copy-on-write".
1071+- logical deletion, whiteout.
1072+- dynamic branch manipulation, adding, deleting and changing permission.
1073+- allow bypassing aufs, user's direct branch access.
1074+- external inode number translation table and bitmap which maintains the
1075+ persistent aufs inode number.
1076+- seekable directory, including NFS readdir.
1077+- file mapping, mmap and sharing pages.
1078+- pseudo-link, hardlink over branches.
1079+- loopback mounted filesystem as a branch.
1080+- several policies to select one among multiple writable branches.
1081+- revert a single systemcall when an error occurs in aufs.
1082+- and more...
1083+
1084+
1085+Multi Layered Stackable Unification Filesystem
1086+----------------------------------------------------------------------
1087+Most people already know what it is.
1088+It is a filesystem which unifies several directories and provides a
1089+merged single directory. When users access a file, the access will be
1090+passed/re-directed/converted (sorry, I am not sure which English word is
1091+correct) to the real file on the member filesystem. The member
1092+filesystem is called 'lower filesystem' or 'branch' and has a mode
1093+'readonly' and 'readwrite.' And the deletion for a file on the lower
1094+readonly branch is handled by creating 'whiteout' on the upper writable
1095+branch.
1096+
1097+On LKML, there have been discussions about UnionMount (Jan Blunck,
1098+Bharata B Rao and Valerie Aurora) and Unionfs (Erez Zadok). They took
1099+different approaches to implement the merged-view.
1100+The former tries putting it into VFS, and the latter implements as a
1101+separate filesystem.
1102+(If I misunderstand about these implementations, please let me know and
1103+I shall correct it. Because it is a long time ago when I read their
1104+source files last time).
1105+
1106+UnionMount's approach will be able to be small, but may be hard to share
1107+branches between several UnionMount since the whiteout in it is
1108+implemented in the inode on branch filesystem and always
1109+shared. According to Bharata's post, readdir does not seem to be
1110+finished yet.
1111+There are several missing features known in these implementations such as
1112+- for users, the inode number may change silently. eg. copy-up.
1113+- link(2) may break by copy-up.
1114+- read(2) may get an obsoleted filedata (fstat(2) too).
1115+- fcntl(F_SETLK) may be broken by copy-up.
1116+- unnecessary copy-up may happen, for example mmap(MAP_PRIVATE) after
1117+ open(O_RDWR).
1118+
7e9cd9fe
AM
1119+In linux-3.18, "overlay" filesystem (formerly known as "overlayfs") was
1120+merged into mainline. This is another implementation of UnionMount as a
1121+separated filesystem. All the limitations and known problems which
1122+UnionMount has are equally inherited by the "overlay" filesystem.
1123+
1124+Unionfs has a longer history. When I started implementing a stackable
1125+filesystem (Aug 2005), it already existed. It has virtual super_block,
1126+inode, dentry and file objects and they have an array pointing lower
1127+same kind objects. After contributing many patches for Unionfs, I
1128+re-started my project AUFS (Jun 2006).
53392da6
AM
1129+
1130+In AUFS, the structure of filesystem resembles to Unionfs, but I
1131+implemented my own ideas, approaches and enhancements and it became
1132+totally different one.
1133+
1134+Comparing DM snapshot and fs based implementation
1135+- the number of bytes to be copied between devices is much smaller.
1136+- the type of filesystem must be one and only.
1137+- the fs must be writable, no readonly fs, even for the lower original
1138+ device. so the compression fs will not be usable. but if we use
1139+ loopback mount, we may address this issue.
1140+ for instance,
1141+ mount /cdrom/squashfs.img /sq
1142+ losetup /sq/ext2.img
1143+ losetup /somewhere/cow
1144+ dmsetup "snapshot /dev/loop0 /dev/loop1 ..."
1145+- it will be difficult (or needs more operations) to extract the
1146+ difference between the original device and COW.
1147+- DM snapshot-merge may help a lot when users try merging. in the
1148+ fs-layer union, users will use rsync(1).
1149+
7e9cd9fe
AM
1150+You may want to read my old paper "Filesystems in LiveCD"
1151+(http://aufs.sourceforge.net/aufs2/report/sq/sq.pdf).
53392da6 1152+
7e9cd9fe
AM
1153+
1154+Several characters/aspects/persona of aufs
53392da6
AM
1155+----------------------------------------------------------------------
1156+
7e9cd9fe 1157+Aufs has several characters, aspects or persona.
53392da6
AM
1158+1. a filesystem, callee of VFS helper
1159+2. sub-VFS, caller of VFS helper for branches
1160+3. a virtual filesystem which maintains persistent inode number
1161+4. reader/writer of files on branches such like an application
1162+
1163+1. Callee of VFS Helper
1164+As an ordinary linux filesystem, aufs is a callee of VFS. For instance,
1165+unlink(2) from an application reaches sys_unlink() kernel function and
1166+then vfs_unlink() is called. vfs_unlink() is one of VFS helper and it
1167+calls filesystem specific unlink operation. Actually aufs implements the
1168+unlink operation but it behaves like a redirector.
1169+
1170+2. Caller of VFS Helper for Branches
1171+aufs_unlink() passes the unlink request to the branch filesystem as if
1172+it were called from VFS. So the called unlink operation of the branch
1173+filesystem acts as usual. As a caller of VFS helper, aufs should handle
1174+every necessary pre/post operation for the branch filesystem.
1175+- acquire the lock for the parent dir on a branch
1176+- lookup in a branch
1177+- revalidate dentry on a branch
1178+- mnt_want_write() for a branch
1179+- vfs_unlink() for a branch
1180+- mnt_drop_write() for a branch
1181+- release the lock on a branch
1182+
1183+3. Persistent Inode Number
1184+One of the most important issue for a filesystem is to maintain inode
1185+numbers. This is particularly important to support exporting a
1186+filesystem via NFS. Aufs is a virtual filesystem which doesn't have a
1187+backend block device for its own. But some storage is necessary to
7e9cd9fe
AM
1188+keep and maintain the inode numbers. It may be a large space and may not
1189+suit to keep in memory. Aufs rents some space from its first writable
1190+branch filesystem (by default) and creates file(s) on it. These files
1191+are created by aufs internally and removed soon (currently) keeping
1192+opened.
53392da6
AM
1193+Note: Because these files are removed, they are totally gone after
1194+ unmounting aufs. It means the inode numbers are not persistent
1195+ across unmount or reboot. I have a plan to make them really
1196+ persistent which will be important for aufs on NFS server.
1197+
1198+4. Read/Write Files Internally (copy-on-write)
1199+Because a branch can be readonly, when you write a file on it, aufs will
1200+"copy-up" it to the upper writable branch internally. And then write the
1201+originally requested thing to the file. Generally kernel doesn't
1202+open/read/write file actively. In aufs, even a single write may cause a
1203+internal "file copy". This behaviour is very similar to cp(1) command.
1204+
1205+Some people may think it is better to pass such work to user space
1206+helper, instead of doing in kernel space. Actually I am still thinking
1207+about it. But currently I have implemented it in kernel space.
1208diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/02struct.txt linux/Documentation/filesystems/aufs/design/02struct.txt
1209--- /usr/share/empty/Documentation/filesystems/aufs/design/02struct.txt 1970-01-01 01:00:00.000000000 +0100
ab036dbd 1210+++ linux/Documentation/filesystems/aufs/design/02struct.txt 2015-09-24 10:47:58.244719488 +0200
7e9cd9fe 1211@@ -0,0 +1,258 @@
53392da6 1212+
2000de60 1213+# Copyright (C) 2005-2015 Junjiro R. Okajima
53392da6
AM
1214+#
1215+# This program is free software; you can redistribute it and/or modify
1216+# it under the terms of the GNU General Public License as published by
1217+# the Free Software Foundation; either version 2 of the License, or
1218+# (at your option) any later version.
1219+#
1220+# This program is distributed in the hope that it will be useful,
1221+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1222+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1223+# GNU General Public License for more details.
1224+#
1225+# You should have received a copy of the GNU General Public License
523b37e3 1226+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1227+
1228+Basic Aufs Internal Structure
1229+
1230+Superblock/Inode/Dentry/File Objects
1231+----------------------------------------------------------------------
1232+As like an ordinary filesystem, aufs has its own
1233+superblock/inode/dentry/file objects. All these objects have a
1234+dynamically allocated array and store the same kind of pointers to the
1235+lower filesystem, branch.
1236+For example, when you build a union with one readwrite branch and one
1237+readonly, mounted /au, /rw and /ro respectively.
1238+- /au = /rw + /ro
1239+- /ro/fileA exists but /rw/fileA
1240+
1241+Aufs lookup operation finds /ro/fileA and gets dentry for that. These
1242+pointers are stored in a aufs dentry. The array in aufs dentry will be,
7e9cd9fe 1243+- [0] = NULL (because /rw/fileA doesn't exist)
53392da6
AM
1244+- [1] = /ro/fileA
1245+
1246+This style of an array is essentially same to the aufs
1247+superblock/inode/dentry/file objects.
1248+
1249+Because aufs supports manipulating branches, ie. add/delete/change
7e9cd9fe
AM
1250+branches dynamically, these objects has its own generation. When
1251+branches are changed, the generation in aufs superblock is
1252+incremented. And a generation in other object are compared when it is
1253+accessed. When a generation in other objects are obsoleted, aufs
1254+refreshes the internal array.
53392da6
AM
1255+
1256+
1257+Superblock
1258+----------------------------------------------------------------------
1259+Additionally aufs superblock has some data for policies to select one
1260+among multiple writable branches, XIB files, pseudo-links and kobject.
1261+See below in detail.
7e9cd9fe
AM
1262+About the policies which supports copy-down a directory, see
1263+wbr_policy.txt too.
53392da6
AM
1264+
1265+
1266+Branch and XINO(External Inode Number Translation Table)
1267+----------------------------------------------------------------------
1268+Every branch has its own xino (external inode number translation table)
1269+file. The xino file is created and unlinked by aufs internally. When two
1270+members of a union exist on the same filesystem, they share the single
1271+xino file.
1272+The struct of a xino file is simple, just a sequence of aufs inode
1273+numbers which is indexed by the lower inode number.
1274+In the above sample, assume the inode number of /ro/fileA is i111 and
1275+aufs assigns the inode number i999 for fileA. Then aufs writes 999 as
1276+4(8) bytes at 111 * 4(8) bytes offset in the xino file.
1277+
1278+When the inode numbers are not contiguous, the xino file will be sparse
1279+which has a hole in it and doesn't consume as much disk space as it
1280+might appear. If your branch filesystem consumes disk space for such
1281+holes, then you should specify 'xino=' option at mounting aufs.
1282+
7e9cd9fe
AM
1283+Aufs has a mount option to free the disk blocks for such holes in XINO
1284+files on tmpfs or ramdisk. But it is not so effective actually. If you
1285+meet a problem of disk shortage due to XINO files, then you should try
1286+"tmpfs-ino.patch" (and "vfs-ino.patch" too) in aufs4-standalone.git.
1287+The patch localizes the assignment inumbers per tmpfs-mount and avoid
1288+the holes in XINO files.
1289+
53392da6 1290+Also a writable branch has three kinds of "whiteout bases". All these
7e9cd9fe 1291+are existed when the branch is joined to aufs, and their names are
53392da6
AM
1292+whiteout-ed doubly, so that users will never see their names in aufs
1293+hierarchy.
7e9cd9fe 1294+1. a regular file which will be hardlinked to all whiteouts.
53392da6 1295+2. a directory to store a pseudo-link.
7e9cd9fe 1296+3. a directory to store an "orphan"-ed file temporary.
53392da6
AM
1297+
1298+1. Whiteout Base
1299+ When you remove a file on a readonly branch, aufs handles it as a
1300+ logical deletion and creates a whiteout on the upper writable branch
1301+ as a hardlink of this file in order not to consume inode on the
1302+ writable branch.
1303+2. Pseudo-link Dir
1304+ See below, Pseudo-link.
1305+3. Step-Parent Dir
1306+ When "fileC" exists on the lower readonly branch only and it is
1307+ opened and removed with its parent dir, and then user writes
1308+ something into it, then aufs copies-up fileC to this
1309+ directory. Because there is no other dir to store fileC. After
1310+ creating a file under this dir, the file is unlinked.
1311+
1312+Because aufs supports manipulating branches, ie. add/delete/change
7e9cd9fe
AM
1313+dynamically, a branch has its own id. When the branch order changes,
1314+aufs finds the new index by searching the branch id.
53392da6
AM
1315+
1316+
1317+Pseudo-link
1318+----------------------------------------------------------------------
1319+Assume "fileA" exists on the lower readonly branch only and it is
1320+hardlinked to "fileB" on the branch. When you write something to fileA,
1321+aufs copies-up it to the upper writable branch. Additionally aufs
1322+creates a hardlink under the Pseudo-link Directory of the writable
1323+branch. The inode of a pseudo-link is kept in aufs super_block as a
1324+simple list. If fileB is read after unlinking fileA, aufs returns
1325+filedata from the pseudo-link instead of the lower readonly
1326+branch. Because the pseudo-link is based upon the inode, to keep the
7e9cd9fe 1327+inode number by xino (see above) is essentially necessary.
53392da6
AM
1328+
1329+All the hardlinks under the Pseudo-link Directory of the writable branch
1330+should be restored in a proper location later. Aufs provides a utility
1331+to do this. The userspace helpers are executed at remounting and unmounting
1332+aufs by default.
1333+During this utility is running, it puts aufs into the pseudo-link
1334+maintenance mode. In this mode, only the process which began the
1335+maintenance mode (and its child processes) is allowed to operate in
1336+aufs. Some other processes which are not related to the pseudo-link will
1337+be allowed to run too, but the rest have to return an error or wait
1338+until the maintenance mode ends. If a process already acquires an inode
1339+mutex (in VFS), it has to return an error.
1340+
1341+
1342+XIB(external inode number bitmap)
1343+----------------------------------------------------------------------
1344+Addition to the xino file per a branch, aufs has an external inode number
7e9cd9fe
AM
1345+bitmap in a superblock object. It is also an internal file such like a
1346+xino file.
53392da6
AM
1347+It is a simple bitmap to mark whether the aufs inode number is in-use or
1348+not.
1349+To reduce the file I/O, aufs prepares a single memory page to cache xib.
1350+
7e9cd9fe 1351+As well as XINO files, aufs has a feature to truncate/refresh XIB to
53392da6
AM
1352+reduce the number of consumed disk blocks for these files.
1353+
1354+
1355+Virtual or Vertical Dir, and Readdir in Userspace
1356+----------------------------------------------------------------------
1357+In order to support multiple layers (branches), aufs readdir operation
1358+constructs a virtual dir block on memory. For readdir, aufs calls
1359+vfs_readdir() internally for each dir on branches, merges their entries
1360+with eliminating the whiteout-ed ones, and sets it to file (dir)
1361+object. So the file object has its entry list until it is closed. The
1362+entry list will be updated when the file position is zero and becomes
7e9cd9fe 1363+obsoleted. This decision is made in aufs automatically.
53392da6
AM
1364+
1365+The dynamically allocated memory block for the name of entries has a
1366+unit of 512 bytes (by default) and stores the names contiguously (no
1367+padding). Another block for each entry is handled by kmem_cache too.
1368+During building dir blocks, aufs creates hash list and judging whether
1369+the entry is whiteouted by its upper branch or already listed.
1370+The merged result is cached in the corresponding inode object and
1371+maintained by a customizable life-time option.
1372+
1373+Some people may call it can be a security hole or invite DoS attack
1374+since the opened and once readdir-ed dir (file object) holds its entry
1375+list and becomes a pressure for system memory. But I'd say it is similar
1376+to files under /proc or /sys. The virtual files in them also holds a
1377+memory page (generally) while they are opened. When an idea to reduce
1378+memory for them is introduced, it will be applied to aufs too.
1379+For those who really hate this situation, I've developed readdir(3)
1380+library which operates this merging in userspace. You just need to set
1381+LD_PRELOAD environment variable, and aufs will consume no memory in
1382+kernel space for readdir(3).
1383+
1384+
1385+Workqueue
1386+----------------------------------------------------------------------
1387+Aufs sometimes requires privilege access to a branch. For instance,
1388+in copy-up/down operation. When a user process is going to make changes
1389+to a file which exists in the lower readonly branch only, and the mode
1390+of one of ancestor directories may not be writable by a user
1391+process. Here aufs copy-up the file with its ancestors and they may
1392+require privilege to set its owner/group/mode/etc.
1393+This is a typical case of a application character of aufs (see
1394+Introduction).
1395+
1396+Aufs uses workqueue synchronously for this case. It creates its own
1397+workqueue. The workqueue is a kernel thread and has privilege. Aufs
1398+passes the request to call mkdir or write (for example), and wait for
1399+its completion. This approach solves a problem of a signal handler
1400+simply.
1401+If aufs didn't adopt the workqueue and changed the privilege of the
7e9cd9fe
AM
1402+process, then the process may receive the unexpected SIGXFSZ or other
1403+signals.
53392da6
AM
1404+
1405+Also aufs uses the system global workqueue ("events" kernel thread) too
1406+for asynchronous tasks, such like handling inotify/fsnotify, re-creating a
1407+whiteout base and etc. This is unrelated to a privilege.
1408+Most of aufs operation tries acquiring a rw_semaphore for aufs
1409+superblock at the beginning, at the same time waits for the completion
1410+of all queued asynchronous tasks.
1411+
1412+
1413+Whiteout
1414+----------------------------------------------------------------------
1415+The whiteout in aufs is very similar to Unionfs's. That is represented
1416+by its filename. UnionMount takes an approach of a file mode, but I am
1417+afraid several utilities (find(1) or something) will have to support it.
1418+
1419+Basically the whiteout represents "logical deletion" which stops aufs to
1420+lookup further, but also it represents "dir is opaque" which also stop
7e9cd9fe 1421+further lookup.
53392da6
AM
1422+
1423+In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively.
1424+In order to make several functions in a single systemcall to be
1425+revertible, aufs adopts an approach to rename a directory to a temporary
1426+unique whiteouted name.
1427+For example, in rename(2) dir where the target dir already existed, aufs
1428+renames the target dir to a temporary unique whiteouted name before the
7e9cd9fe 1429+actual rename on a branch, and then handles other actions (make it opaque,
53392da6
AM
1430+update the attributes, etc). If an error happens in these actions, aufs
1431+simply renames the whiteouted name back and returns an error. If all are
1432+succeeded, aufs registers a function to remove the whiteouted unique
1433+temporary name completely and asynchronously to the system global
1434+workqueue.
1435+
1436+
1437+Copy-up
1438+----------------------------------------------------------------------
1439+It is a well-known feature or concept.
1440+When a user modifies a file on a readonly branch, aufs operates "copy-up"
1441+internally and makes change to the new file on the upper writable branch.
1442+When the trigger systemcall does not update the timestamps of the parent
1443+dir, aufs reverts it after copy-up.
c2b27bf2
AM
1444+
1445+
1446+Move-down (aufs3.9 and later)
1447+----------------------------------------------------------------------
1448+"Copy-up" is one of the essential features in aufs. It copies a file from
1449+the lower readonly branch to the upper writable branch when a user
1450+changes something about the file.
1451+"Move-down" is an opposite action of copy-up. Basically this action is
1452+run manually instead of automatically and internally.
076b876e
AM
1453+For design and implementation, aufs has to consider these issues.
1454+- whiteout for the file may exist on the lower branch.
1455+- ancestor directories may not exist on the lower branch.
1456+- diropq for the ancestor directories may exist on the upper branch.
1457+- free space on the lower branch will reduce.
1458+- another access to the file may happen during moving-down, including
7e9cd9fe 1459+ UDBA (see "Revalidate Dentry and UDBA").
076b876e
AM
1460+- the file should not be hard-linked nor pseudo-linked. they should be
1461+ handled by auplink utility later.
c2b27bf2
AM
1462+
1463+Sometimes users want to move-down a file from the upper writable branch
1464+to the lower readonly or writable branch. For instance,
1465+- the free space of the upper writable branch is going to run out.
1466+- create a new intermediate branch between the upper and lower branch.
1467+- etc.
1468+
1469+For this purpose, use "aumvdown" command in aufs-util.git.
b912730e
AM
1470diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/03atomic_open.txt linux/Documentation/filesystems/aufs/design/03atomic_open.txt
1471--- /usr/share/empty/Documentation/filesystems/aufs/design/03atomic_open.txt 1970-01-01 01:00:00.000000000 +0100
ab036dbd 1472+++ linux/Documentation/filesystems/aufs/design/03atomic_open.txt 2015-09-24 10:47:58.244719488 +0200
b912730e
AM
1473@@ -0,0 +1,85 @@
1474+
1475+# Copyright (C) 2015 Junjiro R. Okajima
1476+#
1477+# This program is free software; you can redistribute it and/or modify
1478+# it under the terms of the GNU General Public License as published by
1479+# the Free Software Foundation; either version 2 of the License, or
1480+# (at your option) any later version.
1481+#
1482+# This program is distributed in the hope that it will be useful,
1483+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1484+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1485+# GNU General Public License for more details.
1486+#
1487+# You should have received a copy of the GNU General Public License
1488+# along with this program. If not, see <http://www.gnu.org/licenses/>.
1489+
1490+Support for a branch who has its ->atomic_open()
1491+----------------------------------------------------------------------
1492+The filesystems who implement its ->atomic_open() are not majority. For
1493+example NFSv4 does, and aufs should call NFSv4 ->atomic_open,
1494+particularly for open(O_CREAT|O_EXCL, 0400) case. Other than
1495+->atomic_open(), NFSv4 returns an error for this open(2). While I am not
1496+sure whether all filesystems who have ->atomic_open() behave like this,
1497+but NFSv4 surely returns the error.
1498+
1499+In order to support ->atomic_open() for aufs, there are a few
1500+approaches.
1501+
1502+A. Introduce aufs_atomic_open()
1503+ - calls one of VFS:do_last(), lookup_open() or atomic_open() for
1504+ branch fs.
1505+B. Introduce aufs_atomic_open() calling create, open and chmod. this is
1506+ an aufs user Pip Cet's approach
1507+ - calls aufs_create(), VFS finish_open() and notify_change().
1508+ - pass fake-mode to finish_open(), and then correct the mode by
1509+ notify_change().
1510+C. Extend aufs_open() to call branch fs's ->atomic_open()
1511+ - no aufs_atomic_open().
1512+ - aufs_lookup() registers the TID to an aufs internal object.
1513+ - aufs_create() does nothing when the matching TID is registered, but
1514+ registers the mode.
1515+ - aufs_open() calls branch fs's ->atomic_open() when the matching
1516+ TID is registered.
1517+D. Extend aufs_open() to re-try branch fs's ->open() with superuser's
1518+ credential
1519+ - no aufs_atomic_open().
1520+ - aufs_create() registers the TID to an internal object. this info
1521+ represents "this process created this file just now."
1522+ - when aufs gets EACCES from branch fs's ->open(), then confirm the
1523+ registered TID and re-try open() with superuser's credential.
1524+
1525+Pros and cons for each approach.
1526+
1527+A.
1528+ - straightforward but highly depends upon VFS internal.
1529+ - the atomic behaviour is kept.
1530+ - some of parameters such as nameidata are hard to reproduce for
1531+ branch fs.
1532+ - large overhead.
1533+B.
1534+ - easy to implement.
1535+ - the atomic behaviour is lost.
1536+C.
1537+ - the atomic behaviour is kept.
1538+ - dirty and tricky.
1539+ - VFS checks whether the file is created correctly after calling
1540+ ->create(), which means this approach doesn't work.
1541+D.
1542+ - easy to implement.
1543+ - the atomic behaviour is lost.
1544+ - to open a file with superuser's credential and give it to a user
1545+ process is a bad idea, since the file object keeps the credential
1546+ in it. It may affect LSM or something. This approach doesn't work
1547+ either.
1548+
1549+The approach A is ideal, but it is hard to implement. So here is a
1550+variation of A, which is to be implemented.
1551+
1552+A-1. Introduce aufs_atomic_open()
1553+ - calls branch fs ->atomic_open() if exists. otherwise calls
1554+ vfs_create() and finish_open().
1555+ - the demerit is that the several checks after branch fs
1556+ ->atomic_open() are lost. in the ordinary case, the checks are
1557+ done by VFS:do_last(), lookup_open() and atomic_open(). some can
1558+ be implemented in aufs, but not all I am afraid.
53392da6
AM
1559diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/03lookup.txt linux/Documentation/filesystems/aufs/design/03lookup.txt
1560--- /usr/share/empty/Documentation/filesystems/aufs/design/03lookup.txt 1970-01-01 01:00:00.000000000 +0100
ab036dbd 1561+++ linux/Documentation/filesystems/aufs/design/03lookup.txt 2015-09-24 10:47:58.244719488 +0200
7e9cd9fe 1562@@ -0,0 +1,113 @@
53392da6 1563+
2000de60 1564+# Copyright (C) 2005-2015 Junjiro R. Okajima
53392da6
AM
1565+#
1566+# This program is free software; you can redistribute it and/or modify
1567+# it under the terms of the GNU General Public License as published by
1568+# the Free Software Foundation; either version 2 of the License, or
1569+# (at your option) any later version.
1570+#
1571+# This program is distributed in the hope that it will be useful,
1572+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1573+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1574+# GNU General Public License for more details.
1575+#
1576+# You should have received a copy of the GNU General Public License
523b37e3 1577+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1578+
1579+Lookup in a Branch
1580+----------------------------------------------------------------------
1581+Since aufs has a character of sub-VFS (see Introduction), it operates
7e9cd9fe
AM
1582+lookup for branches as VFS does. It may be a heavy work. But almost all
1583+lookup operation in aufs is the simplest case, ie. lookup only an entry
1584+directly connected to its parent. Digging down the directory hierarchy
1585+is unnecessary. VFS has a function lookup_one_len() for that use, and
1586+aufs calls it.
1587+
1588+When a branch is a remote filesystem, aufs basically relies upon its
53392da6
AM
1589+->d_revalidate(), also aufs forces the hardest revalidate tests for
1590+them.
1591+For d_revalidate, aufs implements three levels of revalidate tests. See
1592+"Revalidate Dentry and UDBA" in detail.
1593+
1594+
076b876e
AM
1595+Test Only the Highest One for the Directory Permission (dirperm1 option)
1596+----------------------------------------------------------------------
1597+Let's try case study.
1598+- aufs has two branches, upper readwrite and lower readonly.
1599+ /au = /rw + /ro
1600+- "dirA" exists under /ro, but /rw. and its mode is 0700.
1601+- user invoked "chmod a+rx /au/dirA"
1602+- the internal copy-up is activated and "/rw/dirA" is created and its
7e9cd9fe 1603+ permission bits are set to world readable.
076b876e
AM
1604+- then "/au/dirA" becomes world readable?
1605+
1606+In this case, /ro/dirA is still 0700 since it exists in readonly branch,
1607+or it may be a natively readonly filesystem. If aufs respects the lower
1608+branch, it should not respond readdir request from other users. But user
1609+allowed it by chmod. Should aufs really reject showing the entries
1610+under /ro/dirA?
1611+
7e9cd9fe
AM
1612+To be honest, I don't have a good solution for this case. So aufs
1613+implements 'dirperm1' and 'nodirperm1' mount options, and leave it to
1614+users.
076b876e
AM
1615+When dirperm1 is specified, aufs checks only the highest one for the
1616+directory permission, and shows the entries. Otherwise, as usual, checks
1617+every dir existing on all branches and rejects the request.
1618+
1619+As a side effect, dirperm1 option improves the performance of aufs
1620+because the number of permission check is reduced when the number of
1621+branch is many.
1622+
1623+
53392da6
AM
1624+Revalidate Dentry and UDBA (User's Direct Branch Access)
1625+----------------------------------------------------------------------
1626+Generally VFS helpers re-validate a dentry as a part of lookup.
1627+0. digging down the directory hierarchy.
1628+1. lock the parent dir by its i_mutex.
1629+2. lookup the final (child) entry.
1630+3. revalidate it.
1631+4. call the actual operation (create, unlink, etc.)
1632+5. unlock the parent dir
1633+
1634+If the filesystem implements its ->d_revalidate() (step 3), then it is
1635+called. Actually aufs implements it and checks the dentry on a branch is
1636+still valid.
1637+But it is not enough. Because aufs has to release the lock for the
1638+parent dir on a branch at the end of ->lookup() (step 2) and
1639+->d_revalidate() (step 3) while the i_mutex of the aufs dir is still
1640+held by VFS.
1641+If the file on a branch is changed directly, eg. bypassing aufs, after
1642+aufs released the lock, then the subsequent operation may cause
1643+some unpleasant result.
1644+
1645+This situation is a result of VFS architecture, ->lookup() and
1646+->d_revalidate() is separated. But I never say it is wrong. It is a good
1647+design from VFS's point of view. It is just not suitable for sub-VFS
1648+character in aufs.
1649+
1650+Aufs supports such case by three level of revalidation which is
1651+selectable by user.
1652+1. Simple Revalidate
1653+ Addition to the native flow in VFS's, confirm the child-parent
1654+ relationship on the branch just after locking the parent dir on the
1655+ branch in the "actual operation" (step 4). When this validation
1656+ fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still
1657+ checks the validation of the dentry on branches.
1658+2. Monitor Changes Internally by Inotify/Fsnotify
1659+ Addition to above, in the "actual operation" (step 4) aufs re-lookup
1660+ the dentry on the branch, and returns EBUSY if it finds different
1661+ dentry.
1662+ Additionally, aufs sets the inotify/fsnotify watch for every dir on branches
1663+ during it is in cache. When the event is notified, aufs registers a
1664+ function to kernel 'events' thread by schedule_work(). And the
1665+ function sets some special status to the cached aufs dentry and inode
1666+ private data. If they are not cached, then aufs has nothing to
1667+ do. When the same file is accessed through aufs (step 0-3) later,
1668+ aufs will detect the status and refresh all necessary data.
1669+ In this mode, aufs has to ignore the event which is fired by aufs
1670+ itself.
1671+3. No Extra Validation
1672+ This is the simplest test and doesn't add any additional revalidation
7e9cd9fe 1673+ test, and skip the revalidation in step 4. It is useful and improves
53392da6
AM
1674+ aufs performance when system surely hide the aufs branches from user,
1675+ by over-mounting something (or another method).
1676diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/04branch.txt linux/Documentation/filesystems/aufs/design/04branch.txt
1677--- /usr/share/empty/Documentation/filesystems/aufs/design/04branch.txt 1970-01-01 01:00:00.000000000 +0100
ab036dbd 1678+++ linux/Documentation/filesystems/aufs/design/04branch.txt 2015-09-24 10:47:58.244719488 +0200
7e9cd9fe 1679@@ -0,0 +1,74 @@
53392da6 1680+
2000de60 1681+# Copyright (C) 2005-2015 Junjiro R. Okajima
53392da6
AM
1682+#
1683+# This program is free software; you can redistribute it and/or modify
1684+# it under the terms of the GNU General Public License as published by
1685+# the Free Software Foundation; either version 2 of the License, or
1686+# (at your option) any later version.
1687+#
1688+# This program is distributed in the hope that it will be useful,
1689+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1690+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1691+# GNU General Public License for more details.
1692+#
1693+# You should have received a copy of the GNU General Public License
523b37e3 1694+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1695+
1696+Branch Manipulation
1697+
1698+Since aufs supports dynamic branch manipulation, ie. add/remove a branch
1699+and changing its permission/attribute, there are a lot of works to do.
1700+
1701+
1702+Add a Branch
1703+----------------------------------------------------------------------
1704+o Confirm the adding dir exists outside of aufs, including loopback
7e9cd9fe 1705+ mount, and its various attributes.
53392da6
AM
1706+o Initialize the xino file and whiteout bases if necessary.
1707+ See struct.txt.
1708+
1709+o Check the owner/group/mode of the directory
1710+ When the owner/group/mode of the adding directory differs from the
1711+ existing branch, aufs issues a warning because it may impose a
1712+ security risk.
1713+ For example, when a upper writable branch has a world writable empty
1714+ top directory, a malicious user can create any files on the writable
1715+ branch directly, like copy-up and modify manually. If something like
1716+ /etc/{passwd,shadow} exists on the lower readonly branch but the upper
1717+ writable branch, and the writable branch is world-writable, then a
1718+ malicious guy may create /etc/passwd on the writable branch directly
1719+ and the infected file will be valid in aufs.
7e9cd9fe 1720+ I am afraid it can be a security issue, but aufs can do nothing except
53392da6
AM
1721+ producing a warning.
1722+
1723+
1724+Delete a Branch
1725+----------------------------------------------------------------------
1726+o Confirm the deleting branch is not busy
1727+ To be general, there is one merit to adopt "remount" interface to
1728+ manipulate branches. It is to discard caches. At deleting a branch,
1729+ aufs checks the still cached (and connected) dentries and inodes. If
1730+ there are any, then they are all in-use. An inode without its
1731+ corresponding dentry can be alive alone (for example, inotify/fsnotify case).
1732+
1733+ For the cached one, aufs checks whether the same named entry exists on
1734+ other branches.
1735+ If the cached one is a directory, because aufs provides a merged view
1736+ to users, as long as one dir is left on any branch aufs can show the
1737+ dir to users. In this case, the branch can be removed from aufs.
1738+ Otherwise aufs rejects deleting the branch.
1739+
1740+ If any file on the deleting branch is opened by aufs, then aufs
1741+ rejects deleting.
1742+
1743+
1744+Modify the Permission of a Branch
1745+----------------------------------------------------------------------
1746+o Re-initialize or remove the xino file and whiteout bases if necessary.
1747+ See struct.txt.
1748+
1749+o rw --> ro: Confirm the modifying branch is not busy
1750+ Aufs rejects the request if any of these conditions are true.
1751+ - a file on the branch is mmap-ed.
1752+ - a regular file on the branch is opened for write and there is no
1753+ same named entry on the upper branch.
1754diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/05wbr_policy.txt linux/Documentation/filesystems/aufs/design/05wbr_policy.txt
1755--- /usr/share/empty/Documentation/filesystems/aufs/design/05wbr_policy.txt 1970-01-01 01:00:00.000000000 +0100
ab036dbd 1756+++ linux/Documentation/filesystems/aufs/design/05wbr_policy.txt 2015-09-24 10:47:58.244719488 +0200
523b37e3 1757@@ -0,0 +1,64 @@
53392da6 1758+
2000de60 1759+# Copyright (C) 2005-2015 Junjiro R. Okajima
53392da6
AM
1760+#
1761+# This program is free software; you can redistribute it and/or modify
1762+# it under the terms of the GNU General Public License as published by
1763+# the Free Software Foundation; either version 2 of the License, or
1764+# (at your option) any later version.
1765+#
1766+# This program is distributed in the hope that it will be useful,
1767+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1768+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1769+# GNU General Public License for more details.
1770+#
1771+# You should have received a copy of the GNU General Public License
523b37e3 1772+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1773+
1774+Policies to Select One among Multiple Writable Branches
1775+----------------------------------------------------------------------
1776+When the number of writable branch is more than one, aufs has to decide
1777+the target branch for file creation or copy-up. By default, the highest
1778+writable branch which has the parent (or ancestor) dir of the target
1779+file is chosen (top-down-parent policy).
1780+By user's request, aufs implements some other policies to select the
7e9cd9fe
AM
1781+writable branch, for file creation several policies, round-robin,
1782+most-free-space, and other policies. For copy-up, top-down-parent,
1783+bottom-up-parent, bottom-up and others.
53392da6
AM
1784+
1785+As expected, the round-robin policy selects the branch in circular. When
1786+you have two writable branches and creates 10 new files, 5 files will be
1787+created for each branch. mkdir(2) systemcall is an exception. When you
1788+create 10 new directories, all will be created on the same branch.
1789+And the most-free-space policy selects the one which has most free
1790+space among the writable branches. The amount of free space will be
1791+checked by aufs internally, and users can specify its time interval.
1792+
1793+The policies for copy-up is more simple,
1794+top-down-parent is equivalent to the same named one in create policy,
1795+bottom-up-parent selects the writable branch where the parent dir
1796+exists and the nearest upper one from the copyup-source,
1797+bottom-up selects the nearest upper writable branch from the
1798+copyup-source, regardless the existence of the parent dir.
1799+
1800+There are some rules or exceptions to apply these policies.
1801+- If there is a readonly branch above the policy-selected branch and
1802+ the parent dir is marked as opaque (a variation of whiteout), or the
1803+ target (creating) file is whiteout-ed on the upper readonly branch,
1804+ then the result of the policy is ignored and the target file will be
1805+ created on the nearest upper writable branch than the readonly branch.
1806+- If there is a writable branch above the policy-selected branch and
1807+ the parent dir is marked as opaque or the target file is whiteouted
1808+ on the branch, then the result of the policy is ignored and the target
1809+ file will be created on the highest one among the upper writable
1810+ branches who has diropq or whiteout. In case of whiteout, aufs removes
1811+ it as usual.
1812+- link(2) and rename(2) systemcalls are exceptions in every policy.
1813+ They try selecting the branch where the source exists as possible
1814+ since copyup a large file will take long time. If it can't be,
1815+ ie. the branch where the source exists is readonly, then they will
1816+ follow the copyup policy.
1817+- There is an exception for rename(2) when the target exists.
1818+ If the rename target exists, aufs compares the index of the branches
1819+ where the source and the target exists and selects the higher
1820+ one. If the selected branch is readonly, then aufs follows the
1821+ copyup policy.
076b876e
AM
1822diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06fhsm.txt linux/Documentation/filesystems/aufs/design/06fhsm.txt
1823--- /usr/share/empty/Documentation/filesystems/aufs/design/06fhsm.txt 1970-01-01 01:00:00.000000000 +0100
ab036dbd 1824+++ linux/Documentation/filesystems/aufs/design/06fhsm.txt 2015-09-24 10:47:58.244719488 +0200
076b876e
AM
1825@@ -0,0 +1,120 @@
1826+
2000de60 1827+# Copyright (C) 2011-2015 Junjiro R. Okajima
076b876e
AM
1828+#
1829+# This program is free software; you can redistribute it and/or modify
1830+# it under the terms of the GNU General Public License as published by
1831+# the Free Software Foundation; either version 2 of the License, or
1832+# (at your option) any later version.
1833+#
1834+# This program is distributed in the hope that it will be useful,
1835+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1836+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1837+# GNU General Public License for more details.
1838+#
1839+# You should have received a copy of the GNU General Public License
1840+# along with this program; if not, write to the Free Software
1841+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1842+
1843+
1844+File-based Hierarchical Storage Management (FHSM)
1845+----------------------------------------------------------------------
1846+Hierarchical Storage Management (or HSM) is a well-known feature in the
1847+storage world. Aufs provides this feature as file-based with multiple
7e9cd9fe 1848+writable branches, based upon the principle of "Colder, the Lower".
076b876e 1849+Here the word "colder" means that the less used files, and "lower" means
7e9cd9fe 1850+that the position in the order of the stacked branches vertically.
076b876e
AM
1851+These multiple writable branches are prioritized, ie. the topmost one
1852+should be the fastest drive and be used heavily.
1853+
1854+o Characters in aufs FHSM story
1855+- aufs itself and a new branch attribute.
1856+- a new ioctl interface to move-down and to establish a connection with
1857+ the daemon ("move-down" is a converse of "copy-up").
1858+- userspace tool and daemon.
1859+
1860+The userspace daemon establishes a connection with aufs and waits for
1861+the notification. The notified information is very similar to struct
1862+statfs containing the number of consumed blocks and inodes.
1863+When the consumed blocks/inodes of a branch exceeds the user-specified
1864+upper watermark, the daemon activates its move-down process until the
1865+consumed blocks/inodes reaches the user-specified lower watermark.
1866+
1867+The actual move-down is done by aufs based upon the request from
1868+user-space since we need to maintain the inode number and the internal
1869+pointer arrays in aufs.
1870+
1871+Currently aufs FHSM handles the regular files only. Additionally they
1872+must not be hard-linked nor pseudo-linked.
1873+
1874+
1875+o Cowork of aufs and the user-space daemon
1876+ During the userspace daemon established the connection, aufs sends a
1877+ small notification to it whenever aufs writes something into the
1878+ writable branch. But it may cost high since aufs issues statfs(2)
1879+ internally. So user can specify a new option to cache the
1880+ info. Actually the notification is controlled by these factors.
1881+ + the specified cache time.
1882+ + classified as "force" by aufs internally.
1883+ Until the specified time expires, aufs doesn't send the info
1884+ except the forced cases. When aufs decides to force, the info is always
1885+ notified to userspace.
1886+ For example, the number of free inodes is generally large enough and
1887+ the shortage of it happens rarely. So aufs doesn't force the
1888+ notification when creating a new file, directory and others. This is
1889+ the typical case which aufs doesn't force.
1890+ When aufs writes the actual filedata and the files consumes any of new
1891+ blocks, the aufs forces notifying.
1892+
1893+
1894+o Interfaces in aufs
1895+- New branch attribute.
1896+ + fhsm
1897+ Specifies that the branch is managed by FHSM feature. In other words,
1898+ participant in the FHSM.
1899+ When nofhsm is set to the branch, it will not be the source/target
1900+ branch of the move-down operation. This attribute is set
1901+ independently from coo and moo attributes, and if you want full
1902+ FHSM, you should specify them as well.
1903+- New mount option.
1904+ + fhsm_sec
1905+ Specifies a second to suppress many less important info to be
1906+ notified.
1907+- New ioctl.
1908+ + AUFS_CTL_FHSM_FD
1909+ create a new file descriptor which userspace can read the notification
1910+ (a subset of struct statfs) from aufs.
1911+- Module parameter 'brs'
1912+ It has to be set to 1. Otherwise the new mount option 'fhsm' will not
1913+ be set.
1914+- mount helpers /sbin/mount.aufs and /sbin/umount.aufs
1915+ When there are two or more branches with fhsm attributes,
1916+ /sbin/mount.aufs invokes the user-space daemon and /sbin/umount.aufs
1917+ terminates it. As a result of remounting and branch-manipulation, the
1918+ number of branches with fhsm attribute can be one. In this case,
1919+ /sbin/mount.aufs will terminate the user-space daemon.
1920+
1921+
1922+Finally the operation is done as these steps in kernel-space.
1923+- make sure that,
1924+ + no one else is using the file.
1925+ + the file is not hard-linked.
1926+ + the file is not pseudo-linked.
1927+ + the file is a regular file.
1928+ + the parent dir is not opaqued.
1929+- find the target writable branch.
1930+- make sure the file is not whiteout-ed by the upper (than the target)
1931+ branch.
1932+- make the parent dir on the target branch.
1933+- mutex lock the inode on the branch.
1934+- unlink the whiteout on the target branch (if exists).
1935+- lookup and create the whiteout-ed temporary name on the target branch.
1936+- copy the file as the whiteout-ed temporary name on the target branch.
1937+- rename the whiteout-ed temporary name to the original name.
1938+- unlink the file on the source branch.
1939+- maintain the internal pointer array and the external inode number
1940+ table (XINO).
1941+- maintain the timestamps and other attributes of the parent dir and the
1942+ file.
1943+
1944+And of course, in every step, an error may happen. So the operation
1945+should restore the original file state after an error happens.
53392da6
AM
1946diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06mmap.txt linux/Documentation/filesystems/aufs/design/06mmap.txt
1947--- /usr/share/empty/Documentation/filesystems/aufs/design/06mmap.txt 1970-01-01 01:00:00.000000000 +0100
ab036dbd 1948+++ linux/Documentation/filesystems/aufs/design/06mmap.txt 2015-09-24 10:47:58.244719488 +0200
b912730e 1949@@ -0,0 +1,72 @@
53392da6 1950+
2000de60 1951+# Copyright (C) 2005-2015 Junjiro R. Okajima
53392da6
AM
1952+#
1953+# This program is free software; you can redistribute it and/or modify
1954+# it under the terms of the GNU General Public License as published by
1955+# the Free Software Foundation; either version 2 of the License, or
1956+# (at your option) any later version.
1957+#
1958+# This program is distributed in the hope that it will be useful,
1959+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1960+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1961+# GNU General Public License for more details.
1962+#
1963+# You should have received a copy of the GNU General Public License
523b37e3 1964+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1965+
1966+mmap(2) -- File Memory Mapping
1967+----------------------------------------------------------------------
1968+In aufs, the file-mapped pages are handled by a branch fs directly, no
1969+interaction with aufs. It means aufs_mmap() calls the branch fs's
1970+->mmap().
1971+This approach is simple and good, but there is one problem.
7e9cd9fe 1972+Under /proc, several entries show the mmapped files by its path (with
53392da6
AM
1973+device and inode number), and the printed path will be the path on the
1974+branch fs's instead of virtual aufs's.
1975+This is not a problem in most cases, but some utilities lsof(1) (and its
1976+user) may expect the path on aufs.
1977+
1978+To address this issue, aufs adds a new member called vm_prfile in struct
1979+vm_area_struct (and struct vm_region). The original vm_file points to
1980+the file on the branch fs in order to handle everything correctly as
1981+usual. The new vm_prfile points to a virtual file in aufs, and the
1982+show-functions in procfs refers to vm_prfile if it is set.
1983+Also we need to maintain several other places where touching vm_file
1984+such like
1985+- fork()/clone() copies vma and the reference count of vm_file is
1986+ incremented.
1987+- merging vma maintains the ref count too.
1988+
7e9cd9fe 1989+This is not a good approach. It just fakes the printed path. But it
53392da6
AM
1990+leaves all behaviour around f_mapping unchanged. This is surely an
1991+advantage.
1992+Actually aufs had adopted another complicated approach which calls
1993+generic_file_mmap() and handles struct vm_operations_struct. In this
1994+approach, aufs met a hard problem and I could not solve it without
1995+switching the approach.
b912730e
AM
1996+
1997+There may be one more approach, which is
1998+- bind-mount the branch-root onto the aufs-root internally
1999+- grab the new vfsmount (ie. struct mount)
2000+- lazy-umount the branch-root internally
2001+- in open(2) the aufs-file, open the branch-file with the hidden
2002+ vfsmount (instead of the original branch's vfsmount)
2003+- ideally this "bind-mount and lazy-umount" should be done atomically,
2004+ but it may be possible from userspace by the mount helper.
2005+
2006+Adding the internal hidden vfsmount and using it in opening a file, the
2007+file path under /proc will be printed correctly. This approach looks
2008+smarter, but is not possible I am afraid.
2009+- aufs-root may be bind-mount later. when it happens, another hidden
2010+ vfsmount will be required.
2011+- it is hard to get the chance to bind-mount and lazy-umount
2012+ + in kernel-space, FS can have vfsmount in open(2) via
2013+ file->f_path, and aufs can know its vfsmount. But several locks are
2014+ already acquired, and if aufs tries to bind-mount and lazy-umount
2015+ here, then it may cause a deadlock.
2016+ + in user-space, bind-mount doesn't invoke the mount helper.
2017+- since /proc shows dev and ino, aufs has to give vma these info. it
2018+ means a new member vm_prinode will be necessary. this is essentially
2019+ equivalent to vm_prfile described above.
2020+
2021+I have to give up this "looks-smarter" approach.
c1595e42
JR
2022diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06xattr.txt linux/Documentation/filesystems/aufs/design/06xattr.txt
2023--- /usr/share/empty/Documentation/filesystems/aufs/design/06xattr.txt 1970-01-01 01:00:00.000000000 +0100
ab036dbd 2024+++ linux/Documentation/filesystems/aufs/design/06xattr.txt 2015-09-24 10:47:58.244719488 +0200
c1595e42
JR
2025@@ -0,0 +1,96 @@
2026+
2000de60 2027+# Copyright (C) 2014-2015 Junjiro R. Okajima
c1595e42
JR
2028+#
2029+# This program is free software; you can redistribute it and/or modify
2030+# it under the terms of the GNU General Public License as published by
2031+# the Free Software Foundation; either version 2 of the License, or
2032+# (at your option) any later version.
2033+#
2034+# This program is distributed in the hope that it will be useful,
2035+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2036+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2037+# GNU General Public License for more details.
2038+#
2039+# You should have received a copy of the GNU General Public License
2040+# along with this program; if not, write to the Free Software
2041+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
2042+
2043+
2044+Listing XATTR/EA and getting the value
2045+----------------------------------------------------------------------
2046+For the inode standard attributes (owner, group, timestamps, etc.), aufs
2047+shows the values from the topmost existing file. This behaviour is good
7e9cd9fe 2048+for the non-dir entries since the behaviour exactly matches the shown
c1595e42
JR
2049+information. But for the directories, aufs considers all the same named
2050+entries on the lower branches. Which means, if one of the lower entry
2051+rejects readdir call, then aufs returns an error even if the topmost
2052+entry allows it. This behaviour is necessary to respect the branch fs's
2053+security, but can make users confused since the user-visible standard
2054+attributes don't match the behaviour.
2055+To address this issue, aufs has a mount option called dirperm1 which
2056+checks the permission for the topmost entry only, and ignores the lower
2057+entry's permission.
2058+
2059+A similar issue can happen around XATTR.
2060+getxattr(2) and listxattr(2) families behave as if dirperm1 option is
7e9cd9fe
AM
2061+always set. Otherwise these very unpleasant situations would happen.
2062+- listxattr(2) may return the duplicated entries.
c1595e42
JR
2063+- users may not be able to remove or reset the XATTR forever.
2064+
2065+
2066+XATTR/EA support in the internal (copy,move)-(up,down)
2067+----------------------------------------------------------------------
7e9cd9fe 2068+Generally the extended attributes of inode are categorized as these.
c1595e42
JR
2069+- "security" for LSM and capability.
2070+- "system" for posix ACL, 'acl' mount option is required for the branch
2071+ fs generally.
2072+- "trusted" for userspace, CAP_SYS_ADMIN is required.
2073+- "user" for userspace, 'user_xattr' mount option is required for the
2074+ branch fs generally.
2075+
2076+Moreover there are some other categories. Aufs handles these rather
2077+unpopular categories as the ordinary ones, ie. there is no special
2078+condition nor exception.
2079+
2080+In copy-up, the support for XATTR on the dst branch may differ from the
2081+src branch. In this case, the copy-up operation will get an error and
7e9cd9fe
AM
2082+the original user operation which triggered the copy-up will fail. It
2083+can happen that even all copy-up will fail.
c1595e42
JR
2084+When both of src and dst branches support XATTR and if an error occurs
2085+during copying XATTR, then the copy-up should fail obviously. That is a
2086+good reason and aufs should return an error to userspace. But when only
7e9cd9fe 2087+the src branch supports that XATTR, aufs should not return an error.
c1595e42
JR
2088+For example, the src branch supports ACL but the dst branch doesn't
2089+because the dst branch may natively un-support it or temporarily
2090+un-support it due to "noacl" mount option. Of course, the dst branch fs
2091+may NOT return an error even if the XATTR is not supported. It is
2092+totally up to the branch fs.
2093+
2094+Anyway when the aufs internal copy-up gets an error from the dst branch
2095+fs, then aufs tries removing the just copied entry and returns the error
2096+to the userspace. The worst case of this situation will be all copy-up
2097+will fail.
2098+
2099+For the copy-up operation, there are two basic approaches.
2100+- copy the specified XATTR only (by category above), and return the
7e9cd9fe 2101+ error unconditionally if it happens.
c1595e42
JR
2102+- copy all XATTR, and ignore the error on the specified category only.
2103+
2104+In order to support XATTR and to implement the correct behaviour, aufs
7e9cd9fe
AM
2105+chooses the latter approach and introduces some new branch attributes,
2106+"icexsec", "icexsys", "icextr", "icexusr", and "icexoth".
c1595e42 2107+They correspond to the XATTR namespaces (see above). Additionally, to be
7e9cd9fe
AM
2108+convenient, "icex" is also provided which means all "icex*" attributes
2109+are set (here the word "icex" stands for "ignore copy-error on XATTR").
c1595e42
JR
2110+
2111+The meaning of these attributes is to ignore the error from setting
2112+XATTR on that branch.
2113+Note that aufs tries copying all XATTR unconditionally, and ignores the
2114+error from the dst branch according to the specified attributes.
2115+
2116+Some XATTR may have its default value. The default value may come from
2117+the parent dir or the environment. If the default value is set at the
2118+file creating-time, it will be overwritten by copy-up.
2119+Some contradiction may happen I am afraid.
2120+Do we need another attribute to stop copying XATTR? I am unsure. For
2121+now, aufs implements the branch attributes to ignore the error.
53392da6
AM
2122diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/07export.txt linux/Documentation/filesystems/aufs/design/07export.txt
2123--- /usr/share/empty/Documentation/filesystems/aufs/design/07export.txt 1970-01-01 01:00:00.000000000 +0100
ab036dbd 2124+++ linux/Documentation/filesystems/aufs/design/07export.txt 2015-09-24 10:47:58.248052907 +0200
523b37e3 2125@@ -0,0 +1,58 @@
53392da6 2126+
2000de60 2127+# Copyright (C) 2005-2015 Junjiro R. Okajima
53392da6
AM
2128+#
2129+# This program is free software; you can redistribute it and/or modify
2130+# it under the terms of the GNU General Public License as published by
2131+# the Free Software Foundation; either version 2 of the License, or
2132+# (at your option) any later version.
2133+#
2134+# This program is distributed in the hope that it will be useful,
2135+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2136+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2137+# GNU General Public License for more details.
2138+#
2139+# You should have received a copy of the GNU General Public License
523b37e3 2140+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
2141+
2142+Export Aufs via NFS
2143+----------------------------------------------------------------------
2144+Here is an approach.
2145+- like xino/xib, add a new file 'xigen' which stores aufs inode
2146+ generation.
2147+- iget_locked(): initialize aufs inode generation for a new inode, and
2148+ store it in xigen file.
2149+- destroy_inode(): increment aufs inode generation and store it in xigen
2150+ file. it is necessary even if it is not unlinked, because any data of
2151+ inode may be changed by UDBA.
2152+- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise
2153+ build file handle by
2154+ + branch id (4 bytes)
2155+ + superblock generation (4 bytes)
2156+ + inode number (4 or 8 bytes)
2157+ + parent dir inode number (4 or 8 bytes)
2158+ + inode generation (4 bytes)
2159+ + return value of exportfs_encode_fh() for the parent on a branch (4
2160+ bytes)
2161+ + file handle for a branch (by exportfs_encode_fh())
2162+- fh_to_dentry():
2163+ + find the index of a branch from its id in handle, and check it
2164+ still exists in aufs.
2165+ + 1st level: get the inode number from handle and search it in cache.
7e9cd9fe
AM
2166+ + 2nd level: if not found in cache, get the parent inode number from
2167+ the handle and search it in cache. and then open the found parent
2168+ dir, find the matching inode number by vfs_readdir() and get its
2169+ name, and call lookup_one_len() for the target dentry.
53392da6
AM
2170+ + 3rd level: if the parent dir is not cached, call
2171+ exportfs_decode_fh() for a branch and get the parent on a branch,
2172+ build a pathname of it, convert it to a pathname in aufs, call
2173+ path_lookup(). now aufs gets a parent dir dentry, then handle it as
2174+ the 2nd level.
2175+ + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount
2176+ for every branch, but not itself. to get this, (currently) aufs
2177+ searches in current->nsproxy->mnt_ns list. it may not be a good
2178+ idea, but I didn't get other approach.
2179+ + test the generation of the gotten inode.
2180+- every inode operation: they may get EBUSY due to UDBA. in this case,
2181+ convert it into ESTALE for NFSD.
2182+- readdir(): call lockdep_on/off() because filldir in NFSD calls
2183+ lookup_one_len(), vfs_getattr(), encode_fh() and others.
2184diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/08shwh.txt linux/Documentation/filesystems/aufs/design/08shwh.txt
2185--- /usr/share/empty/Documentation/filesystems/aufs/design/08shwh.txt 1970-01-01 01:00:00.000000000 +0100
ab036dbd 2186+++ linux/Documentation/filesystems/aufs/design/08shwh.txt 2015-09-24 10:47:58.248052907 +0200
523b37e3 2187@@ -0,0 +1,52 @@
53392da6 2188+
2000de60 2189+# Copyright (C) 2005-2015 Junjiro R. Okajima
53392da6
AM
2190+#
2191+# This program is free software; you can redistribute it and/or modify
2192+# it under the terms of the GNU General Public License as published by
2193+# the Free Software Foundation; either version 2 of the License, or
2194+# (at your option) any later version.
2195+#
2196+# This program is distributed in the hope that it will be useful,
2197+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2198+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2199+# GNU General Public License for more details.
2200+#
2201+# You should have received a copy of the GNU General Public License
523b37e3 2202+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
2203+
2204+Show Whiteout Mode (shwh)
2205+----------------------------------------------------------------------
2206+Generally aufs hides the name of whiteouts. But in some cases, to show
2207+them is very useful for users. For instance, creating a new middle layer
2208+(branch) by merging existing layers.
2209+
2210+(borrowing aufs1 HOW-TO from a user, Michael Towers)
2211+When you have three branches,
2212+- Bottom: 'system', squashfs (underlying base system), read-only
2213+- Middle: 'mods', squashfs, read-only
2214+- Top: 'overlay', ram (tmpfs), read-write
2215+
2216+The top layer is loaded at boot time and saved at shutdown, to preserve
2217+the changes made to the system during the session.
2218+When larger changes have been made, or smaller changes have accumulated,
2219+the size of the saved top layer data grows. At this point, it would be
2220+nice to be able to merge the two overlay branches ('mods' and 'overlay')
2221+and rewrite the 'mods' squashfs, clearing the top layer and thus
2222+restoring save and load speed.
2223+
2224+This merging is simplified by the use of another aufs mount, of just the
2225+two overlay branches using the 'shwh' option.
2226+# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \
2227+ aufs /livesys/merge_union
2228+
2229+A merged view of these two branches is then available at
2230+/livesys/merge_union, and the new feature is that the whiteouts are
2231+visible!
2232+Note that in 'shwh' mode the aufs mount must be 'ro', which will disable
2233+writing to all branches. Also the default mode for all branches is 'ro'.
2234+It is now possible to save the combined contents of the two overlay
2235+branches to a new squashfs, e.g.:
2236+# mksquashfs /livesys/merge_union /path/to/newmods.squash
2237+
2238+This new squashfs archive can be stored on the boot device and the
2239+initramfs will use it to replace the old one at the next boot.
2240diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/10dynop.txt linux/Documentation/filesystems/aufs/design/10dynop.txt
2241--- /usr/share/empty/Documentation/filesystems/aufs/design/10dynop.txt 1970-01-01 01:00:00.000000000 +0100
ab036dbd 2242+++ linux/Documentation/filesystems/aufs/design/10dynop.txt 2015-09-24 10:47:58.248052907 +0200
7e9cd9fe 2243@@ -0,0 +1,47 @@
53392da6 2244+
2000de60 2245+# Copyright (C) 2010-2015 Junjiro R. Okajima
53392da6
AM
2246+#
2247+# This program is free software; you can redistribute it and/or modify
2248+# it under the terms of the GNU General Public License as published by
2249+# the Free Software Foundation; either version 2 of the License, or
2250+# (at your option) any later version.
2251+#
2252+# This program is distributed in the hope that it will be useful,
2253+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2254+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2255+# GNU General Public License for more details.
2256+#
2257+# You should have received a copy of the GNU General Public License
523b37e3 2258+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
2259+
2260+Dynamically customizable FS operations
2261+----------------------------------------------------------------------
2262+Generally FS operations (struct inode_operations, struct
2263+address_space_operations, struct file_operations, etc.) are defined as
2264+"static const", but it never means that FS have only one set of
2265+operation. Some FS have multiple sets of them. For instance, ext2 has
2266+three sets, one for XIP, for NOBH, and for normal.
2267+Since aufs overrides and redirects these operations, sometimes aufs has
7e9cd9fe 2268+to change its behaviour according to the branch FS type. More importantly
53392da6
AM
2269+VFS acts differently if a function (member in the struct) is set or
2270+not. It means aufs should have several sets of operations and select one
2271+among them according to the branch FS definition.
2272+
7e9cd9fe 2273+In order to solve this problem and not to affect the behaviour of VFS,
53392da6 2274+aufs defines these operations dynamically. For instance, aufs defines
7e9cd9fe
AM
2275+dummy direct_IO function for struct address_space_operations, but it may
2276+not be set to the address_space_operations actually. When the branch FS
2277+doesn't have it, aufs doesn't set it to its address_space_operations
2278+while the function definition itself is still alive. So the behaviour
2279+itself will not change, and it will return an error when direct_IO is
2280+not set.
53392da6
AM
2281+
2282+The lifetime of these dynamically generated operation object is
2283+maintained by aufs branch object. When the branch is removed from aufs,
2284+the reference counter of the object is decremented. When it reaches
2285+zero, the dynamically generated operation object will be freed.
2286+
7e9cd9fe
AM
2287+This approach is designed to support AIO (io_submit), Direct I/O and
2288+XIP (DAX) mainly.
2289+Currently this approach is applied to address_space_operations for
2290+regular files only.
53392da6
AM
2291diff -urN /usr/share/empty/Documentation/filesystems/aufs/README linux/Documentation/filesystems/aufs/README
2292--- /usr/share/empty/Documentation/filesystems/aufs/README 1970-01-01 01:00:00.000000000 +0100
ab036dbd 2293+++ linux/Documentation/filesystems/aufs/README 2015-09-24 10:47:58.244719488 +0200
5527c038 2294@@ -0,0 +1,383 @@
53392da6 2295+
5527c038 2296+Aufs4 -- advanced multi layered unification filesystem version 4.x
53392da6
AM
2297+http://aufs.sf.net
2298+Junjiro R. Okajima
2299+
2300+
2301+0. Introduction
2302+----------------------------------------
2303+In the early days, aufs was entirely re-designed and re-implemented
7e9cd9fe 2304+Unionfs Version 1.x series. Adding many original ideas, approaches,
53392da6
AM
2305+improvements and implementations, it becomes totally different from
2306+Unionfs while keeping the basic features.
2307+Recently, Unionfs Version 2.x series begin taking some of the same
2308+approaches to aufs1's.
2309+Unionfs is being developed by Professor Erez Zadok at Stony Brook
2310+University and his team.
2311+
5527c038 2312+Aufs4 supports linux-4.0 and later, and for linux-3.x series try aufs3.
53392da6
AM
2313+If you want older kernel version support, try aufs2-2.6.git or
2314+aufs2-standalone.git repository, aufs1 from CVS on SourceForge.
2315+
2316+Note: it becomes clear that "Aufs was rejected. Let's give it up."
38d290e6
JR
2317+ According to Christoph Hellwig, linux rejects all union-type
2318+ filesystems but UnionMount.
53392da6
AM
2319+<http://marc.info/?l=linux-kernel&m=123938533724484&w=2>
2320+
38d290e6
JR
2321+PS. Al Viro seems to have a plan to merge aufs as well as overlayfs and
2322+ UnionMount, and he pointed out an issue around a directory mutex
2323+ lock and aufs addressed it. But it is still unsure whether aufs will
2324+ be merged (or any other union solution).
076b876e 2325+<http://marc.info/?l=linux-kernel&m=136312705029295&w=1>
38d290e6 2326+
53392da6
AM
2327+
2328+1. Features
2329+----------------------------------------
2330+- unite several directories into a single virtual filesystem. The member
2331+ directory is called as a branch.
2332+- you can specify the permission flags to the branch, which are 'readonly',
2333+ 'readwrite' and 'whiteout-able.'
2334+- by upper writable branch, internal copyup and whiteout, files/dirs on
2335+ readonly branch are modifiable logically.
2336+- dynamic branch manipulation, add, del.
2337+- etc...
2338+
7e9cd9fe
AM
2339+Also there are many enhancements in aufs, such as:
2340+- test only the highest one for the directory permission (dirperm1)
2341+- copyup on open (coo=)
2342+- 'move' policy for copy-up between two writable branches, after
2343+ checking free space.
2344+- xattr, acl
53392da6
AM
2345+- readdir(3) in userspace.
2346+- keep inode number by external inode number table
2347+- keep the timestamps of file/dir in internal copyup operation
2348+- seekable directory, supporting NFS readdir.
2349+- whiteout is hardlinked in order to reduce the consumption of inodes
2350+ on branch
2351+- do not copyup, nor create a whiteout when it is unnecessary
2352+- revert a single systemcall when an error occurs in aufs
2353+- remount interface instead of ioctl
2354+- maintain /etc/mtab by an external command, /sbin/mount.aufs.
2355+- loopback mounted filesystem as a branch
2356+- kernel thread for removing the dir which has plenty of whiteouts
2357+- support copyup sparse file (a file which has a 'hole' in it)
2358+- default permission flags for branches
2359+- selectable permission flags for ro branch, whether whiteout can
2360+ exist or not
2361+- export via NFS.
2362+- support <sysfs>/fs/aufs and <debugfs>/aufs.
2363+- support multiple writable branches, some policies to select one
2364+ among multiple writable branches.
2365+- a new semantics for link(2) and rename(2) to support multiple
2366+ writable branches.
2367+- no glibc changes are required.
2368+- pseudo hardlink (hardlink over branches)
2369+- allow a direct access manually to a file on branch, e.g. bypassing aufs.
2370+ including NFS or remote filesystem branch.
2371+- userspace wrapper for pathconf(3)/fpathconf(3) with _PC_LINK_MAX.
2372+- and more...
2373+
5527c038 2374+Currently these features are dropped temporarily from aufs4.
53392da6 2375+See design/08plan.txt in detail.
53392da6
AM
2376+- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs
2377+ (robr)
2378+- statistics of aufs thread (/sys/fs/aufs/stat)
53392da6
AM
2379+
2380+Features or just an idea in the future (see also design/*.txt),
2381+- reorder the branch index without del/re-add.
2382+- permanent xino files for NFSD
2383+- an option for refreshing the opened files after add/del branches
53392da6
AM
2384+- light version, without branch manipulation. (unnecessary?)
2385+- copyup in userspace
2386+- inotify in userspace
2387+- readv/writev
53392da6
AM
2388+
2389+
2390+2. Download
2391+----------------------------------------
5527c038
JR
2392+There are three GIT trees for aufs4, aufs4-linux.git,
2393+aufs4-standalone.git, and aufs-util.git. Note that there is no "4" in
1e00d052 2394+"aufs-util.git."
5527c038
JR
2395+While the aufs-util is always necessary, you need either of aufs4-linux
2396+or aufs4-standalone.
1e00d052 2397+
5527c038 2398+The aufs4-linux tree includes the whole linux mainline GIT tree,
1e00d052
AM
2399+git://git.kernel.org/.../torvalds/linux.git.
2400+And you cannot select CONFIG_AUFS_FS=m for this version, eg. you cannot
5527c038 2401+build aufs4 as an external kernel module.
2000de60 2402+Several extra patches are not included in this tree. Only
5527c038 2403+aufs4-standalone tree contains them. They are described in the later
2000de60 2404+section "Configuration and Compilation."
1e00d052 2405+
5527c038 2406+On the other hand, the aufs4-standalone tree has only aufs source files
53392da6 2407+and necessary patches, and you can select CONFIG_AUFS_FS=m.
2000de60 2408+But you need to apply all aufs patches manually.
53392da6 2409+
5527c038
JR
2410+You will find GIT branches whose name is in form of "aufs4.x" where "x"
2411+represents the linux kernel version, "linux-4.x". For instance,
2412+"aufs4.0" is for linux-4.0. For latest "linux-4.x-rcN", use
2413+"aufs4.x-rcN" branch.
1e00d052 2414+
5527c038 2415+o aufs4-linux tree
1e00d052 2416+$ git clone --reference /your/linux/git/tree \
5527c038 2417+ git://github.com/sfjro/aufs4-linux.git aufs4-linux.git
1e00d052 2418+- if you don't have linux GIT tree, then remove "--reference ..."
5527c038
JR
2419+$ cd aufs4-linux.git
2420+$ git checkout origin/aufs4.0
53392da6 2421+
2000de60
JR
2422+Or You may want to directly git-pull aufs into your linux GIT tree, and
2423+leave the patch-work to GIT.
2424+$ cd /your/linux/git/tree
5527c038
JR
2425+$ git remote add aufs4 git://github.com/sfjro/aufs4-linux.git
2426+$ git fetch aufs4
2427+$ git checkout -b my4.0 v4.0
2428+$ (add your local change...)
2429+$ git pull aufs4 aufs4.0
2430+- now you have v4.0 + your_changes + aufs4.0 in your my4.0 branch.
2000de60 2431+- you may need to solve some conflicts between your_changes and
5527c038
JR
2432+ aufs4.0. in this case, git-rerere is recommended so that you can
2433+ solve the similar conflicts automatically when you upgrade to 4.1 or
2000de60
JR
2434+ later in the future.
2435+
5527c038
JR
2436+o aufs4-standalone tree
2437+$ git clone git://github.com/sfjro/aufs4-standalone.git aufs4-standalone.git
2438+$ cd aufs4-standalone.git
2439+$ git checkout origin/aufs4.0
53392da6
AM
2440+
2441+o aufs-util tree
5527c038
JR
2442+$ git clone git://git.code.sf.net/p/aufs/aufs-util aufs-util.git
2443+- note that the public aufs-util.git is on SourceForge instead of
2444+ GitHUB.
53392da6 2445+$ cd aufs-util.git
5527c038 2446+$ git checkout origin/aufs4.0
53392da6 2447+
5527c038
JR
2448+Note: The 4.x-rcN branch is to be used with `rc' kernel versions ONLY.
2449+The minor version number, 'x' in '4.x', of aufs may not always
9dbd164d
AM
2450+follow the minor version number of the kernel.
2451+Because changes in the kernel that cause the use of a new
2452+minor version number do not always require changes to aufs-util.
2453+
2454+Since aufs-util has its own minor version number, you may not be
2455+able to find a GIT branch in aufs-util for your kernel's
2456+exact minor version number.
2457+In this case, you should git-checkout the branch for the
53392da6 2458+nearest lower number.
9dbd164d
AM
2459+
2460+For (an unreleased) example:
5527c038
JR
2461+If you are using "linux-4.10" and the "aufs4.10" branch
2462+does not exist in aufs-util repository, then "aufs4.9", "aufs4.8"
9dbd164d
AM
2463+or something numerically smaller is the branch for your kernel.
2464+
53392da6
AM
2465+Also you can view all branches by
2466+ $ git branch -a
2467+
2468+
2469+3. Configuration and Compilation
2470+----------------------------------------
2471+Make sure you have git-checkout'ed the correct branch.
2472+
5527c038 2473+For aufs4-linux tree,
c06a8ce3 2474+- enable CONFIG_AUFS_FS.
1e00d052
AM
2475+- set other aufs configurations if necessary.
2476+
5527c038 2477+For aufs4-standalone tree,
53392da6
AM
2478+There are several ways to build.
2479+
2480+1.
5527c038
JR
2481+- apply ./aufs4-kbuild.patch to your kernel source files.
2482+- apply ./aufs4-base.patch too.
2483+- apply ./aufs4-mmap.patch too.
2484+- apply ./aufs4-standalone.patch too, if you have a plan to set
2485+ CONFIG_AUFS_FS=m. otherwise you don't need ./aufs4-standalone.patch.
537831f9
AM
2486+- copy ./{Documentation,fs,include/uapi/linux/aufs_type.h} files to your
2487+ kernel source tree. Never copy $PWD/include/uapi/linux/Kbuild.
c06a8ce3 2488+- enable CONFIG_AUFS_FS, you can select either
53392da6
AM
2489+ =m or =y.
2490+- and build your kernel as usual.
2491+- install the built kernel.
c06a8ce3
AM
2492+ Note: Since linux-3.9, every filesystem module requires an alias
2493+ "fs-<fsname>". You should make sure that "fs-aufs" is listed in your
2494+ modules.aliases file if you set CONFIG_AUFS_FS=m.
7eafdf33
AM
2495+- install the header files too by "make headers_install" to the
2496+ directory where you specify. By default, it is $PWD/usr.
b4510431 2497+ "make help" shows a brief note for headers_install.
53392da6
AM
2498+- and reboot your system.
2499+
2500+2.
2501+- module only (CONFIG_AUFS_FS=m).
5527c038
JR
2502+- apply ./aufs4-base.patch to your kernel source files.
2503+- apply ./aufs4-mmap.patch too.
2504+- apply ./aufs4-standalone.patch too.
53392da6
AM
2505+- build your kernel, don't forget "make headers_install", and reboot.
2506+- edit ./config.mk and set other aufs configurations if necessary.
b4510431 2507+ Note: You should read $PWD/fs/aufs/Kconfig carefully which describes
53392da6
AM
2508+ every aufs configuration.
2509+- build the module by simple "make".
c06a8ce3
AM
2510+ Note: Since linux-3.9, every filesystem module requires an alias
2511+ "fs-<fsname>". You should make sure that "fs-aufs" is listed in your
2512+ modules.aliases file.
53392da6
AM
2513+- you can specify ${KDIR} make variable which points to your kernel
2514+ source tree.
2515+- install the files
2516+ + run "make install" to install the aufs module, or copy the built
b4510431
AM
2517+ $PWD/aufs.ko to /lib/modules/... and run depmod -a (or reboot simply).
2518+ + run "make install_headers" (instead of headers_install) to install
2519+ the modified aufs header file (you can specify DESTDIR which is
2520+ available in aufs standalone version's Makefile only), or copy
2521+ $PWD/usr/include/linux/aufs_type.h to /usr/include/linux or wherever
2522+ you like manually. By default, the target directory is $PWD/usr.
5527c038 2523+- no need to apply aufs4-kbuild.patch, nor copying source files to your
53392da6
AM
2524+ kernel source tree.
2525+
b4510431 2526+Note: The header file aufs_type.h is necessary to build aufs-util
53392da6
AM
2527+ as well as "make headers_install" in the kernel source tree.
2528+ headers_install is subject to be forgotten, but it is essentially
2529+ necessary, not only for building aufs-util.
2530+ You may not meet problems without headers_install in some older
2531+ version though.
2532+
2533+And then,
2534+- read README in aufs-util, build and install it
9dbd164d
AM
2535+- note that your distribution may contain an obsoleted version of
2536+ aufs_type.h in /usr/include/linux or something. When you build aufs
2537+ utilities, make sure that your compiler refers to the correct aufs header
2538+ file which is built by "make headers_install."
53392da6
AM
2539+- if you want to use readdir(3) in userspace or pathconf(3) wrapper,
2540+ then run "make install_ulib" too. And refer to the aufs manual in
2541+ detail.
2542+
5527c038 2543+There are several other patches in aufs4-standalone.git. They are all
38d290e6 2544+optional. When you meet some problems, they will help you.
5527c038 2545+- aufs4-loopback.patch
38d290e6
JR
2546+ Supports a nested loopback mount in a branch-fs. This patch is
2547+ unnecessary until aufs produces a message like "you may want to try
2548+ another patch for loopback file".
2549+- vfs-ino.patch
2550+ Modifies a system global kernel internal function get_next_ino() in
2551+ order to stop assigning 0 for an inode-number. Not directly related to
2552+ aufs, but recommended generally.
2553+- tmpfs-idr.patch
2554+ Keeps the tmpfs inode number as the lowest value. Effective to reduce
2555+ the size of aufs XINO files for tmpfs branch. Also it prevents the
2556+ duplication of inode number, which is important for backup tools and
2557+ other utilities. When you find aufs XINO files for tmpfs branch
2558+ growing too much, try this patch.
2559+
53392da6
AM
2560+
2561+4. Usage
2562+----------------------------------------
2563+At first, make sure aufs-util are installed, and please read the aufs
2564+manual, aufs.5 in aufs-util.git tree.
2565+$ man -l aufs.5
2566+
2567+And then,
2568+$ mkdir /tmp/rw /tmp/aufs
2569+# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs
2570+
2571+Here is another example. The result is equivalent.
2572+# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs
2573+ Or
2574+# mount -t aufs -o br:/tmp/rw none /tmp/aufs
2575+# mount -o remount,append:${HOME} /tmp/aufs
2576+
2577+Then, you can see whole tree of your home dir through /tmp/aufs. If
2578+you modify a file under /tmp/aufs, the one on your home directory is
2579+not affected, instead the same named file will be newly created under
2580+/tmp/rw. And all of your modification to a file will be applied to
2581+the one under /tmp/rw. This is called the file based Copy on Write
2582+(COW) method.
2583+Aufs mount options are described in aufs.5.
2584+If you run chroot or something and make your aufs as a root directory,
2585+then you need to customize the shutdown script. See the aufs manual in
2586+detail.
2587+
2588+Additionally, there are some sample usages of aufs which are a
2589+diskless system with network booting, and LiveCD over NFS.
2590+See sample dir in CVS tree on SourceForge.
2591+
2592+
2593+5. Contact
2594+----------------------------------------
2595+When you have any problems or strange behaviour in aufs, please let me
2596+know with:
2597+- /proc/mounts (instead of the output of mount(8))
2598+- /sys/module/aufs/*
2599+- /sys/fs/aufs/* (if you have them)
2600+- /debug/aufs/* (if you have them)
2601+- linux kernel version
2602+ if your kernel is not plain, for example modified by distributor,
2603+ the url where i can download its source is necessary too.
2604+- aufs version which was printed at loading the module or booting the
2605+ system, instead of the date you downloaded.
2606+- configuration (define/undefine CONFIG_AUFS_xxx)
2607+- kernel configuration or /proc/config.gz (if you have it)
2608+- behaviour which you think to be incorrect
2609+- actual operation, reproducible one is better
2610+- mailto: aufs-users at lists.sourceforge.net
2611+
2612+Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches,
2613+and Feature Requests) on SourceForge. Please join and write to
2614+aufs-users ML.
2615+
2616+
2617+6. Acknowledgements
2618+----------------------------------------
2619+Thanks to everyone who have tried and are using aufs, whoever
2620+have reported a bug or any feedback.
2621+
2622+Especially donators:
2623+Tomas Matejicek(slax.org) made a donation (much more than once).
2624+ Since Apr 2010, Tomas M (the author of Slax and Linux Live
2625+ scripts) is making "doubling" donations.
2626+ Unfortunately I cannot list all of the donators, but I really
b4510431 2627+ appreciate.
53392da6
AM
2628+ It ends Aug 2010, but the ordinary donation URL is still available.
2629+ <http://sourceforge.net/donate/index.php?group_id=167503>
2630+Dai Itasaka made a donation (2007/8).
2631+Chuck Smith made a donation (2008/4, 10 and 12).
2632+Henk Schoneveld made a donation (2008/9).
2633+Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10).
2634+Francois Dupoux made a donation (2008/11).
2635+Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public
2636+ aufs2 GIT tree (2009/2).
2637+William Grant made a donation (2009/3).
2638+Patrick Lane made a donation (2009/4).
2639+The Mail Archive (mail-archive.com) made donations (2009/5).
2640+Nippy Networks (Ed Wildgoose) made a donation (2009/7).
2641+New Dream Network, LLC (www.dreamhost.com) made a donation (2009/11).
2642+Pavel Pronskiy made a donation (2011/2).
2643+Iridium and Inmarsat satellite phone retailer (www.mailasail.com), Nippy
2644+ Networks (Ed Wildgoose) made a donation for hardware (2011/3).
537831f9
AM
2645+Max Lekomcev (DOM-TV project) made a donation (2011/7, 12, 2012/3, 6 and
2646+11).
1e00d052 2647+Sam Liddicott made a donation (2011/9).
86dc4139
AM
2648+Era Scarecrow made a donation (2013/4).
2649+Bor Ratajc made a donation (2013/4).
2650+Alessandro Gorreta made a donation (2013/4).
2651+POIRETTE Marc made a donation (2013/4).
2652+Alessandro Gorreta made a donation (2013/4).
2653+lauri kasvandik made a donation (2013/5).
392086de 2654+"pemasu from Finland" made a donation (2013/7).
523b37e3
AM
2655+The Parted Magic Project made a donation (2013/9 and 11).
2656+Pavel Barta made a donation (2013/10).
38d290e6 2657+Nikolay Pertsev made a donation (2014/5).
ab036dbd 2658+James B made a donation (2014/7 and 2015/7).
076b876e 2659+Stefano Di Biase made a donation (2014/8).
2000de60 2660+Daniel Epellei made a donation (2015/1).
53392da6
AM
2661+
2662+Thank you very much.
2663+Donations are always, including future donations, very important and
2664+helpful for me to keep on developing aufs.
2665+
2666+
2667+7.
2668+----------------------------------------
2669+If you are an experienced user, no explanation is needed. Aufs is
2670+just a linux filesystem.
2671+
2672+
2673+Enjoy!
2674+
2675+# Local variables: ;
2676+# mode: text;
2677+# End: ;
7f207e10
AM
2678diff -urN /usr/share/empty/fs/aufs/aufs.h linux/fs/aufs/aufs.h
2679--- /usr/share/empty/fs/aufs/aufs.h 1970-01-01 01:00:00.000000000 +0100
ab036dbd 2680+++ linux/fs/aufs/aufs.h 2015-09-24 10:47:58.248052907 +0200
523b37e3 2681@@ -0,0 +1,59 @@
7f207e10 2682+/*
2000de60 2683+ * Copyright (C) 2005-2015 Junjiro R. Okajima
7f207e10
AM
2684+ *
2685+ * This program, aufs is free software; you can redistribute it and/or modify
2686+ * it under the terms of the GNU General Public License as published by
2687+ * the Free Software Foundation; either version 2 of the License, or
2688+ * (at your option) any later version.
2689+ *
2690+ * This program is distributed in the hope that it will be useful,
2691+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2692+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2693+ * GNU General Public License for more details.
2694+ *
2695+ * You should have received a copy of the GNU General Public License
523b37e3 2696+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7f207e10
AM
2697+ */
2698+
2699+/*
2700+ * all header files
2701+ */
2702+
2703+#ifndef __AUFS_H__
2704+#define __AUFS_H__
2705+
2706+#ifdef __KERNEL__
2707+
/*
 * AuStub() expands to a static inline function named @name which returns
 * @type, takes the variadic tail as its parameter list and whose body is
 * the single statement "@body;".
 */
#define AuStub(type, name, body, ...) \
	static inline type name(__VA_ARGS__) { body; }

/* a stub with an empty body, returning void */
#define AuStubVoid(name, ...) \
	AuStub(void, name, , __VA_ARGS__)
/* a stub which does nothing but "return 0" */
#define AuStubInt0(name, ...) \
	AuStub(int, name, return 0, __VA_ARGS__)
2715+
2716+#include "debug.h"
2717+
2718+#include "branch.h"
2719+#include "cpup.h"
2720+#include "dcsub.h"
2721+#include "dbgaufs.h"
2722+#include "dentry.h"
2723+#include "dir.h"
2724+#include "dynop.h"
2725+#include "file.h"
2726+#include "fstype.h"
2727+#include "inode.h"
2728+#include "loop.h"
2729+#include "module.h"
7f207e10
AM
2730+#include "opts.h"
2731+#include "rwsem.h"
2732+#include "spl.h"
2733+#include "super.h"
2734+#include "sysaufs.h"
2735+#include "vfsub.h"
2736+#include "whout.h"
2737+#include "wkq.h"
2738+
2739+#endif /* __KERNEL__ */
2740+#endif /* __AUFS_H__ */
2741diff -urN /usr/share/empty/fs/aufs/branch.c linux/fs/aufs/branch.c
2742--- /usr/share/empty/fs/aufs/branch.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 2743+++ linux/fs/aufs/branch.c 2015-12-10 18:46:31.223310574 +0100
5527c038 2744@@ -0,0 +1,1414 @@
7f207e10 2745+/*
2000de60 2746+ * Copyright (C) 2005-2015 Junjiro R. Okajima
7f207e10
AM
2747+ *
2748+ * This program, aufs is free software; you can redistribute it and/or modify
2749+ * it under the terms of the GNU General Public License as published by
2750+ * the Free Software Foundation; either version 2 of the License, or
2751+ * (at your option) any later version.
2752+ *
2753+ * This program is distributed in the hope that it will be useful,
2754+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2755+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2756+ * GNU General Public License for more details.
2757+ *
2758+ * You should have received a copy of the GNU General Public License
523b37e3 2759+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7f207e10
AM
2760+ */
2761+
2762+/*
2763+ * branch management
2764+ */
2765+
027c5e7a 2766+#include <linux/compat.h>
7f207e10
AM
2767+#include <linux/statfs.h>
2768+#include "aufs.h"
2769+
2770+/*
2771+ * free a single branch
1facf9fc 2772+ */
2773+static void au_br_do_free(struct au_branch *br)
2774+{
2775+ int i;
2776+ struct au_wbr *wbr;
4a4d8108 2777+ struct au_dykey **key;
1facf9fc 2778+
027c5e7a
AM
2779+ au_hnotify_fin_br(br);
2780+
1facf9fc 2781+ if (br->br_xino.xi_file)
2782+ fput(br->br_xino.xi_file);
2783+ mutex_destroy(&br->br_xino.xi_nondir_mtx);
2784+
2785+ AuDebugOn(atomic_read(&br->br_count));
2786+
2787+ wbr = br->br_wbr;
2788+ if (wbr) {
2789+ for (i = 0; i < AuBrWh_Last; i++)
2790+ dput(wbr->wbr_wh[i]);
2791+ AuDebugOn(atomic_read(&wbr->wbr_wh_running));
dece6358 2792+ AuRwDestroy(&wbr->wbr_wh_rwsem);
1facf9fc 2793+ }
2794+
076b876e
AM
2795+ if (br->br_fhsm) {
2796+ au_br_fhsm_fin(br->br_fhsm);
2797+ kfree(br->br_fhsm);
2798+ }
2799+
4a4d8108
AM
2800+ key = br->br_dykey;
2801+ for (i = 0; i < AuBrDynOp; i++, key++)
2802+ if (*key)
2803+ au_dy_put(*key);
2804+ else
2805+ break;
2806+
537831f9
AM
2807+ /* recursive lock, s_umount of branch's */
2808+ lockdep_off();
86dc4139 2809+ path_put(&br->br_path);
537831f9 2810+ lockdep_on();
1facf9fc 2811+ kfree(wbr);
2812+ kfree(br);
2813+}
2814+
2815+/*
2816+ * frees all branches
2817+ */
2818+void au_br_free(struct au_sbinfo *sbinfo)
2819+{
2820+ aufs_bindex_t bmax;
2821+ struct au_branch **br;
2822+
dece6358
AM
2823+ AuRwMustWriteLock(&sbinfo->si_rwsem);
2824+
1facf9fc 2825+ bmax = sbinfo->si_bend + 1;
2826+ br = sbinfo->si_branch;
2827+ while (bmax--)
2828+ au_br_do_free(*br++);
2829+}
2830+
2831+/*
2832+ * find the index of a branch which is specified by @br_id.
2833+ */
2834+int au_br_index(struct super_block *sb, aufs_bindex_t br_id)
2835+{
2836+ aufs_bindex_t bindex, bend;
2837+
2838+ bend = au_sbend(sb);
2839+ for (bindex = 0; bindex <= bend; bindex++)
2840+ if (au_sbr_id(sb, bindex) == br_id)
2841+ return bindex;
2842+ return -1;
2843+}
2844+
2845+/* ---------------------------------------------------------------------- */
2846+
2847+/*
2848+ * add a branch
2849+ */
2850+
b752ccd1
AM
2851+static int test_overlap(struct super_block *sb, struct dentry *h_adding,
2852+ struct dentry *h_root)
1facf9fc 2853+{
b752ccd1
AM
2854+ if (unlikely(h_adding == h_root
2855+ || au_test_loopback_overlap(sb, h_adding)))
1facf9fc 2856+ return 1;
b752ccd1
AM
2857+ if (h_adding->d_sb != h_root->d_sb)
2858+ return 0;
2859+ return au_test_subdir(h_adding, h_root)
2860+ || au_test_subdir(h_root, h_adding);
1facf9fc 2861+}
2862+
2863+/*
2864+ * returns a newly allocated branch. @new_nbranch is a number of branches
2865+ * after adding a branch.
2866+ */
2867+static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch,
2868+ int perm)
2869+{
2870+ struct au_branch *add_branch;
2871+ struct dentry *root;
5527c038 2872+ struct inode *inode;
4a4d8108 2873+ int err;
1facf9fc 2874+
4a4d8108 2875+ err = -ENOMEM;
1facf9fc 2876+ root = sb->s_root;
2877+ add_branch = kmalloc(sizeof(*add_branch), GFP_NOFS);
2878+ if (unlikely(!add_branch))
2879+ goto out;
2880+
027c5e7a
AM
2881+ err = au_hnotify_init_br(add_branch, perm);
2882+ if (unlikely(err))
2883+ goto out_br;
2884+
1facf9fc 2885+ add_branch->br_wbr = NULL;
2886+ if (au_br_writable(perm)) {
2887+ /* may be freed separately at changing the branch permission */
2888+ add_branch->br_wbr = kmalloc(sizeof(*add_branch->br_wbr),
2889+ GFP_NOFS);
2890+ if (unlikely(!add_branch->br_wbr))
027c5e7a 2891+ goto out_hnotify;
1facf9fc 2892+ }
2893+
076b876e
AM
2894+ add_branch->br_fhsm = NULL;
2895+ if (au_br_fhsm(perm)) {
2896+ err = au_fhsm_br_alloc(add_branch);
2897+ if (unlikely(err))
2898+ goto out_wbr;
2899+ }
2900+
4a4d8108
AM
2901+ err = au_sbr_realloc(au_sbi(sb), new_nbranch);
2902+ if (!err)
2903+ err = au_di_realloc(au_di(root), new_nbranch);
5527c038
JR
2904+ if (!err) {
2905+ inode = d_inode(root);
2906+ err = au_ii_realloc(au_ii(inode), new_nbranch);
2907+ }
4a4d8108
AM
2908+ if (!err)
2909+ return add_branch; /* success */
1facf9fc 2910+
076b876e 2911+out_wbr:
1facf9fc 2912+ kfree(add_branch->br_wbr);
027c5e7a
AM
2913+out_hnotify:
2914+ au_hnotify_fin_br(add_branch);
4f0767ce 2915+out_br:
1facf9fc 2916+ kfree(add_branch);
4f0767ce 2917+out:
4a4d8108 2918+ return ERR_PTR(err);
1facf9fc 2919+}
2920+
2921+/*
2922+ * test if the branch permission is legal or not.
2923+ */
2924+static int test_br(struct inode *inode, int brperm, char *path)
2925+{
2926+ int err;
2927+
4a4d8108
AM
2928+ err = (au_br_writable(brperm) && IS_RDONLY(inode));
2929+ if (!err)
2930+ goto out;
1facf9fc 2931+
4a4d8108
AM
2932+ err = -EINVAL;
2933+ pr_err("write permission for readonly mount or inode, %s\n", path);
2934+
4f0767ce 2935+out:
1facf9fc 2936+ return err;
2937+}
2938+
2939+/*
2940+ * returns:
2941+ * 0: success, the caller will add it
2942+ * plus: success, it is already unified, the caller should ignore it
2943+ * minus: error
2944+ */
2945+static int test_add(struct super_block *sb, struct au_opt_add *add, int remount)
2946+{
2947+ int err;
2948+ aufs_bindex_t bend, bindex;
5527c038 2949+ struct dentry *root, *h_dentry;
1facf9fc 2950+ struct inode *inode, *h_inode;
2951+
2952+ root = sb->s_root;
2953+ bend = au_sbend(sb);
2954+ if (unlikely(bend >= 0
2955+ && au_find_dbindex(root, add->path.dentry) >= 0)) {
2956+ err = 1;
2957+ if (!remount) {
2958+ err = -EINVAL;
4a4d8108 2959+ pr_err("%s duplicated\n", add->pathname);
1facf9fc 2960+ }
2961+ goto out;
2962+ }
2963+
2964+ err = -ENOSPC; /* -E2BIG; */
2965+ if (unlikely(AUFS_BRANCH_MAX <= add->bindex
2966+ || AUFS_BRANCH_MAX - 1 <= bend)) {
4a4d8108 2967+ pr_err("number of branches exceeded %s\n", add->pathname);
1facf9fc 2968+ goto out;
2969+ }
2970+
2971+ err = -EDOM;
2972+ if (unlikely(add->bindex < 0 || bend + 1 < add->bindex)) {
4a4d8108 2973+ pr_err("bad index %d\n", add->bindex);
1facf9fc 2974+ goto out;
2975+ }
2976+
5527c038 2977+ inode = d_inode(add->path.dentry);
1facf9fc 2978+ err = -ENOENT;
2979+ if (unlikely(!inode->i_nlink)) {
4a4d8108 2980+ pr_err("no existence %s\n", add->pathname);
1facf9fc 2981+ goto out;
2982+ }
2983+
2984+ err = -EINVAL;
2985+ if (unlikely(inode->i_sb == sb)) {
4a4d8108 2986+ pr_err("%s must be outside\n", add->pathname);
1facf9fc 2987+ goto out;
2988+ }
2989+
2990+ if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) {
4a4d8108
AM
2991+ pr_err("unsupported filesystem, %s (%s)\n",
2992+ add->pathname, au_sbtype(inode->i_sb));
1facf9fc 2993+ goto out;
2994+ }
2995+
c1595e42
JR
2996+ if (unlikely(inode->i_sb->s_stack_depth)) {
2997+ pr_err("already stacked, %s (%s)\n",
2998+ add->pathname, au_sbtype(inode->i_sb));
2999+ goto out;
3000+ }
3001+
5527c038 3002+ err = test_br(d_inode(add->path.dentry), add->perm, add->pathname);
1facf9fc 3003+ if (unlikely(err))
3004+ goto out;
3005+
3006+ if (bend < 0)
3007+ return 0; /* success */
3008+
3009+ err = -EINVAL;
3010+ for (bindex = 0; bindex <= bend; bindex++)
3011+ if (unlikely(test_overlap(sb, add->path.dentry,
3012+ au_h_dptr(root, bindex)))) {
4a4d8108 3013+ pr_err("%s is overlapped\n", add->pathname);
1facf9fc 3014+ goto out;
3015+ }
3016+
3017+ err = 0;
3018+ if (au_opt_test(au_mntflags(sb), WARN_PERM)) {
5527c038
JR
3019+ h_dentry = au_h_dptr(root, 0);
3020+ h_inode = d_inode(h_dentry);
1facf9fc 3021+ if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO)
0c3ec466
AM
3022+ || !uid_eq(h_inode->i_uid, inode->i_uid)
3023+ || !gid_eq(h_inode->i_gid, inode->i_gid))
3024+ pr_warn("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n",
3025+ add->pathname,
3026+ i_uid_read(inode), i_gid_read(inode),
3027+ (inode->i_mode & S_IALLUGO),
3028+ i_uid_read(h_inode), i_gid_read(h_inode),
3029+ (h_inode->i_mode & S_IALLUGO));
1facf9fc 3030+ }
3031+
4f0767ce 3032+out:
1facf9fc 3033+ return err;
3034+}
3035+
3036+/*
3037+ * initialize or clean the whiteouts for an adding branch
3038+ */
3039+static int au_br_init_wh(struct super_block *sb, struct au_branch *br,
86dc4139 3040+ int new_perm)
1facf9fc 3041+{
3042+ int err, old_perm;
3043+ aufs_bindex_t bindex;
3044+ struct mutex *h_mtx;
3045+ struct au_wbr *wbr;
3046+ struct au_hinode *hdir;
5527c038 3047+ struct dentry *h_dentry;
1facf9fc 3048+
86dc4139
AM
3049+ err = vfsub_mnt_want_write(au_br_mnt(br));
3050+ if (unlikely(err))
3051+ goto out;
3052+
1facf9fc 3053+ wbr = br->br_wbr;
3054+ old_perm = br->br_perm;
3055+ br->br_perm = new_perm;
3056+ hdir = NULL;
3057+ h_mtx = NULL;
3058+ bindex = au_br_index(sb, br->br_id);
3059+ if (0 <= bindex) {
5527c038 3060+ hdir = au_hi(d_inode(sb->s_root), bindex);
4a4d8108 3061+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 3062+ } else {
5527c038
JR
3063+ h_dentry = au_br_dentry(br);
3064+ h_mtx = &d_inode(h_dentry)->i_mutex;
1facf9fc 3065+ mutex_lock_nested(h_mtx, AuLsc_I_PARENT);
3066+ }
3067+ if (!wbr)
86dc4139 3068+ err = au_wh_init(br, sb);
1facf9fc 3069+ else {
3070+ wbr_wh_write_lock(wbr);
86dc4139 3071+ err = au_wh_init(br, sb);
1facf9fc 3072+ wbr_wh_write_unlock(wbr);
3073+ }
3074+ if (hdir)
4a4d8108 3075+ au_hn_imtx_unlock(hdir);
1facf9fc 3076+ else
3077+ mutex_unlock(h_mtx);
86dc4139 3078+ vfsub_mnt_drop_write(au_br_mnt(br));
1facf9fc 3079+ br->br_perm = old_perm;
3080+
3081+ if (!err && wbr && !au_br_writable(new_perm)) {
3082+ kfree(wbr);
3083+ br->br_wbr = NULL;
3084+ }
3085+
86dc4139 3086+out:
1facf9fc 3087+ return err;
3088+}
3089+
3090+static int au_wbr_init(struct au_branch *br, struct super_block *sb,
86dc4139 3091+ int perm)
1facf9fc 3092+{
3093+ int err;
4a4d8108 3094+ struct kstatfs kst;
1facf9fc 3095+ struct au_wbr *wbr;
3096+
3097+ wbr = br->br_wbr;
dece6358 3098+ au_rw_init(&wbr->wbr_wh_rwsem);
1facf9fc 3099+ memset(wbr->wbr_wh, 0, sizeof(wbr->wbr_wh));
3100+ atomic_set(&wbr->wbr_wh_running, 0);
3101+ wbr->wbr_bytes = 0;
3102+
4a4d8108
AM
3103+ /*
3104+ * a limit for rmdir/rename a dir
523b37e3 3105+ * cf. AUFS_MAX_NAMELEN in include/uapi/linux/aufs_type.h
4a4d8108 3106+ */
86dc4139 3107+ err = vfs_statfs(&br->br_path, &kst);
4a4d8108
AM
3108+ if (unlikely(err))
3109+ goto out;
3110+ err = -EINVAL;
3111+ if (kst.f_namelen >= NAME_MAX)
86dc4139 3112+ err = au_br_init_wh(sb, br, perm);
4a4d8108 3113+ else
523b37e3
AM
3114+ pr_err("%pd(%s), unsupported namelen %ld\n",
3115+ au_br_dentry(br),
86dc4139 3116+ au_sbtype(au_br_dentry(br)->d_sb), kst.f_namelen);
1facf9fc 3117+
4f0767ce 3118+out:
1facf9fc 3119+ return err;
3120+}
3121+
c1595e42 3122+/* initialize a new branch */
1facf9fc 3123+static int au_br_init(struct au_branch *br, struct super_block *sb,
3124+ struct au_opt_add *add)
3125+{
3126+ int err;
5527c038 3127+ struct inode *h_inode;
1facf9fc 3128+
3129+ err = 0;
3130+ memset(&br->br_xino, 0, sizeof(br->br_xino));
3131+ mutex_init(&br->br_xino.xi_nondir_mtx);
3132+ br->br_perm = add->perm;
86dc4139 3133+ br->br_path = add->path; /* set first, path_get() later */
4a4d8108
AM
3134+ spin_lock_init(&br->br_dykey_lock);
3135+ memset(br->br_dykey, 0, sizeof(br->br_dykey));
1facf9fc 3136+ atomic_set(&br->br_count, 0);
1facf9fc 3137+ atomic_set(&br->br_xino_running, 0);
3138+ br->br_id = au_new_br_id(sb);
7f207e10 3139+ AuDebugOn(br->br_id < 0);
1facf9fc 3140+
3141+ if (au_br_writable(add->perm)) {
86dc4139 3142+ err = au_wbr_init(br, sb, add->perm);
1facf9fc 3143+ if (unlikely(err))
b752ccd1 3144+ goto out_err;
1facf9fc 3145+ }
3146+
3147+ if (au_opt_test(au_mntflags(sb), XINO)) {
5527c038
JR
3148+ h_inode = d_inode(add->path.dentry);
3149+ err = au_xino_br(sb, br, h_inode->i_ino,
1facf9fc 3150+ au_sbr(sb, 0)->br_xino.xi_file, /*do_test*/1);
3151+ if (unlikely(err)) {
3152+ AuDebugOn(br->br_xino.xi_file);
b752ccd1 3153+ goto out_err;
1facf9fc 3154+ }
3155+ }
3156+
3157+ sysaufs_br_init(br);
86dc4139 3158+ path_get(&br->br_path);
b752ccd1 3159+ goto out; /* success */
1facf9fc 3160+
4f0767ce 3161+out_err:
86dc4139 3162+ memset(&br->br_path, 0, sizeof(br->br_path));
4f0767ce 3163+out:
1facf9fc 3164+ return err;
3165+}
3166+
3167+static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex,
3168+ struct au_branch *br, aufs_bindex_t bend,
3169+ aufs_bindex_t amount)
3170+{
3171+ struct au_branch **brp;
3172+
dece6358
AM
3173+ AuRwMustWriteLock(&sbinfo->si_rwsem);
3174+
1facf9fc 3175+ brp = sbinfo->si_branch + bindex;
3176+ memmove(brp + 1, brp, sizeof(*brp) * amount);
3177+ *brp = br;
3178+ sbinfo->si_bend++;
3179+ if (unlikely(bend < 0))
3180+ sbinfo->si_bend = 0;
3181+}
3182+
3183+static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex,
3184+ aufs_bindex_t bend, aufs_bindex_t amount)
3185+{
3186+ struct au_hdentry *hdp;
3187+
1308ab2a 3188+ AuRwMustWriteLock(&dinfo->di_rwsem);
3189+
1facf9fc 3190+ hdp = dinfo->di_hdentry + bindex;
3191+ memmove(hdp + 1, hdp, sizeof(*hdp) * amount);
3192+ au_h_dentry_init(hdp);
3193+ dinfo->di_bend++;
3194+ if (unlikely(bend < 0))
3195+ dinfo->di_bstart = 0;
3196+}
3197+
3198+static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex,
3199+ aufs_bindex_t bend, aufs_bindex_t amount)
3200+{
3201+ struct au_hinode *hip;
3202+
1308ab2a 3203+ AuRwMustWriteLock(&iinfo->ii_rwsem);
3204+
1facf9fc 3205+ hip = iinfo->ii_hinode + bindex;
3206+ memmove(hip + 1, hip, sizeof(*hip) * amount);
3207+ hip->hi_inode = NULL;
4a4d8108 3208+ au_hn_init(hip);
1facf9fc 3209+ iinfo->ii_bend++;
3210+ if (unlikely(bend < 0))
3211+ iinfo->ii_bstart = 0;
3212+}
3213+
86dc4139
AM
3214+static void au_br_do_add(struct super_block *sb, struct au_branch *br,
3215+ aufs_bindex_t bindex)
1facf9fc 3216+{
86dc4139 3217+ struct dentry *root, *h_dentry;
5527c038 3218+ struct inode *root_inode, *h_inode;
1facf9fc 3219+ aufs_bindex_t bend, amount;
3220+
3221+ root = sb->s_root;
5527c038 3222+ root_inode = d_inode(root);
1facf9fc 3223+ bend = au_sbend(sb);
3224+ amount = bend + 1 - bindex;
86dc4139 3225+ h_dentry = au_br_dentry(br);
53392da6 3226+ au_sbilist_lock();
1facf9fc 3227+ au_br_do_add_brp(au_sbi(sb), bindex, br, bend, amount);
3228+ au_br_do_add_hdp(au_di(root), bindex, bend, amount);
3229+ au_br_do_add_hip(au_ii(root_inode), bindex, bend, amount);
3230+ au_set_h_dptr(root, bindex, dget(h_dentry));
5527c038
JR
3231+ h_inode = d_inode(h_dentry);
3232+ au_set_h_iptr(root_inode, bindex, au_igrab(h_inode), /*flags*/0);
53392da6 3233+ au_sbilist_unlock();
1facf9fc 3234+}
3235+
3236+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount)
3237+{
3238+ int err;
1facf9fc 3239+ aufs_bindex_t bend, add_bindex;
3240+ struct dentry *root, *h_dentry;
3241+ struct inode *root_inode;
3242+ struct au_branch *add_branch;
3243+
3244+ root = sb->s_root;
5527c038 3245+ root_inode = d_inode(root);
1facf9fc 3246+ IMustLock(root_inode);
3247+ err = test_add(sb, add, remount);
3248+ if (unlikely(err < 0))
3249+ goto out;
3250+ if (err) {
3251+ err = 0;
3252+ goto out; /* success */
3253+ }
3254+
3255+ bend = au_sbend(sb);
3256+ add_branch = au_br_alloc(sb, bend + 2, add->perm);
3257+ err = PTR_ERR(add_branch);
3258+ if (IS_ERR(add_branch))
3259+ goto out;
3260+
3261+ err = au_br_init(add_branch, sb, add);
3262+ if (unlikely(err)) {
3263+ au_br_do_free(add_branch);
3264+ goto out;
3265+ }
3266+
3267+ add_bindex = add->bindex;
1facf9fc 3268+ if (!remount)
86dc4139 3269+ au_br_do_add(sb, add_branch, add_bindex);
1facf9fc 3270+ else {
3271+ sysaufs_brs_del(sb, add_bindex);
86dc4139 3272+ au_br_do_add(sb, add_branch, add_bindex);
1facf9fc 3273+ sysaufs_brs_add(sb, add_bindex);
3274+ }
3275+
86dc4139 3276+ h_dentry = add->path.dentry;
1308ab2a 3277+ if (!add_bindex) {
1facf9fc 3278+ au_cpup_attr_all(root_inode, /*force*/1);
1308ab2a 3279+ sb->s_maxbytes = h_dentry->d_sb->s_maxbytes;
3280+ } else
5527c038 3281+ au_add_nlink(root_inode, d_inode(h_dentry));
1facf9fc 3282+
3283+ /*
4a4d8108 3284+ * this test/set prevents aufs from handling unnecesary notify events
027c5e7a 3285+ * of xino files, in case of re-adding a writable branch which was
1facf9fc 3286+ * once detached from aufs.
3287+ */
3288+ if (au_xino_brid(sb) < 0
3289+ && au_br_writable(add_branch->br_perm)
3290+ && !au_test_fs_bad_xino(h_dentry->d_sb)
3291+ && add_branch->br_xino.xi_file
2000de60 3292+ && add_branch->br_xino.xi_file->f_path.dentry->d_parent == h_dentry)
1facf9fc 3293+ au_xino_brid_set(sb, add_branch->br_id);
3294+
4f0767ce 3295+out:
1facf9fc 3296+ return err;
3297+}
3298+
3299+/* ---------------------------------------------------------------------- */
3300+
076b876e
AM
3301+static unsigned long long au_farray_cb(void *a,
3302+ unsigned long long max __maybe_unused,
3303+ void *arg)
3304+{
3305+ unsigned long long n;
3306+ struct file **p, *f;
3307+ struct au_sphlhead *files;
3308+ struct au_finfo *finfo;
3309+ struct super_block *sb = arg;
3310+
3311+ n = 0;
3312+ p = a;
3313+ files = &au_sbi(sb)->si_files;
3314+ spin_lock(&files->spin);
3315+ hlist_for_each_entry(finfo, &files->head, fi_hlist) {
3316+ f = finfo->fi_file;
3317+ if (file_count(f)
3318+ && !special_file(file_inode(f)->i_mode)) {
3319+ get_file(f);
3320+ *p++ = f;
3321+ n++;
3322+ AuDebugOn(n > max);
3323+ }
3324+ }
3325+ spin_unlock(&files->spin);
3326+
3327+ return n;
3328+}
3329+
3330+static struct file **au_farray_alloc(struct super_block *sb,
3331+ unsigned long long *max)
3332+{
3333+ *max = atomic_long_read(&au_sbi(sb)->si_nfiles);
3334+ return au_array_alloc(max, au_farray_cb, sb);
3335+}
3336+
/*
 * fput() every non-NULL file in the first @max entries of @a, and then
 * free the array itself.
 */
static void au_farray_free(struct file **a, unsigned long long max)
{
	struct file **p, **end;

	end = a + max;
	for (p = a; p < end; p++) {
		if (*p)
			fput(*p);
	}
	au_array_free(a);
}
3346+
3347+/* ---------------------------------------------------------------------- */
3348+
1facf9fc 3349+/*
3350+ * delete a branch
3351+ */
3352+
/*
 * pr_info() only when @do_info is non-zero.
 * to show the line number, do not make it inlined function
 */
#define AuVerbose(do_info, fmt, ...) do { \
	if (do_info) \
		pr_info(fmt, ##__VA_ARGS__); \
} while (0)
3358+
027c5e7a
AM
3359+static int au_test_ibusy(struct inode *inode, aufs_bindex_t bstart,
3360+ aufs_bindex_t bend)
3361+{
3362+ return (inode && !S_ISDIR(inode->i_mode)) || bstart == bend;
3363+}
3364+
3365+static int au_test_dbusy(struct dentry *dentry, aufs_bindex_t bstart,
3366+ aufs_bindex_t bend)
3367+{
5527c038 3368+ return au_test_ibusy(d_inode(dentry), bstart, bend);
027c5e7a
AM
3369+}
3370+
1facf9fc 3371+/*
3372+ * test if the branch is deletable or not.
3373+ */
3374+static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex,
b752ccd1 3375+ unsigned int sigen, const unsigned int verbose)
1facf9fc 3376+{
3377+ int err, i, j, ndentry;
3378+ aufs_bindex_t bstart, bend;
1facf9fc 3379+ struct au_dcsub_pages dpages;
3380+ struct au_dpage *dpage;
3381+ struct dentry *d;
1facf9fc 3382+
3383+ err = au_dpages_init(&dpages, GFP_NOFS);
3384+ if (unlikely(err))
3385+ goto out;
3386+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
3387+ if (unlikely(err))
3388+ goto out_dpages;
3389+
1facf9fc 3390+ for (i = 0; !err && i < dpages.ndpage; i++) {
3391+ dpage = dpages.dpages + i;
3392+ ndentry = dpage->ndentry;
3393+ for (j = 0; !err && j < ndentry; j++) {
3394+ d = dpage->dentries[j];
c1595e42 3395+ AuDebugOn(au_dcount(d) <= 0);
027c5e7a 3396+ if (!au_digen_test(d, sigen)) {
1facf9fc 3397+ di_read_lock_child(d, AuLock_IR);
027c5e7a
AM
3398+ if (unlikely(au_dbrange_test(d))) {
3399+ di_read_unlock(d, AuLock_IR);
3400+ continue;
3401+ }
3402+ } else {
1facf9fc 3403+ di_write_lock_child(d);
027c5e7a
AM
3404+ if (unlikely(au_dbrange_test(d))) {
3405+ di_write_unlock(d);
3406+ continue;
3407+ }
1facf9fc 3408+ err = au_reval_dpath(d, sigen);
3409+ if (!err)
3410+ di_downgrade_lock(d, AuLock_IR);
3411+ else {
3412+ di_write_unlock(d);
3413+ break;
3414+ }
3415+ }
3416+
027c5e7a 3417+ /* AuDbgDentry(d); */
1facf9fc 3418+ bstart = au_dbstart(d);
3419+ bend = au_dbend(d);
3420+ if (bstart <= bindex
3421+ && bindex <= bend
3422+ && au_h_dptr(d, bindex)
027c5e7a 3423+ && au_test_dbusy(d, bstart, bend)) {
1facf9fc 3424+ err = -EBUSY;
523b37e3 3425+ AuVerbose(verbose, "busy %pd\n", d);
027c5e7a 3426+ AuDbgDentry(d);
1facf9fc 3427+ }
3428+ di_read_unlock(d, AuLock_IR);
3429+ }
3430+ }
3431+
4f0767ce 3432+out_dpages:
1facf9fc 3433+ au_dpages_free(&dpages);
4f0767ce 3434+out:
1facf9fc 3435+ return err;
3436+}
3437+
3438+static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex,
b752ccd1 3439+ unsigned int sigen, const unsigned int verbose)
1facf9fc 3440+{
3441+ int err;
7f207e10
AM
3442+ unsigned long long max, ull;
3443+ struct inode *i, **array;
1facf9fc 3444+ aufs_bindex_t bstart, bend;
1facf9fc 3445+
7f207e10
AM
3446+ array = au_iarray_alloc(sb, &max);
3447+ err = PTR_ERR(array);
3448+ if (IS_ERR(array))
3449+ goto out;
3450+
1facf9fc 3451+ err = 0;
7f207e10
AM
3452+ AuDbg("b%d\n", bindex);
3453+ for (ull = 0; !err && ull < max; ull++) {
3454+ i = array[ull];
076b876e
AM
3455+ if (unlikely(!i))
3456+ break;
7f207e10 3457+ if (i->i_ino == AUFS_ROOT_INO)
1facf9fc 3458+ continue;
3459+
7f207e10 3460+ /* AuDbgInode(i); */
537831f9 3461+ if (au_iigen(i, NULL) == sigen)
1facf9fc 3462+ ii_read_lock_child(i);
3463+ else {
3464+ ii_write_lock_child(i);
027c5e7a
AM
3465+ err = au_refresh_hinode_self(i);
3466+ au_iigen_dec(i);
1facf9fc 3467+ if (!err)
3468+ ii_downgrade_lock(i);
3469+ else {
3470+ ii_write_unlock(i);
3471+ break;
3472+ }
3473+ }
3474+
3475+ bstart = au_ibstart(i);
3476+ bend = au_ibend(i);
3477+ if (bstart <= bindex
3478+ && bindex <= bend
3479+ && au_h_iptr(i, bindex)
027c5e7a 3480+ && au_test_ibusy(i, bstart, bend)) {
1facf9fc 3481+ err = -EBUSY;
3482+ AuVerbose(verbose, "busy i%lu\n", i->i_ino);
7f207e10 3483+ AuDbgInode(i);
1facf9fc 3484+ }
3485+ ii_read_unlock(i);
3486+ }
7f207e10 3487+ au_iarray_free(array, max);
1facf9fc 3488+
7f207e10 3489+out:
1facf9fc 3490+ return err;
3491+}
3492+
b752ccd1
AM
3493+static int test_children_busy(struct dentry *root, aufs_bindex_t bindex,
3494+ const unsigned int verbose)
1facf9fc 3495+{
3496+ int err;
3497+ unsigned int sigen;
3498+
3499+ sigen = au_sigen(root->d_sb);
3500+ DiMustNoWaiters(root);
5527c038 3501+ IiMustNoWaiters(d_inode(root));
1facf9fc 3502+ di_write_unlock(root);
b752ccd1 3503+ err = test_dentry_busy(root, bindex, sigen, verbose);
1facf9fc 3504+ if (!err)
b752ccd1 3505+ err = test_inode_busy(root->d_sb, bindex, sigen, verbose);
1facf9fc 3506+ di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */
3507+
3508+ return err;
3509+}
3510+
076b876e
AM
3511+static int test_dir_busy(struct file *file, aufs_bindex_t br_id,
3512+ struct file **to_free, int *idx)
3513+{
3514+ int err;
c1595e42 3515+ unsigned char matched, root;
076b876e
AM
3516+ aufs_bindex_t bindex, bend;
3517+ struct au_fidir *fidir;
3518+ struct au_hfile *hfile;
3519+
3520+ err = 0;
2000de60 3521+ root = IS_ROOT(file->f_path.dentry);
c1595e42
JR
3522+ if (root) {
3523+ get_file(file);
3524+ to_free[*idx] = file;
3525+ (*idx)++;
3526+ goto out;
3527+ }
3528+
076b876e 3529+ matched = 0;
076b876e
AM
3530+ fidir = au_fi(file)->fi_hdir;
3531+ AuDebugOn(!fidir);
3532+ bend = au_fbend_dir(file);
3533+ for (bindex = au_fbstart(file); bindex <= bend; bindex++) {
3534+ hfile = fidir->fd_hfile + bindex;
3535+ if (!hfile->hf_file)
3536+ continue;
3537+
c1595e42 3538+ if (hfile->hf_br->br_id == br_id) {
076b876e 3539+ matched = 1;
076b876e 3540+ break;
c1595e42 3541+ }
076b876e 3542+ }
c1595e42 3543+ if (matched)
076b876e
AM
3544+ err = -EBUSY;
3545+
3546+out:
3547+ return err;
3548+}
3549+
3550+static int test_file_busy(struct super_block *sb, aufs_bindex_t br_id,
3551+ struct file **to_free, int opened)
3552+{
3553+ int err, idx;
3554+ unsigned long long ull, max;
3555+ aufs_bindex_t bstart;
3556+ struct file *file, **array;
076b876e
AM
3557+ struct dentry *root;
3558+ struct au_hfile *hfile;
3559+
3560+ array = au_farray_alloc(sb, &max);
3561+ err = PTR_ERR(array);
3562+ if (IS_ERR(array))
3563+ goto out;
3564+
3565+ err = 0;
3566+ idx = 0;
3567+ root = sb->s_root;
3568+ di_write_unlock(root);
3569+ for (ull = 0; ull < max; ull++) {
3570+ file = array[ull];
3571+ if (unlikely(!file))
3572+ break;
3573+
3574+ /* AuDbg("%pD\n", file); */
3575+ fi_read_lock(file);
3576+ bstart = au_fbstart(file);
2000de60 3577+ if (!d_is_dir(file->f_path.dentry)) {
076b876e
AM
3578+ hfile = &au_fi(file)->fi_htop;
3579+ if (hfile->hf_br->br_id == br_id)
3580+ err = -EBUSY;
3581+ } else
3582+ err = test_dir_busy(file, br_id, to_free, &idx);
3583+ fi_read_unlock(file);
3584+ if (unlikely(err))
3585+ break;
3586+ }
3587+ di_write_lock_child(root);
3588+ au_farray_free(array, max);
3589+ AuDebugOn(idx > opened);
3590+
3591+out:
3592+ return err;
3593+}
3594+
3595+static void br_del_file(struct file **to_free, unsigned long long opened,
3596+ aufs_bindex_t br_id)
3597+{
3598+ unsigned long long ull;
3599+ aufs_bindex_t bindex, bstart, bend, bfound;
3600+ struct file *file;
3601+ struct au_fidir *fidir;
3602+ struct au_hfile *hfile;
3603+
3604+ for (ull = 0; ull < opened; ull++) {
3605+ file = to_free[ull];
3606+ if (unlikely(!file))
3607+ break;
3608+
3609+ /* AuDbg("%pD\n", file); */
2000de60 3610+ AuDebugOn(!d_is_dir(file->f_path.dentry));
076b876e
AM
3611+ bfound = -1;
3612+ fidir = au_fi(file)->fi_hdir;
3613+ AuDebugOn(!fidir);
3614+ fi_write_lock(file);
3615+ bstart = au_fbstart(file);
3616+ bend = au_fbend_dir(file);
3617+ for (bindex = bstart; bindex <= bend; bindex++) {
3618+ hfile = fidir->fd_hfile + bindex;
3619+ if (!hfile->hf_file)
3620+ continue;
3621+
3622+ if (hfile->hf_br->br_id == br_id) {
3623+ bfound = bindex;
3624+ break;
3625+ }
3626+ }
3627+ AuDebugOn(bfound < 0);
3628+ au_set_h_fptr(file, bfound, NULL);
3629+ if (bfound == bstart) {
3630+ for (bstart++; bstart <= bend; bstart++)
3631+ if (au_hf_dir(file, bstart)) {
3632+ au_set_fbstart(file, bstart);
3633+ break;
3634+ }
3635+ }
3636+ fi_write_unlock(file);
3637+ }
3638+}
3639+
1facf9fc 3640+static void au_br_do_del_brp(struct au_sbinfo *sbinfo,
3641+ const aufs_bindex_t bindex,
3642+ const aufs_bindex_t bend)
3643+{
3644+ struct au_branch **brp, **p;
3645+
dece6358
AM
3646+ AuRwMustWriteLock(&sbinfo->si_rwsem);
3647+
1facf9fc 3648+ brp = sbinfo->si_branch + bindex;
3649+ if (bindex < bend)
3650+ memmove(brp, brp + 1, sizeof(*brp) * (bend - bindex));
3651+ sbinfo->si_branch[0 + bend] = NULL;
3652+ sbinfo->si_bend--;
3653+
53392da6 3654+ p = krealloc(sbinfo->si_branch, sizeof(*p) * bend, AuGFP_SBILIST);
1facf9fc 3655+ if (p)
3656+ sbinfo->si_branch = p;
4a4d8108 3657+ /* harmless error */
1facf9fc 3658+}
3659+
3660+static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex,
3661+ const aufs_bindex_t bend)
3662+{
3663+ struct au_hdentry *hdp, *p;
3664+
1308ab2a 3665+ AuRwMustWriteLock(&dinfo->di_rwsem);
3666+
4a4d8108 3667+ hdp = dinfo->di_hdentry;
1facf9fc 3668+ if (bindex < bend)
4a4d8108
AM
3669+ memmove(hdp + bindex, hdp + bindex + 1,
3670+ sizeof(*hdp) * (bend - bindex));
3671+ hdp[0 + bend].hd_dentry = NULL;
1facf9fc 3672+ dinfo->di_bend--;
3673+
53392da6 3674+ p = krealloc(hdp, sizeof(*p) * bend, AuGFP_SBILIST);
1facf9fc 3675+ if (p)
3676+ dinfo->di_hdentry = p;
4a4d8108 3677+ /* harmless error */
1facf9fc 3678+}
3679+
3680+static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex,
3681+ const aufs_bindex_t bend)
3682+{
3683+ struct au_hinode *hip, *p;
3684+
1308ab2a 3685+ AuRwMustWriteLock(&iinfo->ii_rwsem);
3686+
1facf9fc 3687+ hip = iinfo->ii_hinode + bindex;
3688+ if (bindex < bend)
3689+ memmove(hip, hip + 1, sizeof(*hip) * (bend - bindex));
3690+ iinfo->ii_hinode[0 + bend].hi_inode = NULL;
4a4d8108 3691+ au_hn_init(iinfo->ii_hinode + bend);
1facf9fc 3692+ iinfo->ii_bend--;
3693+
53392da6 3694+ p = krealloc(iinfo->ii_hinode, sizeof(*p) * bend, AuGFP_SBILIST);
1facf9fc 3695+ if (p)
3696+ iinfo->ii_hinode = p;
4a4d8108 3697+ /* harmless error */
1facf9fc 3698+}
3699+
3700+static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex,
3701+ struct au_branch *br)
3702+{
3703+ aufs_bindex_t bend;
3704+ struct au_sbinfo *sbinfo;
53392da6
AM
3705+ struct dentry *root, *h_root;
3706+ struct inode *inode, *h_inode;
3707+ struct au_hinode *hinode;
1facf9fc 3708+
dece6358
AM
3709+ SiMustWriteLock(sb);
3710+
1facf9fc 3711+ root = sb->s_root;
5527c038 3712+ inode = d_inode(root);
1facf9fc 3713+ sbinfo = au_sbi(sb);
3714+ bend = sbinfo->si_bend;
3715+
53392da6
AM
3716+ h_root = au_h_dptr(root, bindex);
3717+ hinode = au_hi(inode, bindex);
3718+ h_inode = au_igrab(hinode->hi_inode);
3719+ au_hiput(hinode);
1facf9fc 3720+
53392da6 3721+ au_sbilist_lock();
1facf9fc 3722+ au_br_do_del_brp(sbinfo, bindex, bend);
3723+ au_br_do_del_hdp(au_di(root), bindex, bend);
3724+ au_br_do_del_hip(au_ii(inode), bindex, bend);
53392da6
AM
3725+ au_sbilist_unlock();
3726+
3727+ dput(h_root);
3728+ iput(h_inode);
3729+ au_br_do_free(br);
1facf9fc 3730+}
3731+
076b876e
AM
/*
 * a callback which fills nothing: @array and @arg are left untouched and
 * @max is returned as it is.
 */
static unsigned long long empty_cb(void *array, unsigned long long max,
				   void *arg)
{
	unsigned long long nr;

	nr = max;
	return nr;
}
3737+
1facf9fc 3738+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount)
3739+{
3740+ int err, rerr, i;
076b876e 3741+ unsigned long long opened;
1facf9fc 3742+ unsigned int mnt_flags;
3743+ aufs_bindex_t bindex, bend, br_id;
3744+ unsigned char do_wh, verbose;
3745+ struct au_branch *br;
3746+ struct au_wbr *wbr;
076b876e
AM
3747+ struct dentry *root;
3748+ struct file **to_free;
1facf9fc 3749+
3750+ err = 0;
076b876e
AM
3751+ opened = 0;
3752+ to_free = NULL;
3753+ root = sb->s_root;
3754+ bindex = au_find_dbindex(root, del->h_path.dentry);
1facf9fc 3755+ if (bindex < 0) {
3756+ if (remount)
3757+ goto out; /* success */
3758+ err = -ENOENT;
4a4d8108 3759+ pr_err("%s no such branch\n", del->pathname);
1facf9fc 3760+ goto out;
3761+ }
3762+ AuDbg("bindex b%d\n", bindex);
3763+
3764+ err = -EBUSY;
3765+ mnt_flags = au_mntflags(sb);
3766+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
3767+ bend = au_sbend(sb);
3768+ if (unlikely(!bend)) {
3769+ AuVerbose(verbose, "no more branches left\n");
3770+ goto out;
3771+ }
3772+ br = au_sbr(sb, bindex);
86dc4139 3773+ AuDebugOn(!path_equal(&br->br_path, &del->h_path));
076b876e
AM
3774+
3775+ br_id = br->br_id;
3776+ opened = atomic_read(&br->br_count);
3777+ if (unlikely(opened)) {
3778+ to_free = au_array_alloc(&opened, empty_cb, NULL);
3779+ err = PTR_ERR(to_free);
3780+ if (IS_ERR(to_free))
3781+ goto out;
3782+
3783+ err = test_file_busy(sb, br_id, to_free, opened);
3784+ if (unlikely(err)) {
3785+ AuVerbose(verbose, "%llu file(s) opened\n", opened);
3786+ goto out;
3787+ }
1facf9fc 3788+ }
3789+
3790+ wbr = br->br_wbr;
3791+ do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph);
3792+ if (do_wh) {
1308ab2a 3793+ /* instead of WbrWhMustWriteLock(wbr) */
3794+ SiMustWriteLock(sb);
1facf9fc 3795+ for (i = 0; i < AuBrWh_Last; i++) {
3796+ dput(wbr->wbr_wh[i]);
3797+ wbr->wbr_wh[i] = NULL;
3798+ }
3799+ }
3800+
076b876e 3801+ err = test_children_busy(root, bindex, verbose);
1facf9fc 3802+ if (unlikely(err)) {
3803+ if (do_wh)
3804+ goto out_wh;
3805+ goto out;
3806+ }
3807+
3808+ err = 0;
076b876e
AM
3809+ if (to_free) {
3810+ /*
3811+ * now we confirmed the branch is deletable.
3812+ * let's free the remaining opened dirs on the branch.
3813+ */
3814+ di_write_unlock(root);
3815+ br_del_file(to_free, opened, br_id);
3816+ di_write_lock_child(root);
3817+ }
3818+
1facf9fc 3819+ if (!remount)
3820+ au_br_do_del(sb, bindex, br);
3821+ else {
3822+ sysaufs_brs_del(sb, bindex);
3823+ au_br_do_del(sb, bindex, br);
3824+ sysaufs_brs_add(sb, bindex);
3825+ }
3826+
1308ab2a 3827+ if (!bindex) {
5527c038 3828+ au_cpup_attr_all(d_inode(root), /*force*/1);
1308ab2a 3829+ sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes;
3830+ } else
5527c038 3831+ au_sub_nlink(d_inode(root), d_inode(del->h_path.dentry));
1facf9fc 3832+ if (au_opt_test(mnt_flags, PLINK))
3833+ au_plink_half_refresh(sb, br_id);
3834+
b752ccd1 3835+ if (au_xino_brid(sb) == br_id)
1facf9fc 3836+ au_xino_brid_set(sb, -1);
3837+ goto out; /* success */
3838+
4f0767ce 3839+out_wh:
1facf9fc 3840+ /* revert */
86dc4139 3841+ rerr = au_br_init_wh(sb, br, br->br_perm);
1facf9fc 3842+ if (rerr)
0c3ec466
AM
3843+ pr_warn("failed re-creating base whiteout, %s. (%d)\n",
3844+ del->pathname, rerr);
4f0767ce 3845+out:
076b876e
AM
3846+ if (to_free)
3847+ au_farray_free(to_free, opened);
1facf9fc 3848+ return err;
3849+}
3850+
3851+/* ---------------------------------------------------------------------- */
3852+
027c5e7a
AM
3853+static int au_ibusy(struct super_block *sb, struct aufs_ibusy __user *arg)
3854+{
3855+ int err;
3856+ aufs_bindex_t bstart, bend;
3857+ struct aufs_ibusy ibusy;
3858+ struct inode *inode, *h_inode;
3859+
3860+ err = -EPERM;
3861+ if (unlikely(!capable(CAP_SYS_ADMIN)))
3862+ goto out;
3863+
3864+ err = copy_from_user(&ibusy, arg, sizeof(ibusy));
3865+ if (!err)
3866+ err = !access_ok(VERIFY_WRITE, &arg->h_ino, sizeof(arg->h_ino));
3867+ if (unlikely(err)) {
3868+ err = -EFAULT;
3869+ AuTraceErr(err);
3870+ goto out;
3871+ }
3872+
3873+ err = -EINVAL;
3874+ si_read_lock(sb, AuLock_FLUSH);
3875+ if (unlikely(ibusy.bindex < 0 || ibusy.bindex > au_sbend(sb)))
3876+ goto out_unlock;
3877+
3878+ err = 0;
3879+ ibusy.h_ino = 0; /* invalid */
3880+ inode = ilookup(sb, ibusy.ino);
3881+ if (!inode
3882+ || inode->i_ino == AUFS_ROOT_INO
3883+ || is_bad_inode(inode))
3884+ goto out_unlock;
3885+
3886+ ii_read_lock_child(inode);
3887+ bstart = au_ibstart(inode);
3888+ bend = au_ibend(inode);
3889+ if (bstart <= ibusy.bindex && ibusy.bindex <= bend) {
3890+ h_inode = au_h_iptr(inode, ibusy.bindex);
3891+ if (h_inode && au_test_ibusy(inode, bstart, bend))
3892+ ibusy.h_ino = h_inode->i_ino;
3893+ }
3894+ ii_read_unlock(inode);
3895+ iput(inode);
3896+
3897+out_unlock:
3898+ si_read_unlock(sb);
3899+ if (!err) {
3900+ err = __put_user(ibusy.h_ino, &arg->h_ino);
3901+ if (unlikely(err)) {
3902+ err = -EFAULT;
3903+ AuTraceErr(err);
3904+ }
3905+ }
3906+out:
3907+ return err;
3908+}
3909+
3910+long au_ibusy_ioctl(struct file *file, unsigned long arg)
3911+{
2000de60 3912+ return au_ibusy(file->f_path.dentry->d_sb, (void __user *)arg);
027c5e7a
AM
3913+}
3914+
3915+#ifdef CONFIG_COMPAT
3916+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg)
3917+{
2000de60 3918+ return au_ibusy(file->f_path.dentry->d_sb, compat_ptr(arg));
027c5e7a
AM
3919+}
3920+#endif
3921+
3922+/* ---------------------------------------------------------------------- */
3923+
1facf9fc 3924+/*
3925+ * change a branch permission
3926+ */
3927+
dece6358
AM
3928+static void au_warn_ima(void)
3929+{
3930+#ifdef CONFIG_IMA
1308ab2a 3931+ /* since it doesn't support mark_files_ro() */
027c5e7a 3932+ AuWarn1("RW -> RO makes IMA to produce wrong message\n");
dece6358
AM
3933+#endif
3934+}
3935+
1facf9fc 3936+static int do_need_sigen_inc(int a, int b)
3937+{
3938+ return au_br_whable(a) && !au_br_whable(b);
3939+}
3940+
3941+static int need_sigen_inc(int old, int new)
3942+{
3943+ return do_need_sigen_inc(old, new)
3944+ || do_need_sigen_inc(new, old);
3945+}
3946+
3947+static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex)
3948+{
7f207e10 3949+ int err, do_warn;
027c5e7a 3950+ unsigned int mnt_flags;
7f207e10 3951+ unsigned long long ull, max;
e49829fe 3952+ aufs_bindex_t br_id;
38d290e6 3953+ unsigned char verbose, writer;
7f207e10 3954+ struct file *file, *hf, **array;
e49829fe 3955+ struct au_hfile *hfile;
1facf9fc 3956+
027c5e7a
AM
3957+ mnt_flags = au_mntflags(sb);
3958+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
3959+
7f207e10
AM
3960+ array = au_farray_alloc(sb, &max);
3961+ err = PTR_ERR(array);
3962+ if (IS_ERR(array))
1facf9fc 3963+ goto out;
3964+
7f207e10 3965+ do_warn = 0;
e49829fe 3966+ br_id = au_sbr_id(sb, bindex);
7f207e10
AM
3967+ for (ull = 0; ull < max; ull++) {
3968+ file = array[ull];
076b876e
AM
3969+ if (unlikely(!file))
3970+ break;
1facf9fc 3971+
523b37e3 3972+ /* AuDbg("%pD\n", file); */
1facf9fc 3973+ fi_read_lock(file);
3974+ if (unlikely(au_test_mmapped(file))) {
3975+ err = -EBUSY;
523b37e3 3976+ AuVerbose(verbose, "mmapped %pD\n", file);
7f207e10 3977+ AuDbgFile(file);
1facf9fc 3978+ FiMustNoWaiters(file);
3979+ fi_read_unlock(file);
7f207e10 3980+ goto out_array;
1facf9fc 3981+ }
3982+
e49829fe
JR
3983+ hfile = &au_fi(file)->fi_htop;
3984+ hf = hfile->hf_file;
7e9cd9fe 3985+ if (!d_is_reg(file->f_path.dentry)
1facf9fc 3986+ || !(file->f_mode & FMODE_WRITE)
e49829fe 3987+ || hfile->hf_br->br_id != br_id
7f207e10
AM
3988+ || !(hf->f_mode & FMODE_WRITE))
3989+ array[ull] = NULL;
3990+ else {
3991+ do_warn = 1;
3992+ get_file(file);
1facf9fc 3993+ }
3994+
1facf9fc 3995+ FiMustNoWaiters(file);
3996+ fi_read_unlock(file);
7f207e10
AM
3997+ fput(file);
3998+ }
1facf9fc 3999+
4000+ err = 0;
7f207e10 4001+ if (do_warn)
dece6358 4002+ au_warn_ima();
7f207e10
AM
4003+
4004+ for (ull = 0; ull < max; ull++) {
4005+ file = array[ull];
4006+ if (!file)
4007+ continue;
4008+
1facf9fc 4009+ /* todo: already flushed? */
523b37e3
AM
4010+ /*
4011+ * fs/super.c:mark_files_ro() is gone, but aufs keeps its
4012+ * approach which resets f_mode and calls mnt_drop_write() and
4013+ * file_release_write() for each file, because the branch
4014+ * attribute in aufs world is totally different from the native
4015+ * fs rw/ro mode.
4016+ */
7f207e10
AM
4017+ /* fi_read_lock(file); */
4018+ hfile = &au_fi(file)->fi_htop;
4019+ hf = hfile->hf_file;
4020+ /* fi_read_unlock(file); */
027c5e7a 4021+ spin_lock(&hf->f_lock);
38d290e6
JR
4022+ writer = !!(hf->f_mode & FMODE_WRITER);
4023+ hf->f_mode &= ~(FMODE_WRITE | FMODE_WRITER);
027c5e7a 4024+ spin_unlock(&hf->f_lock);
38d290e6
JR
4025+ if (writer) {
4026+ put_write_access(file_inode(hf));
c06a8ce3 4027+ __mnt_drop_write(hf->f_path.mnt);
1facf9fc 4028+ }
4029+ }
4030+
7f207e10
AM
4031+out_array:
4032+ au_farray_free(array, max);
4f0767ce 4033+out:
7f207e10 4034+ AuTraceErr(err);
1facf9fc 4035+ return err;
4036+}
4037+
4038+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
7f207e10 4039+ int *do_refresh)
1facf9fc 4040+{
4041+ int err, rerr;
4042+ aufs_bindex_t bindex;
4043+ struct dentry *root;
4044+ struct au_branch *br;
076b876e 4045+ struct au_br_fhsm *bf;
1facf9fc 4046+
4047+ root = sb->s_root;
1facf9fc 4048+ bindex = au_find_dbindex(root, mod->h_root);
4049+ if (bindex < 0) {
4050+ if (remount)
4051+ return 0; /* success */
4052+ err = -ENOENT;
4a4d8108 4053+ pr_err("%s no such branch\n", mod->path);
1facf9fc 4054+ goto out;
4055+ }
4056+ AuDbg("bindex b%d\n", bindex);
4057+
5527c038 4058+ err = test_br(d_inode(mod->h_root), mod->perm, mod->path);
1facf9fc 4059+ if (unlikely(err))
4060+ goto out;
4061+
4062+ br = au_sbr(sb, bindex);
86dc4139 4063+ AuDebugOn(mod->h_root != au_br_dentry(br));
1facf9fc 4064+ if (br->br_perm == mod->perm)
4065+ return 0; /* success */
4066+
076b876e
AM
4067+ /* pre-allocate for non-fhsm --> fhsm */
4068+ bf = NULL;
4069+ if (!au_br_fhsm(br->br_perm) && au_br_fhsm(mod->perm)) {
4070+ err = au_fhsm_br_alloc(br);
4071+ if (unlikely(err))
4072+ goto out;
4073+ bf = br->br_fhsm;
4074+ br->br_fhsm = NULL;
4075+ }
4076+
1facf9fc 4077+ if (au_br_writable(br->br_perm)) {
4078+ /* remove whiteout base */
86dc4139 4079+ err = au_br_init_wh(sb, br, mod->perm);
1facf9fc 4080+ if (unlikely(err))
076b876e 4081+ goto out_bf;
1facf9fc 4082+
4083+ if (!au_br_writable(mod->perm)) {
4084+ /* rw --> ro, file might be mmapped */
4085+ DiMustNoWaiters(root);
5527c038 4086+ IiMustNoWaiters(d_inode(root));
1facf9fc 4087+ di_write_unlock(root);
4088+ err = au_br_mod_files_ro(sb, bindex);
4089+ /* aufs_write_lock() calls ..._child() */
4090+ di_write_lock_child(root);
4091+
4092+ if (unlikely(err)) {
4093+ rerr = -ENOMEM;
4094+ br->br_wbr = kmalloc(sizeof(*br->br_wbr),
4095+ GFP_NOFS);
86dc4139
AM
4096+ if (br->br_wbr)
4097+ rerr = au_wbr_init(br, sb, br->br_perm);
1facf9fc 4098+ if (unlikely(rerr)) {
4099+ AuIOErr("nested error %d (%d)\n",
4100+ rerr, err);
4101+ br->br_perm = mod->perm;
4102+ }
4103+ }
4104+ }
4105+ } else if (au_br_writable(mod->perm)) {
4106+ /* ro --> rw */
4107+ err = -ENOMEM;
4108+ br->br_wbr = kmalloc(sizeof(*br->br_wbr), GFP_NOFS);
4109+ if (br->br_wbr) {
86dc4139 4110+ err = au_wbr_init(br, sb, mod->perm);
1facf9fc 4111+ if (unlikely(err)) {
4112+ kfree(br->br_wbr);
4113+ br->br_wbr = NULL;
4114+ }
4115+ }
4116+ }
076b876e
AM
4117+ if (unlikely(err))
4118+ goto out_bf;
4119+
4120+ if (au_br_fhsm(br->br_perm)) {
4121+ if (!au_br_fhsm(mod->perm)) {
4122+ /* fhsm --> non-fhsm */
4123+ au_br_fhsm_fin(br->br_fhsm);
4124+ kfree(br->br_fhsm);
4125+ br->br_fhsm = NULL;
4126+ }
4127+ } else if (au_br_fhsm(mod->perm))
4128+ /* non-fhsm --> fhsm */
4129+ br->br_fhsm = bf;
4130+
076b876e
AM
4131+ *do_refresh |= need_sigen_inc(br->br_perm, mod->perm);
4132+ br->br_perm = mod->perm;
4133+ goto out; /* success */
1facf9fc 4134+
076b876e
AM
4135+out_bf:
4136+ kfree(bf);
4137+out:
4138+ AuTraceErr(err);
4139+ return err;
4140+}
4141+
4142+/* ---------------------------------------------------------------------- */
4143+
4144+int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs)
4145+{
4146+ int err;
4147+ struct kstatfs kstfs;
4148+
4149+ err = vfs_statfs(&br->br_path, &kstfs);
1facf9fc 4150+ if (!err) {
076b876e
AM
4151+ stfs->f_blocks = kstfs.f_blocks;
4152+ stfs->f_bavail = kstfs.f_bavail;
4153+ stfs->f_files = kstfs.f_files;
4154+ stfs->f_ffree = kstfs.f_ffree;
1facf9fc 4155+ }
4156+
1facf9fc 4157+ return err;
4158+}
7f207e10
AM
4159diff -urN /usr/share/empty/fs/aufs/branch.h linux/fs/aufs/branch.h
4160--- /usr/share/empty/fs/aufs/branch.h 1970-01-01 01:00:00.000000000 +0100
ab036dbd 4161+++ linux/fs/aufs/branch.h 2015-12-10 18:46:31.223310574 +0100
b912730e 4162@@ -0,0 +1,279 @@
1facf9fc 4163+/*
2000de60 4164+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 4165+ *
4166+ * This program, aufs is free software; you can redistribute it and/or modify
4167+ * it under the terms of the GNU General Public License as published by
4168+ * the Free Software Foundation; either version 2 of the License, or
4169+ * (at your option) any later version.
dece6358
AM
4170+ *
4171+ * This program is distributed in the hope that it will be useful,
4172+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4173+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4174+ * GNU General Public License for more details.
4175+ *
4176+ * You should have received a copy of the GNU General Public License
523b37e3 4177+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 4178+ */
4179+
4180+/*
4181+ * branch filesystems and xino for them
4182+ */
4183+
4184+#ifndef __AUFS_BRANCH_H__
4185+#define __AUFS_BRANCH_H__
4186+
4187+#ifdef __KERNEL__
4188+
1facf9fc 4189+#include <linux/mount.h>
4a4d8108 4190+#include "dynop.h"
1facf9fc 4191+#include "rwsem.h"
4192+#include "super.h"
4193+
4194+/* ---------------------------------------------------------------------- */
4195+
4196+/* a xino file */
4197+struct au_xino_file {
4198+ struct file *xi_file;
4199+ struct mutex xi_nondir_mtx;
4200+
4201+ /* todo: make xino files an array to support huge inode number */
4202+
4203+#ifdef CONFIG_DEBUG_FS
4204+ struct dentry *xi_dbgaufs;
4205+#endif
4206+};
4207+
076b876e
AM
4208+/* File-based Hierarchical Storage Management */
4209+struct au_br_fhsm {
4210+#ifdef CONFIG_AUFS_FHSM
4211+ struct mutex bf_lock;
4212+ unsigned long bf_jiffy;
4213+ struct aufs_stfs bf_stfs;
4214+ int bf_readable;
4215+#endif
4216+};
4217+
1facf9fc 4218+/* members for writable branch only */
4219+enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last};
4220+struct au_wbr {
dece6358 4221+ struct au_rwsem wbr_wh_rwsem;
1facf9fc 4222+ struct dentry *wbr_wh[AuBrWh_Last];
4a4d8108 4223+ atomic_t wbr_wh_running;
1facf9fc 4224+#define wbr_whbase wbr_wh[AuBrWh_BASE] /* whiteout base */
4225+#define wbr_plink wbr_wh[AuBrWh_PLINK] /* pseudo-link dir */
4226+#define wbr_orph wbr_wh[AuBrWh_ORPH] /* dir for orphans */
4227+
4228+ /* mfs mode */
4229+ unsigned long long wbr_bytes;
4230+};
4231+
4a4d8108
AM
4232+/* ext2 has 3 types of operations at least, ext3 has 4 */
4233+#define AuBrDynOp (AuDyLast * 4)
4234+
1716fcea
AM
4235+#ifdef CONFIG_AUFS_HFSNOTIFY
4236+/* support for asynchronous destruction */
4237+struct au_br_hfsnotify {
4238+ struct fsnotify_group *hfsn_group;
4239+};
4240+#endif
4241+
392086de
AM
4242+/* sysfs entries */
4243+struct au_brsysfs {
4244+ char name[16];
4245+ struct attribute attr;
4246+};
4247+
4248+enum {
4249+ AuBrSysfs_BR,
4250+ AuBrSysfs_BRID,
4251+ AuBrSysfs_Last
4252+};
4253+
1facf9fc 4254+/* protected by superblock rwsem */
4255+struct au_branch {
4256+ struct au_xino_file br_xino;
4257+
4258+ aufs_bindex_t br_id;
4259+
4260+ int br_perm;
86dc4139 4261+ struct path br_path;
4a4d8108
AM
4262+ spinlock_t br_dykey_lock;
4263+ struct au_dykey *br_dykey[AuBrDynOp];
1facf9fc 4264+ atomic_t br_count;
4265+
4266+ struct au_wbr *br_wbr;
076b876e 4267+ struct au_br_fhsm *br_fhsm;
1facf9fc 4268+
4269+ /* xino truncation */
1facf9fc 4270+ atomic_t br_xino_running;
4271+
027c5e7a 4272+#ifdef CONFIG_AUFS_HFSNOTIFY
1716fcea 4273+ struct au_br_hfsnotify *br_hfsn;
027c5e7a
AM
4274+#endif
4275+
1facf9fc 4276+#ifdef CONFIG_SYSFS
392086de
AM
4277+ /* entries under sysfs per mount-point */
4278+ struct au_brsysfs br_sysfs[AuBrSysfs_Last];
1facf9fc 4279+#endif
4280+};
4281+
4282+/* ---------------------------------------------------------------------- */
4283+
86dc4139
AM
4284+static inline struct vfsmount *au_br_mnt(struct au_branch *br)
4285+{
4286+ return br->br_path.mnt;
4287+}
4288+
4289+static inline struct dentry *au_br_dentry(struct au_branch *br)
4290+{
4291+ return br->br_path.dentry;
4292+}
4293+
4294+static inline struct super_block *au_br_sb(struct au_branch *br)
4295+{
4296+ return au_br_mnt(br)->mnt_sb;
4297+}
4298+
1facf9fc 4299+static inline int au_br_rdonly(struct au_branch *br)
4300+{
86dc4139 4301+ return ((au_br_sb(br)->s_flags & MS_RDONLY)
1facf9fc 4302+ || !au_br_writable(br->br_perm))
4303+ ? -EROFS : 0;
4304+}
4305+
4a4d8108 4306+static inline int au_br_hnotifyable(int brperm __maybe_unused)
1facf9fc 4307+{
4a4d8108 4308+#ifdef CONFIG_AUFS_HNOTIFY
1e00d052 4309+ return !(brperm & AuBrPerm_RR);
1facf9fc 4310+#else
4311+ return 0;
4312+#endif
4313+}
4314+
b912730e
AM
4315+static inline int au_br_test_oflag(int oflag, struct au_branch *br)
4316+{
4317+ int err, exec_flag;
4318+
4319+ err = 0;
4320+ exec_flag = oflag & __FMODE_EXEC;
4321+ if (unlikely(exec_flag && (au_br_mnt(br)->mnt_flags & MNT_NOEXEC)))
4322+ err = -EACCES;
4323+
4324+ return err;
4325+}
4326+
1facf9fc 4327+/* ---------------------------------------------------------------------- */
4328+
4329+/* branch.c */
4330+struct au_sbinfo;
4331+void au_br_free(struct au_sbinfo *sinfo);
4332+int au_br_index(struct super_block *sb, aufs_bindex_t br_id);
4333+struct au_opt_add;
4334+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount);
4335+struct au_opt_del;
4336+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount);
027c5e7a
AM
4337+long au_ibusy_ioctl(struct file *file, unsigned long arg);
4338+#ifdef CONFIG_COMPAT
4339+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg);
4340+#endif
1facf9fc 4341+struct au_opt_mod;
4342+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
7f207e10 4343+ int *do_refresh);
076b876e
AM
4344+struct aufs_stfs;
4345+int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs);
1facf9fc 4346+
4347+/* xino.c */
4348+static const loff_t au_loff_max = LLONG_MAX;
4349+
4350+int au_xib_trunc(struct super_block *sb);
5527c038 4351+ssize_t xino_fread(vfs_readf_t func, struct file *file, void *buf, size_t size,
1facf9fc 4352+ loff_t *pos);
5527c038
JR
4353+ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
4354+ size_t size, loff_t *pos);
1facf9fc 4355+struct file *au_xino_create2(struct file *base_file, struct file *copy_src);
4356+struct file *au_xino_create(struct super_block *sb, char *fname, int silent);
4357+ino_t au_xino_new_ino(struct super_block *sb);
b752ccd1 4358+void au_xino_delete_inode(struct inode *inode, const int unlinked);
1facf9fc 4359+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
4360+ ino_t ino);
4361+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
4362+ ino_t *ino);
4363+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t hino,
4364+ struct file *base_file, int do_test);
4365+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex);
4366+
4367+struct au_opt_xino;
4368+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount);
4369+void au_xino_clr(struct super_block *sb);
4370+struct file *au_xino_def(struct super_block *sb);
4371+int au_xino_path(struct seq_file *seq, struct file *file);
4372+
4373+/* ---------------------------------------------------------------------- */
4374+
4375+/* Superblock to branch */
4376+static inline
4377+aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex)
4378+{
4379+ return au_sbr(sb, bindex)->br_id;
4380+}
4381+
4382+static inline
4383+struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex)
4384+{
86dc4139 4385+ return au_br_mnt(au_sbr(sb, bindex));
1facf9fc 4386+}
4387+
4388+static inline
4389+struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex)
4390+{
86dc4139 4391+ return au_br_sb(au_sbr(sb, bindex));
1facf9fc 4392+}
4393+
4394+static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex)
4395+{
e49829fe 4396+ atomic_dec(&au_sbr(sb, bindex)->br_count);
1facf9fc 4397+}
4398+
4399+static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex)
4400+{
4401+ return au_sbr(sb, bindex)->br_perm;
4402+}
4403+
4404+static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex)
4405+{
4406+ return au_br_whable(au_sbr_perm(sb, bindex));
4407+}
4408+
4409+/* ---------------------------------------------------------------------- */
4410+
4411+/*
4412+ * wbr_wh_read_lock, wbr_wh_write_lock
4413+ * wbr_wh_read_unlock, wbr_wh_write_unlock, wbr_wh_downgrade_lock
4414+ */
4415+AuSimpleRwsemFuncs(wbr_wh, struct au_wbr *wbr, &wbr->wbr_wh_rwsem);
4416+
dece6358
AM
4417+#define WbrWhMustNoWaiters(wbr) AuRwMustNoWaiters(&wbr->wbr_wh_rwsem)
4418+#define WbrWhMustAnyLock(wbr) AuRwMustAnyLock(&wbr->wbr_wh_rwsem)
4419+#define WbrWhMustWriteLock(wbr) AuRwMustWriteLock(&wbr->wbr_wh_rwsem)
4420+
076b876e
AM
4421+/* ---------------------------------------------------------------------- */
4422+
4423+#ifdef CONFIG_AUFS_FHSM
4424+static inline void au_br_fhsm_init(struct au_br_fhsm *brfhsm)
4425+{
4426+ mutex_init(&brfhsm->bf_lock);
4427+ brfhsm->bf_jiffy = 0;
4428+ brfhsm->bf_readable = 0;
4429+}
4430+
4431+static inline void au_br_fhsm_fin(struct au_br_fhsm *brfhsm)
4432+{
4433+ mutex_destroy(&brfhsm->bf_lock);
4434+}
4435+#else
4436+AuStubVoid(au_br_fhsm_init, struct au_br_fhsm *brfhsm)
4437+AuStubVoid(au_br_fhsm_fin, struct au_br_fhsm *brfhsm)
4438+#endif
4439+
1facf9fc 4440+#endif /* __KERNEL__ */
4441+#endif /* __AUFS_BRANCH_H__ */
7f207e10
AM
4442diff -urN /usr/share/empty/fs/aufs/conf.mk linux/fs/aufs/conf.mk
4443--- /usr/share/empty/fs/aufs/conf.mk 1970-01-01 01:00:00.000000000 +0100
ab036dbd 4444+++ linux/fs/aufs/conf.mk 2015-09-24 10:47:58.248052907 +0200
c1595e42 4445@@ -0,0 +1,38 @@
4a4d8108
AM
4446+
4447+AuConfStr = CONFIG_AUFS_FS=${CONFIG_AUFS_FS}
4448+
4449+define AuConf
4450+ifdef ${1}
4451+AuConfStr += ${1}=${${1}}
4452+endif
4453+endef
4454+
b752ccd1 4455+AuConfAll = BRANCH_MAX_127 BRANCH_MAX_511 BRANCH_MAX_1023 BRANCH_MAX_32767 \
e49829fe 4456+ SBILIST \
7f207e10 4457+ HNOTIFY HFSNOTIFY \
4a4d8108 4458+ EXPORT INO_T_64 \
c1595e42 4459+ XATTR \
076b876e 4460+ FHSM \
4a4d8108 4461+ RDU \
4a4d8108
AM
4462+ SHWH \
4463+ BR_RAMFS \
4464+ BR_FUSE POLL \
4465+ BR_HFSPLUS \
4466+ BDEV_LOOP \
b752ccd1
AM
4467+ DEBUG MAGIC_SYSRQ
4468+$(foreach i, ${AuConfAll}, \
4a4d8108
AM
4469+ $(eval $(call AuConf,CONFIG_AUFS_${i})))
4470+
4471+AuConfName = ${obj}/conf.str
4472+${AuConfName}.tmp: FORCE
4473+ @echo ${AuConfStr} | tr ' ' '\n' | sed -e 's/^/"/' -e 's/$$/\\n"/' > $@
4474+${AuConfName}: ${AuConfName}.tmp
4475+ @diff -q $< $@ > /dev/null 2>&1 || { \
4476+ echo ' GEN ' $@; \
4477+ cp -p $< $@; \
4478+ }
4479+FORCE:
4480+clean-files += ${AuConfName} ${AuConfName}.tmp
4481+${obj}/sysfs.o: ${AuConfName}
b752ccd1
AM
4482+
4483+-include ${srctree}/${src}/conf_priv.mk
7f207e10
AM
4484diff -urN /usr/share/empty/fs/aufs/cpup.c linux/fs/aufs/cpup.c
4485--- /usr/share/empty/fs/aufs/cpup.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 4486+++ linux/fs/aufs/cpup.c 2015-11-11 17:21:46.915530388 +0100
5527c038 4487@@ -0,0 +1,1319 @@
1facf9fc 4488+/*
2000de60 4489+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 4490+ *
4491+ * This program, aufs is free software; you can redistribute it and/or modify
4492+ * it under the terms of the GNU General Public License as published by
4493+ * the Free Software Foundation; either version 2 of the License, or
4494+ * (at your option) any later version.
dece6358
AM
4495+ *
4496+ * This program is distributed in the hope that it will be useful,
4497+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4498+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4499+ * GNU General Public License for more details.
4500+ *
4501+ * You should have received a copy of the GNU General Public License
523b37e3 4502+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 4503+ */
4504+
4505+/*
4506+ * copy-up functions, see wbr_policy.c for copy-down
4507+ */
4508+
4509+#include <linux/fs_stack.h>
dece6358 4510+#include <linux/mm.h>
1facf9fc 4511+#include "aufs.h"
4512+
86dc4139 4513+void au_cpup_attr_flags(struct inode *dst, unsigned int iflags)
1facf9fc 4514+{
4515+ const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE
367653fa 4516+ | S_NOATIME | S_NOCMTIME | S_AUTOMOUNT;
1facf9fc 4517+
86dc4139
AM
4518+ BUILD_BUG_ON(sizeof(iflags) != sizeof(dst->i_flags));
4519+
4520+ dst->i_flags |= iflags & ~mask;
1facf9fc 4521+ if (au_test_fs_notime(dst->i_sb))
4522+ dst->i_flags |= S_NOATIME | S_NOCMTIME;
4523+}
4524+
4525+void au_cpup_attr_timesizes(struct inode *inode)
4526+{
4527+ struct inode *h_inode;
4528+
4529+ h_inode = au_h_iptr(inode, au_ibstart(inode));
4530+ fsstack_copy_attr_times(inode, h_inode);
4a4d8108 4531+ fsstack_copy_inode_size(inode, h_inode);
1facf9fc 4532+}
4533+
4534+void au_cpup_attr_nlink(struct inode *inode, int force)
4535+{
4536+ struct inode *h_inode;
4537+ struct super_block *sb;
4538+ aufs_bindex_t bindex, bend;
4539+
4540+ sb = inode->i_sb;
4541+ bindex = au_ibstart(inode);
4542+ h_inode = au_h_iptr(inode, bindex);
4543+ if (!force
4544+ && !S_ISDIR(h_inode->i_mode)
4545+ && au_opt_test(au_mntflags(sb), PLINK)
4546+ && au_plink_test(inode))
4547+ return;
4548+
7eafdf33
AM
4549+ /*
4550+ * 0 can happen in revalidating.
38d290e6
JR
4551+ * h_inode->i_mutex may not be held here, but it is harmless since once
4552+ * i_nlink reaches 0, it will never become positive except O_TMPFILE
4553+ * case.
4554+ * todo: O_TMPFILE+linkat(AT_SYMLINK_FOLLOW) bypassing aufs may cause
4555+ * the incorrect link count.
7eafdf33 4556+ */
92d182d2 4557+ set_nlink(inode, h_inode->i_nlink);
1facf9fc 4558+
4559+ /*
4560+ * fewer nlink makes find(1) noisy, but larger nlink doesn't.
4561+ * it may includes whplink directory.
4562+ */
4563+ if (S_ISDIR(h_inode->i_mode)) {
4564+ bend = au_ibend(inode);
4565+ for (bindex++; bindex <= bend; bindex++) {
4566+ h_inode = au_h_iptr(inode, bindex);
4567+ if (h_inode)
4568+ au_add_nlink(inode, h_inode);
4569+ }
4570+ }
4571+}
4572+
4573+void au_cpup_attr_changeable(struct inode *inode)
4574+{
4575+ struct inode *h_inode;
4576+
4577+ h_inode = au_h_iptr(inode, au_ibstart(inode));
4578+ inode->i_mode = h_inode->i_mode;
4579+ inode->i_uid = h_inode->i_uid;
4580+ inode->i_gid = h_inode->i_gid;
4581+ au_cpup_attr_timesizes(inode);
86dc4139 4582+ au_cpup_attr_flags(inode, h_inode->i_flags);
1facf9fc 4583+}
4584+
4585+void au_cpup_igen(struct inode *inode, struct inode *h_inode)
4586+{
4587+ struct au_iinfo *iinfo = au_ii(inode);
4588+
1308ab2a 4589+ IiMustWriteLock(inode);
4590+
1facf9fc 4591+ iinfo->ii_higen = h_inode->i_generation;
4592+ iinfo->ii_hsb1 = h_inode->i_sb;
4593+}
4594+
4595+void au_cpup_attr_all(struct inode *inode, int force)
4596+{
4597+ struct inode *h_inode;
4598+
4599+ h_inode = au_h_iptr(inode, au_ibstart(inode));
4600+ au_cpup_attr_changeable(inode);
4601+ if (inode->i_nlink > 0)
4602+ au_cpup_attr_nlink(inode, force);
4603+ inode->i_rdev = h_inode->i_rdev;
4604+ inode->i_blkbits = h_inode->i_blkbits;
4605+ au_cpup_igen(inode, h_inode);
4606+}
4607+
4608+/* ---------------------------------------------------------------------- */
4609+
4610+/* Note: dt_dentry and dt_h_dentry are not dget/dput-ed */
4611+
4612+/* keep the timestamps of the parent dir when cpup */
4613+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
4614+ struct path *h_path)
4615+{
4616+ struct inode *h_inode;
4617+
4618+ dt->dt_dentry = dentry;
4619+ dt->dt_h_path = *h_path;
5527c038 4620+ h_inode = d_inode(h_path->dentry);
1facf9fc 4621+ dt->dt_atime = h_inode->i_atime;
4622+ dt->dt_mtime = h_inode->i_mtime;
4623+ /* smp_mb(); */
4624+}
4625+
4626+void au_dtime_revert(struct au_dtime *dt)
4627+{
4628+ struct iattr attr;
4629+ int err;
4630+
4631+ attr.ia_atime = dt->dt_atime;
4632+ attr.ia_mtime = dt->dt_mtime;
4633+ attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET
4634+ | ATTR_ATIME | ATTR_ATIME_SET;
4635+
523b37e3
AM
4636+ /* no delegation since this is a directory */
4637+ err = vfsub_notify_change(&dt->dt_h_path, &attr, /*delegated*/NULL);
1facf9fc 4638+ if (unlikely(err))
0c3ec466 4639+ pr_warn("restoring timestamps failed(%d). ignored\n", err);
1facf9fc 4640+}
4641+
4642+/* ---------------------------------------------------------------------- */
4643+
86dc4139
AM
4644+/* internal use only */
4645+struct au_cpup_reg_attr {
4646+ int valid;
4647+ struct kstat st;
4648+ unsigned int iflags; /* inode->i_flags */
4649+};
4650+
1facf9fc 4651+static noinline_for_stack
86dc4139
AM
4652+int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src,
4653+ struct au_cpup_reg_attr *h_src_attr)
1facf9fc 4654+{
c1595e42 4655+ int err, sbits, icex;
7e9cd9fe
AM
4656+ unsigned int mnt_flags;
4657+ unsigned char verbose;
1facf9fc 4658+ struct iattr ia;
4659+ struct path h_path;
1308ab2a 4660+ struct inode *h_isrc, *h_idst;
86dc4139 4661+ struct kstat *h_st;
c1595e42 4662+ struct au_branch *br;
1facf9fc 4663+
4664+ h_path.dentry = au_h_dptr(dst, bindex);
5527c038 4665+ h_idst = d_inode(h_path.dentry);
c1595e42
JR
4666+ br = au_sbr(dst->d_sb, bindex);
4667+ h_path.mnt = au_br_mnt(br);
5527c038 4668+ h_isrc = d_inode(h_src);
1308ab2a 4669+ ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID
1facf9fc 4670+ | ATTR_ATIME | ATTR_MTIME
4671+ | ATTR_ATIME_SET | ATTR_MTIME_SET;
86dc4139
AM
4672+ if (h_src_attr && h_src_attr->valid) {
4673+ h_st = &h_src_attr->st;
4674+ ia.ia_uid = h_st->uid;
4675+ ia.ia_gid = h_st->gid;
4676+ ia.ia_atime = h_st->atime;
4677+ ia.ia_mtime = h_st->mtime;
4678+ if (h_idst->i_mode != h_st->mode
4679+ && !S_ISLNK(h_idst->i_mode)) {
4680+ ia.ia_valid |= ATTR_MODE;
4681+ ia.ia_mode = h_st->mode;
4682+ }
4683+ sbits = !!(h_st->mode & (S_ISUID | S_ISGID));
4684+ au_cpup_attr_flags(h_idst, h_src_attr->iflags);
4685+ } else {
4686+ ia.ia_uid = h_isrc->i_uid;
4687+ ia.ia_gid = h_isrc->i_gid;
4688+ ia.ia_atime = h_isrc->i_atime;
4689+ ia.ia_mtime = h_isrc->i_mtime;
4690+ if (h_idst->i_mode != h_isrc->i_mode
4691+ && !S_ISLNK(h_idst->i_mode)) {
4692+ ia.ia_valid |= ATTR_MODE;
4693+ ia.ia_mode = h_isrc->i_mode;
4694+ }
4695+ sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID));
4696+ au_cpup_attr_flags(h_idst, h_isrc->i_flags);
1308ab2a 4697+ }
523b37e3
AM
4698+ /* no delegation since it is just created */
4699+ err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL);
1facf9fc 4700+
4701+ /* is this nfs only? */
4702+ if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) {
4703+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
4704+ ia.ia_mode = h_isrc->i_mode;
523b37e3 4705+ err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL);
1facf9fc 4706+ }
4707+
c1595e42 4708+ icex = br->br_perm & AuBrAttr_ICEX;
7e9cd9fe
AM
4709+ if (!err) {
4710+ mnt_flags = au_mntflags(dst->d_sb);
4711+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
4712+ err = au_cpup_xattr(h_path.dentry, h_src, icex, verbose);
4713+ }
c1595e42 4714+
1facf9fc 4715+ return err;
4716+}
4717+
4718+/* ---------------------------------------------------------------------- */
4719+
/*
 * copy @len bytes from @src to @dst through @buf, at most @blksize bytes at
 * a time, using the current f_pos of each file.
 * a block which was read in full (@blksize bytes) and is equal to the zero
 * page is not written; the position of @dst is advanced by llseek instead.
 * when the last block was skipped in that way, the size of @dst is set to
 * its final position via notify_change, reusing @buf as the struct iattr.
 * NOTE(review): the read loop repeats while vfsub_read_k() returns 0, and
 * the write loop repeats while vfsub_write_k() returns 0 -- confirm that the
 * callers guarantee @src keeps at least @len bytes.
 */
4720+static int au_do_copy_file(struct file *dst, struct file *src, loff_t len,
4721+ char *buf, unsigned long blksize)
4722+{
4723+ int err;
4724+ size_t sz, rbytes, wbytes;
4725+ unsigned char all_zero;
4726+ char *p, *zp;
4727+ struct mutex *h_mtx;
4728+ /* reduce stack usage: @buf is reused as this iattr at the end */
4729+ struct iattr *ia;
4730+
4731+ zp = page_address(ZERO_PAGE(0));
4732+ if (unlikely(!zp))
4733+ return -ENOMEM; /* possible? */
4734+
4735+ err = 0;
4736+ all_zero = 0;
4737+ while (len) {
4738+ AuDbg("len %lld\n", len);
4739+ sz = blksize;
4740+ if (len < blksize)
4741+ sz = len;
4742+
4743+ rbytes = 0;
4744+ /* todo: signal_pending? */
/* rbytes is unsigned, a negative result is detected through err */
4745+ while (!rbytes || err == -EAGAIN || err == -EINTR) {
4746+ rbytes = vfsub_read_k(src, buf, sz, &src->f_pos);
4747+ err = rbytes;
4748+ }
4749+ if (unlikely(err < 0))
4750+ break;
4751+
/* only a block of exactly @blksize bytes is tested against the zero page */
4752+ all_zero = 0;
4753+ if (len >= rbytes && rbytes == blksize)
4754+ all_zero = !memcmp(buf, zp, rbytes);
4755+ if (!all_zero) {
4756+ wbytes = rbytes;
4757+ p = buf;
4758+ while (wbytes) {
4759+ size_t b;
4760+
4761+ b = vfsub_write_k(dst, p, wbytes, &dst->f_pos);
4762+ err = b;
4763+ /* todo: signal_pending? */
4764+ if (unlikely(err == -EAGAIN || err == -EINTR))
4765+ continue;
4766+ if (unlikely(err < 0))
4767+ break;
4768+ wbytes -= b;
4769+ p += b;
4770+ }
392086de
AM
4771+ if (unlikely(err < 0))
4772+ break;
1facf9fc 4773+ } else {
4774+ loff_t res;
4775+
4776+ AuLabel(hole);
4777+ res = vfsub_llseek(dst, rbytes, SEEK_CUR);
4778+ err = res;
4779+ if (unlikely(res < 0))
4780+ break;
4781+ }
4782+ len -= rbytes;
4783+ err = 0;
4784+ }
4785+
4786+ /* the last block may be a hole */
4787+ if (!err && all_zero) {
4788+ AuLabel(last hole);
4789+
/* err == 1 means "go on to set the size" in the rest of this branch */
4790+ err = 1;
2000de60 4791+ if (au_test_nfs(dst->f_path.dentry->d_sb)) {
1facf9fc 4792+ /* nfs requires this step to make last hole */
4793+ /* is this only nfs? */
4794+ do {
4795+ /* todo: signal_pending? */
4796+ err = vfsub_write_k(dst, "\0", 1, &dst->f_pos);
4797+ } while (err == -EAGAIN || err == -EINTR);
/* step back over the single byte written above */
4798+ if (err == 1)
4799+ dst->f_pos--;
4800+ }
4801+
4802+ if (err == 1) {
4803+ ia = (void *)buf;
4804+ ia->ia_size = dst->f_pos;
4805+ ia->ia_valid = ATTR_SIZE | ATTR_FILE;
4806+ ia->ia_file = dst;
c06a8ce3 4807+ h_mtx = &file_inode(dst)->i_mutex;
1facf9fc 4808+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2);
523b37e3
AM
4809+ /* no delegation since it is just created */
4810+ err = vfsub_notify_change(&dst->f_path, ia,
4811+ /*delegated*/NULL);
1facf9fc 4812+ mutex_unlock(h_mtx);
4813+ }
4814+ }
4815+
4816+ return err;
4817+}
4818+
4819+int au_copy_file(struct file *dst, struct file *src, loff_t len)
4820+{
4821+ int err;
4822+ unsigned long blksize;
4823+ unsigned char do_kfree;
4824+ char *buf;
4825+
4826+ err = -ENOMEM;
2000de60 4827+ blksize = dst->f_path.dentry->d_sb->s_blocksize;
1facf9fc 4828+ if (!blksize || PAGE_SIZE < blksize)
4829+ blksize = PAGE_SIZE;
4830+ AuDbg("blksize %lu\n", blksize);
4831+ do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr *));
4832+ if (do_kfree)
4833+ buf = kmalloc(blksize, GFP_NOFS);
4834+ else
4835+ buf = (void *)__get_free_page(GFP_NOFS);
4836+ if (unlikely(!buf))
4837+ goto out;
4838+
4839+ if (len > (1 << 22))
4840+ AuDbg("copying a large file %lld\n", (long long)len);
4841+
4842+ src->f_pos = 0;
4843+ dst->f_pos = 0;
4844+ err = au_do_copy_file(dst, src, len, buf, blksize);
4845+ if (do_kfree)
4846+ kfree(buf);
4847+ else
4848+ free_page((unsigned long)buf);
4849+
4f0767ce 4850+out:
1facf9fc 4851+ return err;
4852+}
4853+
4854+/*
4855+ * to support a sparse file which is opened with O_APPEND,
4856+ * we need to close the file.
4857+ */
c2b27bf2 4858+static int au_cp_regular(struct au_cp_generic *cpg)
1facf9fc 4859+{
4860+ int err, i;
4861+ enum { SRC, DST };
4862+ struct {
4863+ aufs_bindex_t bindex;
4864+ unsigned int flags;
4865+ struct dentry *dentry;
392086de 4866+ int force_wr;
1facf9fc 4867+ struct file *file;
523b37e3 4868+ void *label;
1facf9fc 4869+ } *f, file[] = {
4870+ {
c2b27bf2 4871+ .bindex = cpg->bsrc,
1facf9fc 4872+ .flags = O_RDONLY | O_NOATIME | O_LARGEFILE,
523b37e3 4873+ .label = &&out
1facf9fc 4874+ },
4875+ {
c2b27bf2 4876+ .bindex = cpg->bdst,
1facf9fc 4877+ .flags = O_WRONLY | O_NOATIME | O_LARGEFILE,
392086de 4878+ .force_wr = !!au_ftest_cpup(cpg->flags, RWDST),
523b37e3 4879+ .label = &&out_src
1facf9fc 4880+ }
4881+ };
4882+ struct super_block *sb;
4883+
4884+ /* bsrc branch can be ro/rw. */
c2b27bf2 4885+ sb = cpg->dentry->d_sb;
1facf9fc 4886+ f = file;
4887+ for (i = 0; i < 2; i++, f++) {
c2b27bf2
AM
4888+ f->dentry = au_h_dptr(cpg->dentry, f->bindex);
4889+ f->file = au_h_open(cpg->dentry, f->bindex, f->flags,
392086de 4890+ /*file*/NULL, f->force_wr);
1facf9fc 4891+ err = PTR_ERR(f->file);
4892+ if (IS_ERR(f->file))
4893+ goto *f->label;
1facf9fc 4894+ }
4895+
4896+ /* try stopping to update while we copyup */
5527c038 4897+ IMustLock(d_inode(file[SRC].dentry));
c2b27bf2 4898+ err = au_copy_file(file[DST].file, file[SRC].file, cpg->len);
1facf9fc 4899+
1facf9fc 4900+ fput(file[DST].file);
4901+ au_sbr_put(sb, file[DST].bindex);
523b37e3 4902+
4f0767ce 4903+out_src:
1facf9fc 4904+ fput(file[SRC].file);
4905+ au_sbr_put(sb, file[SRC].bindex);
4f0767ce 4906+out:
1facf9fc 4907+ return err;
4908+}
4909+
/*
 * copy the contents of a regular file.
 * cpg->len is clamped to the size of the source inode, and -1 means the
 * whole file. when the resulting length is not zero, the source inode is
 * locked, the pinned dir is unlocked, the source attributes are stored into
 * @h_src_attr, the data is copied by au_cp_regular() and the pinned dir is
 * relocked. finally I_LINKABLE is propagated from the source inode to the
 * inode on cpg->bdst.
 * NOTE(review): when vfs_getattr() fails, this function returns without
 * calling au_pin_hdir_relock() although au_pin_hdir_unlock() was called --
 * confirm that the callers expect it.
 */
c2b27bf2 4910+static int au_do_cpup_regular(struct au_cp_generic *cpg,
86dc4139 4911+ struct au_cpup_reg_attr *h_src_attr)
1facf9fc 4912+{
4913+ int err, rerr;
4914+ loff_t l;
86dc4139 4915+ struct path h_path;
38d290e6 4916+ struct inode *h_src_inode, *h_dst_inode;
1facf9fc 4917+
4918+ err = 0;
5527c038 4919+ h_src_inode = au_h_iptr(d_inode(cpg->dentry), cpg->bsrc);
86dc4139 4920+ l = i_size_read(h_src_inode);
c2b27bf2
AM
4921+ if (cpg->len == -1 || l < cpg->len)
4922+ cpg->len = l;
4923+ if (cpg->len) {
86dc4139
AM
4924+ /* try stopping to update while we are referencing */
4925+ mutex_lock_nested(&h_src_inode->i_mutex, AuLsc_I_CHILD);
c2b27bf2 4926+ au_pin_hdir_unlock(cpg->pin);
1facf9fc 4927+
c2b27bf2
AM
4928+ h_path.dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
4929+ h_path.mnt = au_sbr_mnt(cpg->dentry->d_sb, cpg->bsrc);
86dc4139 4930+ h_src_attr->iflags = h_src_inode->i_flags;
5527c038
JR
/* on nfs, the source inode is unlocked around vfs_getattr() */
4931+ if (!au_test_nfs(h_src_inode->i_sb))
4932+ err = vfs_getattr(&h_path, &h_src_attr->st);
4933+ else {
4934+ mutex_unlock(&h_src_inode->i_mutex);
4935+ err = vfs_getattr(&h_path, &h_src_attr->st);
4936+ mutex_lock_nested(&h_src_inode->i_mutex, AuLsc_I_CHILD);
4937+ }
86dc4139
AM
4938+ if (unlikely(err)) {
4939+ mutex_unlock(&h_src_inode->i_mutex);
4940+ goto out;
4941+ }
4942+ h_src_attr->valid = 1;
c2b27bf2 4943+ err = au_cp_regular(cpg);
86dc4139 4944+ mutex_unlock(&h_src_inode->i_mutex);
c2b27bf2 4945+ rerr = au_pin_hdir_relock(cpg->pin);
86dc4139
AM
/* an error of the copy has priority over the one of the relock */
4946+ if (!err && rerr)
4947+ err = rerr;
1facf9fc 4948+ }
38d290e6
JR
4949+ if (!err && (h_src_inode->i_state & I_LINKABLE)) {
4950+ h_path.dentry = au_h_dptr(cpg->dentry, cpg->bdst);
5527c038 4951+ h_dst_inode = d_inode(h_path.dentry);
38d290e6
JR
4952+ spin_lock(&h_dst_inode->i_lock);
4953+ h_dst_inode->i_state |= I_LINKABLE;
4954+ spin_unlock(&h_dst_inode->i_lock);
4955+ }
1facf9fc 4956+
4f0767ce 4957+out:
1facf9fc 4958+ return err;
4959+}
4960+
4961+static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src,
4962+ struct inode *h_dir)
4963+{
4964+ int err, symlen;
4965+ mm_segment_t old_fs;
b752ccd1
AM
4966+ union {
4967+ char *k;
4968+ char __user *u;
4969+ } sym;
5527c038
JR
4970+ struct inode *h_inode = d_inode(h_src);
4971+ const struct inode_operations *h_iop = h_inode->i_op;
1facf9fc 4972+
4973+ err = -ENOSYS;
5527c038 4974+ if (unlikely(!h_iop->readlink))
1facf9fc 4975+ goto out;
4976+
4977+ err = -ENOMEM;
537831f9 4978+ sym.k = (void *)__get_free_page(GFP_NOFS);
b752ccd1 4979+ if (unlikely(!sym.k))
1facf9fc 4980+ goto out;
4981+
9dbd164d 4982+ /* unnecessary to support mmap_sem since symlink is not mmap-able */
1facf9fc 4983+ old_fs = get_fs();
4984+ set_fs(KERNEL_DS);
5527c038 4985+ symlen = h_iop->readlink(h_src, sym.u, PATH_MAX);
1facf9fc 4986+ err = symlen;
4987+ set_fs(old_fs);
4988+
4989+ if (symlen > 0) {
b752ccd1
AM
4990+ sym.k[symlen] = 0;
4991+ err = vfsub_symlink(h_dir, h_path, sym.k);
1facf9fc 4992+ }
537831f9 4993+ free_page((unsigned long)sym.k);
1facf9fc 4994+
4f0767ce 4995+out:
1facf9fc 4996+ return err;
4997+}
4998+
/*
 * create the lower entry on cpg->bdst according to the file type of the
 * source inode on cpg->bsrc: a regular file (create and copy the contents),
 * a dir (mkdir), a symlink, or a device/fifo/socket (mknod).
 * when the DTIME flag is set, the times of the lower parent dir are stored
 * before the creation and reverted before returning.
 * returns zero or a negative errno; an unknown file type gives -EIO.
 */
1facf9fc 4999+static noinline_for_stack
c2b27bf2 5000+int cpup_entry(struct au_cp_generic *cpg, struct dentry *dst_parent,
86dc4139 5001+ struct au_cpup_reg_attr *h_src_attr)
1facf9fc 5002+{
5003+ int err;
5004+ umode_t mode;
5005+ unsigned int mnt_flags;
076b876e 5006+ unsigned char isdir, isreg, force;
c2b27bf2 5007+ const unsigned char do_dt = !!au_ftest_cpup(cpg->flags, DTIME);
1facf9fc 5008+ struct au_dtime dt;
5009+ struct path h_path;
5010+ struct dentry *h_src, *h_dst, *h_parent;
5527c038 5011+ struct inode *h_inode, *h_dir, *dir, *inode;
1facf9fc 5012+ struct super_block *sb;
5013+
5014+ /* bsrc branch can be ro/rw. */
c2b27bf2 5015+ h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
5527c038
JR
5016+ h_inode = d_inode(h_src);
5017+ AuDebugOn(h_inode != au_h_iptr(d_inode(cpg->dentry), cpg->bsrc));
1facf9fc 5018+
5019+ /* try stopping to be referenced while we are creating */
c2b27bf2
AM
5020+ h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
5021+ if (au_ftest_cpup(cpg->flags, RENAME))
86dc4139
AM
5022+ AuDebugOn(strncmp(h_dst->d_name.name, AUFS_WH_PFX,
5023+ AUFS_WH_PFX_LEN));
1facf9fc 5024+ h_parent = h_dst->d_parent; /* dir inode is locked */
5527c038 5025+ h_dir = d_inode(h_parent);
1facf9fc 5026+ IMustLock(h_dir);
5027+ AuDebugOn(h_parent != h_dst->d_parent);
5028+
c2b27bf2
AM
5029+ sb = cpg->dentry->d_sb;
5030+ h_path.mnt = au_sbr_mnt(sb, cpg->bdst);
1facf9fc 5031+ if (do_dt) {
5032+ h_path.dentry = h_parent;
5033+ au_dtime_store(&dt, dst_parent, &h_path);
5034+ }
5035+ h_path.dentry = h_dst;
5036+
076b876e 5037+ isreg = 0;
1facf9fc 5038+ isdir = 0;
5039+ mode = h_inode->i_mode;
5040+ switch (mode & S_IFMT) {
5041+ case S_IFREG:
076b876e 5042+ isreg = 1;
b4510431
AM
/* S_IWUSR is added to the mode of the newly created file */
5043+ err = vfsub_create(h_dir, &h_path, mode | S_IWUSR,
5044+ /*want_excl*/true);
1facf9fc 5045+ if (!err)
c2b27bf2 5046+ err = au_do_cpup_regular(cpg, h_src_attr);
1facf9fc 5047+ break;
5048+ case S_IFDIR:
5049+ isdir = 1;
5050+ err = vfsub_mkdir(h_dir, &h_path, mode);
5051+ if (!err) {
5052+ /*
5053+ * strange behaviour from the users view,
5054+ * particularly setattr case
5055+ */
5527c038
JR
5056+ dir = d_inode(dst_parent);
5057+ if (au_ibstart(dir) == cpg->bdst)
5058+ au_cpup_attr_nlink(dir, /*force*/1);
5059+ inode = d_inode(cpg->dentry);
5060+ au_cpup_attr_nlink(inode, /*force*/1);
1facf9fc 5061+ }
5062+ break;
5063+ case S_IFLNK:
5064+ err = au_do_cpup_symlink(&h_path, h_src, h_dir);
5065+ break;
5066+ case S_IFCHR:
5067+ case S_IFBLK:
5068+ AuDebugOn(!capable(CAP_MKNOD));
5069+ /*FALLTHROUGH*/
5070+ case S_IFIFO:
5071+ case S_IFSOCK:
5072+ err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev);
5073+ break;
5074+ default:
5075+ AuIOErr("Unknown inode type 0%o\n", mode);
5076+ err = -EIO;
5077+ }
5078+
/* NOTE(review): the xino entry is written regardless of err -- confirm */
5079+ mnt_flags = au_mntflags(sb);
5080+ if (!au_opt_test(mnt_flags, UDBA_NONE)
5081+ && !isdir
5082+ && au_opt_test(mnt_flags, XINO)
38d290e6
JR
5083+ && (h_inode->i_nlink == 1
5084+ || (h_inode->i_state & I_LINKABLE))
1facf9fc 5085+ /* todo: unnecessary? */
5527c038 5086+ /* && d_inode(cpg->dentry)->i_nlink == 1 */
c2b27bf2
AM
5087+ && cpg->bdst < cpg->bsrc
5088+ && !au_ftest_cpup(cpg->flags, KEEPLINO))
5089+ au_xino_write(sb, cpg->bsrc, h_inode->i_ino, /*ino*/0);
1facf9fc 5090+ /* ignore this error */
5091+
076b876e
AM
5092+ if (!err) {
/* force is set only for a regular file with a non-zero length to copy */
5093+ force = 0;
5094+ if (isreg) {
5095+ force = !!cpg->len;
5096+ if (cpg->len == -1)
5097+ force = !!i_size_read(h_inode);
5098+ }
5099+ au_fhsm_wrote(sb, cpg->bdst, force);
5100+ }
5101+
1facf9fc 5102+ if (do_dt)
5103+ au_dtime_revert(&dt);
5104+ return err;
5105+}
5106+
/*
 * rename the lower dentry of cpg->dentry on cpg->bdst to a target which is
 * looked up here and stored in @h_path->dentry.
 * without the OVERWRITE flag the target comes from au_lkup_neg(), and the
 * lower dentry of cpg->dentry is restored to the original one afterwards.
 * with the flag the target is looked up by name under the lower parent.
 * the reference of @h_path->dentry is put before returning.
 */
392086de 5107+static int au_do_ren_after_cpup(struct au_cp_generic *cpg, struct path *h_path)
86dc4139
AM
5108+{
5109+ int err;
392086de 5110+ struct dentry *dentry, *h_dentry, *h_parent, *parent;
86dc4139 5111+ struct inode *h_dir;
392086de 5112+ aufs_bindex_t bdst;
86dc4139 5113+
392086de
AM
5114+ dentry = cpg->dentry;
5115+ bdst = cpg->bdst;
5116+ h_dentry = au_h_dptr(dentry, bdst);
5117+ if (!au_ftest_cpup(cpg->flags, OVERWRITE)) {
/* hold h_dentry while its slot is temporarily used for the lookup */
5118+ dget(h_dentry);
5119+ au_set_h_dptr(dentry, bdst, NULL);
5120+ err = au_lkup_neg(dentry, bdst, /*wh*/0);
5121+ if (!err)
5122+ h_path->dentry = dget(au_h_dptr(dentry, bdst));
86dc4139 5123+ au_set_h_dptr(dentry, bdst, h_dentry);
392086de
AM
5124+ } else {
5125+ err = 0;
5126+ parent = dget_parent(dentry);
5127+ h_parent = au_h_dptr(parent, bdst);
5128+ dput(parent);
5129+ h_path->dentry = vfsub_lkup_one(&dentry->d_name, h_parent);
5130+ if (IS_ERR(h_path->dentry))
5131+ err = PTR_ERR(h_path->dentry);
86dc4139 5132+ }
392086de
AM
5133+ if (unlikely(err))
5134+ goto out;
86dc4139 5135+
86dc4139 5136+ h_parent = h_dentry->d_parent; /* dir inode is locked */
5527c038 5137+ h_dir = d_inode(h_parent);
86dc4139 5138+ IMustLock(h_dir);
523b37e3
AM
5139+ AuDbg("%pd %pd\n", h_dentry, h_path->dentry);
5140+ /* no delegation since it is just created */
5141+ err = vfsub_rename(h_dir, h_dentry, h_dir, h_path, /*delegated*/NULL);
86dc4139
AM
5142+ dput(h_path->dentry);
5143+
5144+out:
5145+ return err;
5146+}
5147+
1facf9fc 5148+/*
5149+ * copyup cpg->dentry from cpg->bsrc to cpg->bdst.
5150+ * the caller must set the both of lower dentries.
5151+ * cpg->len is for truncating. when it is -1, copyup the entire file.
5152+ * in link/rename cases, @dst_parent may be different from the real one.
+ * when @dst_parent is NULL, the parent of cpg->dentry is used.
c2b27bf2 5153+ * cpg->bsrc can be larger than cpg->bdst.
+ * when the inode already exists on cpg->bdst and has links, a hard link to
+ * the pseudo-linked dentry is created instead of a copy.
+ * on failure after the creation, the new lower entry is removed again.
1facf9fc 5154+ */
c2b27bf2 5155+static int au_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
1facf9fc 5156+{
5157+ int err, rerr;
5158+ aufs_bindex_t old_ibstart;
5159+ unsigned char isdir, plink;
1facf9fc 5160+ struct dentry *h_src, *h_dst, *h_parent;
5527c038 5161+ struct inode *dst_inode, *h_dir, *inode, *delegated, *src_inode;
1facf9fc 5162+ struct super_block *sb;
86dc4139 5163+ struct au_branch *br;
c2b27bf2
AM
5164+ /* to reduce stack size */
5165+ struct {
5166+ struct au_dtime dt;
5167+ struct path h_path;
5168+ struct au_cpup_reg_attr h_src_attr;
5169+ } *a;
1facf9fc 5170+
c2b27bf2
AM
5171+ err = -ENOMEM;
5172+ a = kmalloc(sizeof(*a), GFP_NOFS);
5173+ if (unlikely(!a))
5174+ goto out;
5175+ a->h_src_attr.valid = 0;
1facf9fc 5176+
c2b27bf2
AM
5177+ sb = cpg->dentry->d_sb;
5178+ br = au_sbr(sb, cpg->bdst);
5179+ a->h_path.mnt = au_br_mnt(br);
5180+ h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
1facf9fc 5181+ h_parent = h_dst->d_parent; /* dir inode is locked */
5527c038 5182+ h_dir = d_inode(h_parent);
1facf9fc 5183+ IMustLock(h_dir);
5184+
c2b27bf2 5185+ h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
5527c038 5186+ inode = d_inode(cpg->dentry);
1facf9fc 5187+
/* in both cases a reference of dst_parent is held until out_parent */
5188+ if (!dst_parent)
c2b27bf2 5189+ dst_parent = dget_parent(cpg->dentry);
1facf9fc 5190+ else
5191+ dget(dst_parent);
5192+
5193+ plink = !!au_opt_test(au_mntflags(sb), PLINK);
c2b27bf2 5194+ dst_inode = au_h_iptr(inode, cpg->bdst);
1facf9fc 5195+ if (dst_inode) {
5196+ if (unlikely(!plink)) {
5197+ err = -EIO;
027c5e7a
AM
5198+ AuIOErr("hi%lu(i%lu) exists on b%d "
5199+ "but plink is disabled\n",
c2b27bf2
AM
5200+ dst_inode->i_ino, inode->i_ino, cpg->bdst);
5201+ goto out_parent;
1facf9fc 5202+ }
5203+
5204+ if (dst_inode->i_nlink) {
c2b27bf2 5205+ const int do_dt = au_ftest_cpup(cpg->flags, DTIME);
1facf9fc 5206+
c2b27bf2 5207+ h_src = au_plink_lkup(inode, cpg->bdst);
1facf9fc 5208+ err = PTR_ERR(h_src);
5209+ if (IS_ERR(h_src))
c2b27bf2 5210+ goto out_parent;
5527c038 5211+ if (unlikely(d_is_negative(h_src))) {
1facf9fc 5212+ err = -EIO;
ab036dbd 5213+ AuIOErr("i%lu exists on b%d "
027c5e7a 5214+ "but not pseudo-linked\n",
ab036dbd 5215+ inode->i_ino, cpg->bdst);
1facf9fc 5216+ dput(h_src);
c2b27bf2 5217+ goto out_parent;
1facf9fc 5218+ }
5219+
5220+ if (do_dt) {
c2b27bf2
AM
5221+ a->h_path.dentry = h_parent;
5222+ au_dtime_store(&a->dt, dst_parent, &a->h_path);
1facf9fc 5223+ }
86dc4139 5224+
c2b27bf2 5225+ a->h_path.dentry = h_dst;
523b37e3
AM
5226+ delegated = NULL;
5227+ err = vfsub_link(h_src, h_dir, &a->h_path, &delegated);
c2b27bf2 5228+ if (!err && au_ftest_cpup(cpg->flags, RENAME))
392086de 5229+ err = au_do_ren_after_cpup(cpg, &a->h_path);
1facf9fc 5230+ if (do_dt)
c2b27bf2 5231+ au_dtime_revert(&a->dt);
523b37e3
AM
5232+ if (unlikely(err == -EWOULDBLOCK)) {
5233+ pr_warn("cannot retry for NFSv4 delegation"
5234+ " for an internal link\n");
5235+ iput(delegated);
5236+ }
1facf9fc 5237+ dput(h_src);
c2b27bf2 5238+ goto out_parent;
1facf9fc 5239+ } else
5240+ /* todo: cpup_wh_file? */
5241+ /* udba work */
4a4d8108 5242+ au_update_ibrange(inode, /*do_put_zero*/1);
1facf9fc 5243+ }
5244+
86dc4139 5245+ isdir = S_ISDIR(inode->i_mode);
1facf9fc 5246+ old_ibstart = au_ibstart(inode);
c2b27bf2 5247+ err = cpup_entry(cpg, dst_parent, &a->h_src_attr);
1facf9fc 5248+ if (unlikely(err))
86dc4139 5249+ goto out_rev;
5527c038 5250+ dst_inode = d_inode(h_dst);
1facf9fc 5251+ mutex_lock_nested(&dst_inode->i_mutex, AuLsc_I_CHILD2);
86dc4139 5252+ /* todo: necessary? */
c2b27bf2 5253+ /* au_pin_hdir_unlock(cpg->pin); */
1facf9fc 5254+
c2b27bf2 5255+ err = cpup_iattr(cpg->dentry, cpg->bdst, h_src, &a->h_src_attr);
86dc4139
AM
5256+ if (unlikely(err)) {
5257+ /* todo: necessary? */
c2b27bf2 5258+ /* au_pin_hdir_relock(cpg->pin); */ /* ignore an error */
86dc4139
AM
5259+ mutex_unlock(&dst_inode->i_mutex);
5260+ goto out_rev;
5261+ }
5262+
/* the new branch becomes either the start or the end of the inode range */
c2b27bf2 5263+ if (cpg->bdst < old_ibstart) {
86dc4139 5264+ if (S_ISREG(inode->i_mode)) {
c2b27bf2 5265+ err = au_dy_iaop(inode, cpg->bdst, dst_inode);
86dc4139 5266+ if (unlikely(err)) {
c2b27bf2
AM
5267+ /* ignore an error */
5268+ /* au_pin_hdir_relock(cpg->pin); */
86dc4139
AM
5269+ mutex_unlock(&dst_inode->i_mutex);
5270+ goto out_rev;
4a4d8108 5271+ }
4a4d8108 5272+ }
c2b27bf2
AM
5273+ au_set_ibstart(inode, cpg->bdst);
5274+ } else
5275+ au_set_ibend(inode, cpg->bdst);
5276+ au_set_h_iptr(inode, cpg->bdst, au_igrab(dst_inode),
86dc4139
AM
5277+ au_hi_flags(inode, isdir));
5278+
5279+ /* todo: necessary? */
c2b27bf2 5280+ /* err = au_pin_hdir_relock(cpg->pin); */
86dc4139
AM
5281+ mutex_unlock(&dst_inode->i_mutex);
5282+ if (unlikely(err))
5283+ goto out_rev;
5284+
5527c038 5285+ src_inode = d_inode(h_src);
86dc4139 5286+ if (!isdir
5527c038
JR
5287+ && (src_inode->i_nlink > 1
5288+ || src_inode->i_state & I_LINKABLE)
86dc4139 5289+ && plink)
c2b27bf2 5290+ au_plink_append(inode, cpg->bdst, h_dst);
86dc4139 5291+
c2b27bf2
AM
5292+ if (au_ftest_cpup(cpg->flags, RENAME)) {
5293+ a->h_path.dentry = h_dst;
392086de 5294+ err = au_do_ren_after_cpup(cpg, &a->h_path);
86dc4139
AM
5295+ }
5296+ if (!err)
c2b27bf2 5297+ goto out_parent; /* success */
1facf9fc 5298+
5299+ /* revert */
4a4d8108 5300+out_rev:
c2b27bf2
AM
5301+ a->h_path.dentry = h_parent;
5302+ au_dtime_store(&a->dt, dst_parent, &a->h_path);
5303+ a->h_path.dentry = h_dst;
86dc4139 5304+ rerr = 0;
5527c038 5305+ if (d_is_positive(h_dst)) {
523b37e3
AM
5306+ if (!isdir) {
5307+ /* no delegation since it is just created */
5308+ rerr = vfsub_unlink(h_dir, &a->h_path,
5309+ /*delegated*/NULL, /*force*/0);
5310+ } else
c2b27bf2 5311+ rerr = vfsub_rmdir(h_dir, &a->h_path);
86dc4139 5312+ }
c2b27bf2 5313+ au_dtime_revert(&a->dt);
/* a failure of the removal replaces the original error by -EIO */
1facf9fc 5314+ if (rerr) {
5315+ AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr);
5316+ err = -EIO;
5317+ }
c2b27bf2 5318+out_parent:
1facf9fc 5319+ dput(dst_parent);
c2b27bf2
AM
5320+ kfree(a);
5321+out:
1facf9fc 5322+ return err;
5323+}
5324+
#if 0 /* reserved */
/* the arguments which au_call_cpup_single() receives through a void pointer */
struct au_cpup_single_args {
	int *errp;
	struct au_cp_generic *cpg;
	struct dentry *dst_parent;
};

/*
 * run au_cpup_single() between au_pin_hdir_acquire_nest() and
 * au_pin_hdir_release(), and store its result into *errp.
 */
static void au_call_cpup_single(void *args)
{
	struct au_cpup_single_args *a = args;
	struct au_cp_generic *cpg = a->cpg;

	au_pin_hdir_acquire_nest(cpg->pin);
	*a->errp = au_cpup_single(cpg, a->dst_parent);
	au_pin_hdir_release(cpg->pin);
}
#endif
1facf9fc 5341+
53392da6
AM
5342+/*
5343+ * prevent SIGXFSZ in copy-up.
5344+ * testing CAP_MKNOD is for generic fs,
5345+ * but CAP_FSETID is for xfs only, currently.
5346+ */
86dc4139 5347+static int au_cpup_sio_test(struct au_pin *pin, umode_t mode)
53392da6
AM
5348+{
5349+ int do_sio;
86dc4139
AM
5350+ struct super_block *sb;
5351+ struct inode *h_dir;
53392da6
AM
5352+
5353+ do_sio = 0;
86dc4139 5354+ sb = au_pinned_parent(pin)->d_sb;
53392da6
AM
5355+ if (!au_wkq_test()
5356+ && (!au_sbi(sb)->si_plink_maint_pid
5357+ || au_plink_maint(sb, AuLock_NOPLM))) {
5358+ switch (mode & S_IFMT) {
5359+ case S_IFREG:
5360+ /* no condition about RLIMIT_FSIZE and the file size */
5361+ do_sio = 1;
5362+ break;
5363+ case S_IFCHR:
5364+ case S_IFBLK:
5365+ do_sio = !capable(CAP_MKNOD);
5366+ break;
5367+ }
5368+ if (!do_sio)
5369+ do_sio = ((mode & (S_ISUID | S_ISGID))
5370+ && !capable(CAP_FSETID));
86dc4139
AM
5371+ /* this workaround may be removed in the future */
5372+ if (!do_sio) {
5373+ h_dir = au_pinned_h_dir(pin);
5374+ do_sio = h_dir->i_mode & S_ISVTX;
5375+ }
53392da6
AM
5376+ }
5377+
5378+ return do_sio;
5379+}
5380+
#if 0 /* reserved */
/*
 * call au_cpup_single() either directly or through au_wkq_wait(), depending
 * on au_cpup_sio_test() against the mode of the lower inode on cpg->bsrc.
 * an error of au_wkq_wait() itself replaces the result of the copy-up.
 */
int au_sio_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
{
	int err, wkq_err;
	struct dentry *h_dentry;

	h_dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
	/* the pin is a member of cpg, there is no local variable 'pin' */
	if (!au_cpup_sio_test(cpg->pin, d_inode(h_dentry)->i_mode))
		err = au_cpup_single(cpg, dst_parent);
	else {
		struct au_cpup_single_args args = {
			.errp		= &err,
			.cpg		= cpg,
			.dst_parent	= dst_parent
		};
		wkq_err = au_wkq_wait(au_call_cpup_single, &args);
		if (unlikely(wkq_err))
			err = wkq_err;
	}

	return err;
}
#endif
1facf9fc 5404+
5405+/*
5406+ * copyup the @dentry from the first active lower branch to @bdst,
5407+ * using au_cpup_single().
5408+ */
c2b27bf2 5409+static int au_cpup_simple(struct au_cp_generic *cpg)
1facf9fc 5410+{
5411+ int err;
c2b27bf2
AM
5412+ unsigned int flags_orig;
5413+ struct dentry *dentry;
5414+
5415+ AuDebugOn(cpg->bsrc < 0);
1facf9fc 5416+
c2b27bf2 5417+ dentry = cpg->dentry;
86dc4139 5418+ DiMustWriteLock(dentry);
1facf9fc 5419+
c2b27bf2 5420+ err = au_lkup_neg(dentry, cpg->bdst, /*wh*/1);
1facf9fc 5421+ if (!err) {
c2b27bf2
AM
5422+ flags_orig = cpg->flags;
5423+ au_fset_cpup(cpg->flags, RENAME);
5424+ err = au_cpup_single(cpg, NULL);
5425+ cpg->flags = flags_orig;
1facf9fc 5426+ if (!err)
5427+ return 0; /* success */
5428+
5429+ /* revert */
c2b27bf2
AM
5430+ au_set_h_dptr(dentry, cpg->bdst, NULL);
5431+ au_set_dbstart(dentry, cpg->bsrc);
1facf9fc 5432+ }
5433+
5434+ return err;
5435+}
5436+
5437+struct au_cpup_simple_args {
5438+ int *errp;
c2b27bf2 5439+ struct au_cp_generic *cpg;
1facf9fc 5440+};
5441+
5442+static void au_call_cpup_simple(void *args)
5443+{
5444+ struct au_cpup_simple_args *a = args;
86dc4139 5445+
c2b27bf2
AM
5446+ au_pin_hdir_acquire_nest(a->cpg->pin);
5447+ *a->errp = au_cpup_simple(a->cpg);
5448+ au_pin_hdir_release(a->cpg->pin);
1facf9fc 5449+}
5450+
c2b27bf2 5451+static int au_do_sio_cpup_simple(struct au_cp_generic *cpg)
1facf9fc 5452+{
5453+ int err, wkq_err;
c2b27bf2
AM
5454+ struct dentry *dentry, *parent;
5455+ struct file *h_file;
1facf9fc 5456+ struct inode *h_dir;
5457+
c2b27bf2
AM
5458+ dentry = cpg->dentry;
5459+ h_file = NULL;
5460+ if (au_ftest_cpup(cpg->flags, HOPEN)) {
5461+ AuDebugOn(cpg->bsrc < 0);
392086de 5462+ h_file = au_h_open_pre(dentry, cpg->bsrc, /*force_wr*/0);
c2b27bf2
AM
5463+ err = PTR_ERR(h_file);
5464+ if (IS_ERR(h_file))
5465+ goto out;
5466+ }
5467+
1facf9fc 5468+ parent = dget_parent(dentry);
5527c038 5469+ h_dir = au_h_iptr(d_inode(parent), cpg->bdst);
53392da6 5470+ if (!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE)
5527c038 5471+ && !au_cpup_sio_test(cpg->pin, d_inode(dentry)->i_mode))
c2b27bf2 5472+ err = au_cpup_simple(cpg);
1facf9fc 5473+ else {
5474+ struct au_cpup_simple_args args = {
5475+ .errp = &err,
c2b27bf2 5476+ .cpg = cpg
1facf9fc 5477+ };
5478+ wkq_err = au_wkq_wait(au_call_cpup_simple, &args);
5479+ if (unlikely(wkq_err))
5480+ err = wkq_err;
5481+ }
5482+
5483+ dput(parent);
c2b27bf2
AM
5484+ if (h_file)
5485+ au_h_open_post(dentry, cpg->bsrc, h_file);
5486+
5487+out:
1facf9fc 5488+ return err;
5489+}
5490+
c2b27bf2 5491+int au_sio_cpup_simple(struct au_cp_generic *cpg)
367653fa 5492+{
c2b27bf2
AM
5493+ aufs_bindex_t bsrc, bend;
5494+ struct dentry *dentry, *h_dentry;
367653fa 5495+
c2b27bf2
AM
5496+ if (cpg->bsrc < 0) {
5497+ dentry = cpg->dentry;
5498+ bend = au_dbend(dentry);
5499+ for (bsrc = cpg->bdst + 1; bsrc <= bend; bsrc++) {
5500+ h_dentry = au_h_dptr(dentry, bsrc);
5501+ if (h_dentry) {
5527c038 5502+ AuDebugOn(d_is_negative(h_dentry));
c2b27bf2
AM
5503+ break;
5504+ }
5505+ }
5506+ AuDebugOn(bsrc > bend);
5507+ cpg->bsrc = bsrc;
367653fa 5508+ }
c2b27bf2
AM
5509+ AuDebugOn(cpg->bsrc <= cpg->bdst);
5510+ return au_do_sio_cpup_simple(cpg);
5511+}
367653fa 5512+
c2b27bf2
AM
5513+int au_sio_cpdown_simple(struct au_cp_generic *cpg)
5514+{
5515+ AuDebugOn(cpg->bdst <= cpg->bsrc);
5516+ return au_do_sio_cpup_simple(cpg);
367653fa
AM
5517+}
5518+
1facf9fc 5519+/* ---------------------------------------------------------------------- */
5520+
5521+/*
5522+ * copyup the deleted file for writing.
+ * the dinfo of cpg->dentry is modified temporarily: di_bstart becomes
+ * cpg->bdst, the lower dentry on cpg->bdst becomes @wh_dentry and, when
+ * @file is given, the lower dentry on the source branch becomes the one of
+ * the top lower file of @file. cpg->bsrc is set to the original di_bstart.
+ * all of them are restored after au_cpup_single().
5523+ */
c2b27bf2
AM
5524+static int au_do_cpup_wh(struct au_cp_generic *cpg, struct dentry *wh_dentry,
5525+ struct file *file)
1facf9fc 5526+{
5527+ int err;
c2b27bf2
AM
5528+ unsigned int flags_orig;
5529+ aufs_bindex_t bsrc_orig;
1facf9fc 5530+ struct dentry *h_d_dst, *h_d_start;
c2b27bf2 5531+ struct au_dinfo *dinfo;
4a4d8108 5532+ struct au_hdentry *hdp;
1facf9fc 5533+
c2b27bf2 5534+ dinfo = au_di(cpg->dentry);
1308ab2a 5535+ AuRwMustWriteLock(&dinfo->di_rwsem);
5536+
c2b27bf2
AM
5537+ bsrc_orig = cpg->bsrc;
5538+ cpg->bsrc = dinfo->di_bstart;
4a4d8108 5539+ hdp = dinfo->di_hdentry;
c2b27bf2
AM
5540+ h_d_dst = hdp[0 + cpg->bdst].hd_dentry;
5541+ dinfo->di_bstart = cpg->bdst;
5542+ hdp[0 + cpg->bdst].hd_dentry = wh_dentry;
86dc4139 5543+ h_d_start = NULL;
027c5e7a 5544+ if (file) {
c2b27bf2 5545+ h_d_start = hdp[0 + cpg->bsrc].hd_dentry;
2000de60 5546+ hdp[0 + cpg->bsrc].hd_dentry = au_hf_top(file)->f_path.dentry;
027c5e7a 5547+ }
c2b27bf2
AM
5548+ flags_orig = cpg->flags;
/*
 * NOTE(review): '!AuCpup_DTIME' is a logical negation, which is 0 when
 * AuCpup_DTIME is non-zero, i.e. presumably every flag is cleared -- confirm.
 */
5549+ cpg->flags = !AuCpup_DTIME;
5550+ err = au_cpup_single(cpg, /*dst_parent*/NULL);
5551+ cpg->flags = flags_orig;
027c5e7a
AM
5552+ if (file) {
5553+ if (!err)
5554+ err = au_reopen_nondir(file);
c2b27bf2 5555+ hdp[0 + cpg->bsrc].hd_dentry = h_d_start;
1facf9fc 5556+ }
c2b27bf2
AM
/* here cpg->bsrc still holds the original di_bstart */
5557+ hdp[0 + cpg->bdst].hd_dentry = h_d_dst;
5558+ dinfo->di_bstart = cpg->bsrc;
5559+ cpg->bsrc = bsrc_orig;
1facf9fc 5560+
5561+ return err;
5562+}
5563+
/*
 * copy-up into a temporary entry which au_whtmp_lkup() looks up under the
 * lower parent dir on cpg->bdst, then remove that entry from the dir (unlink
 * or rmdir) and hand its dentry to au_set_hi_wh().
 * the times of the lower parent dir are stored before and reverted after.
 * returns zero or a negative errno; a failure of the removal gives -EIO.
 * NOTE(review): au_set_hi_wh() is called even when the removal failed --
 * confirm that it is intended.
 */
c2b27bf2 5564+static int au_cpup_wh(struct au_cp_generic *cpg, struct file *file)
1facf9fc 5565+{
5566+ int err;
c2b27bf2 5567+ aufs_bindex_t bdst;
1facf9fc 5568+ struct au_dtime dt;
c2b27bf2 5569+ struct dentry *dentry, *parent, *h_parent, *wh_dentry;
1facf9fc 5570+ struct au_branch *br;
5571+ struct path h_path;
5572+
c2b27bf2
AM
5573+ dentry = cpg->dentry;
5574+ bdst = cpg->bdst;
1facf9fc 5575+ br = au_sbr(dentry->d_sb, bdst);
5576+ parent = dget_parent(dentry);
5577+ h_parent = au_h_dptr(parent, bdst);
5578+ wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
5579+ err = PTR_ERR(wh_dentry);
5580+ if (IS_ERR(wh_dentry))
5581+ goto out;
5582+
5583+ h_path.dentry = h_parent;
86dc4139 5584+ h_path.mnt = au_br_mnt(br);
1facf9fc 5585+ au_dtime_store(&dt, parent, &h_path);
c2b27bf2 5586+ err = au_do_cpup_wh(cpg, wh_dentry, file);
1facf9fc 5587+ if (unlikely(err))
5588+ goto out_wh;
5589+
/* an extra reference, taken before wh_dentry is passed to au_set_hi_wh() */
5590+ dget(wh_dentry);
5591+ h_path.dentry = wh_dentry;
2000de60 5592+ if (!d_is_dir(wh_dentry)) {
523b37e3 5593+ /* no delegation since it is just created */
5527c038 5594+ err = vfsub_unlink(d_inode(h_parent), &h_path,
523b37e3
AM
5595+ /*delegated*/NULL, /*force*/0);
5596+ } else
5527c038 5597+ err = vfsub_rmdir(d_inode(h_parent), &h_path);
1facf9fc 5598+ if (unlikely(err)) {
523b37e3
AM
5599+ AuIOErr("failed remove copied-up tmp file %pd(%d)\n",
5600+ wh_dentry, err);
1facf9fc 5601+ err = -EIO;
5602+ }
5603+ au_dtime_revert(&dt);
5527c038 5604+ au_set_hi_wh(d_inode(dentry), bdst, wh_dentry);
1facf9fc 5605+
4f0767ce 5606+out_wh:
1facf9fc 5607+ dput(wh_dentry);
4f0767ce 5608+out:
1facf9fc 5609+ dput(parent);
5610+ return err;
5611+}
5612+
5613+struct au_cpup_wh_args {
5614+ int *errp;
c2b27bf2 5615+ struct au_cp_generic *cpg;
1facf9fc 5616+ struct file *file;
5617+};
5618+
5619+static void au_call_cpup_wh(void *args)
5620+{
5621+ struct au_cpup_wh_args *a = args;
86dc4139 5622+
c2b27bf2
AM
5623+ au_pin_hdir_acquire_nest(a->cpg->pin);
5624+ *a->errp = au_cpup_wh(a->cpg, a->file);
5625+ au_pin_hdir_release(a->cpg->pin);
1facf9fc 5626+}
5627+
/*
 * call au_cpup_wh() either directly or through au_wkq_wait(), depending on
 * au_test_h_perm_sio() and au_cpup_sio_test().
 * when the lower parent dir on cpg->bdst has no link (i_nlink is zero), the
 * wbr_orph dir of the branch temporarily replaces it as the lower parent:
 * it is set to the parent dentry and inode, locked, and a temporary pin
 * replaces cpg->pin. everything is restored before returning.
 */
c2b27bf2 5628+int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file)
1facf9fc 5629+{
5630+ int err, wkq_err;
c2b27bf2 5631+ aufs_bindex_t bdst;
c1595e42 5632+ struct dentry *dentry, *parent, *h_orph, *h_parent;
86dc4139 5633+ struct inode *dir, *h_dir, *h_tmpdir;
1facf9fc 5634+ struct au_wbr *wbr;
c2b27bf2 5635+ struct au_pin wh_pin, *pin_orig;
1facf9fc 5636+
c2b27bf2
AM
5637+ dentry = cpg->dentry;
5638+ bdst = cpg->bdst;
1facf9fc 5639+ parent = dget_parent(dentry);
5527c038 5640+ dir = d_inode(parent);
1facf9fc 5641+ h_orph = NULL;
5642+ h_parent = NULL;
5643+ h_dir = au_igrab(au_h_iptr(dir, bdst));
5644+ h_tmpdir = h_dir;
c2b27bf2 5645+ pin_orig = NULL;
1facf9fc 5646+ if (!h_dir->i_nlink) {
5647+ wbr = au_sbr(dentry->d_sb, bdst)->br_wbr;
5648+ h_orph = wbr->wbr_orph;
5649+
/* keep the original lower parent to put it back later */
5650+ h_parent = dget(au_h_dptr(parent, bdst));
1facf9fc 5651+ au_set_h_dptr(parent, bdst, dget(h_orph));
5527c038 5652+ h_tmpdir = d_inode(h_orph);
1facf9fc 5653+ au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0);
5654+
dece6358 5655+ mutex_lock_nested(&h_tmpdir->i_mutex, AuLsc_I_PARENT3);
4a4d8108 5656+ /* todo: au_h_open_pre()? */
86dc4139 5657+
c2b27bf2 5658+ pin_orig = cpg->pin;
86dc4139 5659+ au_pin_init(&wh_pin, dentry, bdst, AuLsc_DI_PARENT,
c2b27bf2
AM
5660+ AuLsc_I_PARENT3, cpg->pin->udba, AuPin_DI_LOCKED);
5661+ cpg->pin = &wh_pin;
1facf9fc 5662+ }
5663+
53392da6 5664+ if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE)
5527c038 5665+ && !au_cpup_sio_test(cpg->pin, d_inode(dentry)->i_mode))
c2b27bf2 5666+ err = au_cpup_wh(cpg, file);
1facf9fc 5667+ else {
5668+ struct au_cpup_wh_args args = {
5669+ .errp = &err,
c2b27bf2
AM
5670+ .cpg = cpg,
5671+ .file = file
1facf9fc 5672+ };
5673+ wkq_err = au_wkq_wait(au_call_cpup_wh, &args);
5674+ if (unlikely(wkq_err))
5675+ err = wkq_err;
5676+ }
5677+
/* undo the temporary replacement by the orphan dir */
5678+ if (h_orph) {
5679+ mutex_unlock(&h_tmpdir->i_mutex);
4a4d8108 5680+ /* todo: au_h_open_post()? */
1facf9fc 5681+ au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0);
1facf9fc 5682+ au_set_h_dptr(parent, bdst, h_parent);
c2b27bf2
AM
5683+ AuDebugOn(!pin_orig);
5684+ cpg->pin = pin_orig;
1facf9fc 5685+ }
5686+ iput(h_dir);
5687+ dput(parent);
5688+
5689+ return err;
5690+}
5691+
5692+/* ---------------------------------------------------------------------- */
5693+
5694+/*
5695+ * generic routine for both of copy-up and copy-down.
+ * repeat the following until the parent of @dentry has a lower dentry on
+ * @bdst: walk up the ancestors to find the topmost one which has no lower
+ * dentry on @bdst, pin it and call @cp for it with @arg.
+ * returns zero immediately when the parent of @dentry is the root, otherwise
+ * zero on success or the error from au_do_pin() or @cp.
5696+ */
5697+/* cf. revalidate function in file.c */
5698+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
5699+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 5700+ struct au_pin *pin,
1facf9fc 5701+ struct dentry *h_parent, void *arg),
5702+ void *arg)
5703+{
5704+ int err;
5705+ struct au_pin pin;
5527c038 5706+ struct dentry *d, *parent, *h_parent, *real_parent, *h_dentry;
1facf9fc 5707+
5708+ err = 0;
5709+ parent = dget_parent(dentry);
5710+ if (IS_ROOT(parent))
5711+ goto out;
5712+
5713+ au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2,
5714+ au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE);
5715+
5716+ /* do not use au_dpage */
5717+ real_parent = parent;
5718+ while (1) {
5719+ dput(parent);
5720+ parent = dget_parent(dentry);
5721+ h_parent = au_h_dptr(parent, bdst);
5722+ if (h_parent)
5723+ goto out; /* success */
5724+
5725+ /* find top dir which is necessary to cpup */
5726+ do {
5727+ d = parent;
5728+ dput(parent);
5729+ parent = dget_parent(d);
5730+ di_read_lock_parent3(parent, !AuLock_IR);
5731+ h_parent = au_h_dptr(parent, bdst);
5732+ di_read_unlock(parent, !AuLock_IR);
5733+ } while (!h_parent);
5734+
/* real_parent is not locked here -- presumably the caller holds it; confirm */
5735+ if (d != real_parent)
5736+ di_write_lock_child3(d);
5737+
5738+ /* somebody else might create while we were sleeping */
5527c038
JR
5739+ h_dentry = au_h_dptr(d, bdst);
5740+ if (!h_dentry || d_is_negative(h_dentry)) {
5741+ if (h_dentry)
1facf9fc 5742+ au_update_dbstart(d);
5743+
5744+ au_pin_set_dentry(&pin, d);
5745+ err = au_do_pin(&pin);
5746+ if (!err) {
86dc4139 5747+ err = cp(d, bdst, &pin, h_parent, arg);
1facf9fc 5748+ au_unpin(&pin);
5749+ }
5750+ }
5751+
5752+ if (d != real_parent)
5753+ di_write_unlock(d);
5754+ if (unlikely(err))
5755+ break;
5756+ }
5757+
4f0767ce 5758+out:
1facf9fc 5759+ dput(parent);
5760+ return err;
5761+}
5762+
5763+static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 5764+ struct au_pin *pin,
2000de60 5765+ struct dentry *h_parent __maybe_unused,
1facf9fc 5766+ void *arg __maybe_unused)
5767+{
c2b27bf2
AM
5768+ struct au_cp_generic cpg = {
5769+ .dentry = dentry,
5770+ .bdst = bdst,
5771+ .bsrc = -1,
5772+ .len = 0,
5773+ .pin = pin,
5774+ .flags = AuCpup_DTIME
5775+ };
5776+ return au_sio_cpup_simple(&cpg);
1facf9fc 5777+}
5778+
5779+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
5780+{
5781+ return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL);
5782+}
5783+
5784+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
5785+{
5786+ int err;
5787+ struct dentry *parent;
5788+ struct inode *dir;
5789+
5790+ parent = dget_parent(dentry);
5527c038 5791+ dir = d_inode(parent);
1facf9fc 5792+ err = 0;
5793+ if (au_h_iptr(dir, bdst))
5794+ goto out;
5795+
5796+ di_read_unlock(parent, AuLock_IR);
5797+ di_write_lock_parent(parent);
5798+ /* someone else might change our inode while we were sleeping */
5799+ if (!au_h_iptr(dir, bdst))
5800+ err = au_cpup_dirs(dentry, bdst);
5801+ di_downgrade_lock(parent, AuLock_IR);
5802+
4f0767ce 5803+out:
1facf9fc 5804+ dput(parent);
5805+ return err;
5806+}
7f207e10
AM
5807diff -urN /usr/share/empty/fs/aufs/cpup.h linux/fs/aufs/cpup.h
5808--- /usr/share/empty/fs/aufs/cpup.h 1970-01-01 01:00:00.000000000 +0100
ab036dbd 5809+++ linux/fs/aufs/cpup.h 2015-09-24 10:47:58.248052907 +0200
523b37e3 5810@@ -0,0 +1,94 @@
1facf9fc 5811+/*
2000de60 5812+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 5813+ *
5814+ * This program, aufs is free software; you can redistribute it and/or modify
5815+ * it under the terms of the GNU General Public License as published by
5816+ * the Free Software Foundation; either version 2 of the License, or
5817+ * (at your option) any later version.
dece6358
AM
5818+ *
5819+ * This program is distributed in the hope that it will be useful,
5820+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5821+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5822+ * GNU General Public License for more details.
5823+ *
5824+ * You should have received a copy of the GNU General Public License
523b37e3 5825+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 5826+ */
5827+
5828+/*
5829+ * copy-up/down functions
5830+ */
5831+
5832+#ifndef __AUFS_CPUP_H__
5833+#define __AUFS_CPUP_H__
5834+
5835+#ifdef __KERNEL__
5836+
dece6358 5837+#include <linux/path.h>
1facf9fc 5838+
dece6358
AM
5839+struct inode;
5840+struct file;
86dc4139 5841+struct au_pin;
dece6358 5842+
86dc4139 5843+void au_cpup_attr_flags(struct inode *dst, unsigned int iflags);
1facf9fc 5844+void au_cpup_attr_timesizes(struct inode *inode);
5845+void au_cpup_attr_nlink(struct inode *inode, int force);
5846+void au_cpup_attr_changeable(struct inode *inode);
5847+void au_cpup_igen(struct inode *inode, struct inode *h_inode);
5848+void au_cpup_attr_all(struct inode *inode, int force);
5849+
5850+/* ---------------------------------------------------------------------- */
5851+
c2b27bf2
AM
5852+struct au_cp_generic {
5853+ struct dentry *dentry;
5854+ aufs_bindex_t bdst, bsrc;
5855+ loff_t len;
5856+ struct au_pin *pin;
5857+ unsigned int flags;
5858+};
5859+
/* cpup flags: one bit each */
#define AuCpup_DTIME		(1 << 0)	/* do dtime_store/revert */
#define AuCpup_KEEPLINO		(1 << 1)	/* do not clear the lower xino,
						   for link(2) */
#define AuCpup_RENAME		(1 << 2)	/* rename after cpup */
#define AuCpup_HOPEN		(1 << 3)	/* call h_open_pre/post() in
						   cpup */
#define AuCpup_OVERWRITE	(1 << 4)	/* allow overwriting the
						   existing entry */
#define AuCpup_RWDST		(1 << 5)	/* force write target even if
						   the branch is marked as RO */

/* flag helpers: 'name' is the part after the AuCpup_ prefix */
#define au_ftest_cpup(flags, name)	((flags) & AuCpup_##name)
#define au_fset_cpup(flags, name) \
do { \
	(flags) |= AuCpup_##name; \
} while (0)
#define au_fclr_cpup(flags, name) \
do { \
	(flags) &= ~AuCpup_##name; \
} while (0)
1facf9fc 5877+
5878+int au_copy_file(struct file *dst, struct file *src, loff_t len);
c2b27bf2
AM
5879+int au_sio_cpup_simple(struct au_cp_generic *cpg);
5880+int au_sio_cpdown_simple(struct au_cp_generic *cpg);
5881+int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file);
1facf9fc 5882+
5883+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
5884+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 5885+ struct au_pin *pin,
1facf9fc 5886+ struct dentry *h_parent, void *arg),
5887+ void *arg);
5888+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
5889+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
5890+
5891+/* ---------------------------------------------------------------------- */
5892+
5893+/* keep timestamps when copyup */
5894+struct au_dtime {
5895+ struct dentry *dt_dentry;
5896+ struct path dt_h_path;
5897+ struct timespec dt_atime, dt_mtime;
5898+};
5899+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
5900+ struct path *h_path);
5901+void au_dtime_revert(struct au_dtime *dt);
5902+
5903+#endif /* __KERNEL__ */
5904+#endif /* __AUFS_CPUP_H__ */
7f207e10
AM
5905diff -urN /usr/share/empty/fs/aufs/dbgaufs.c linux/fs/aufs/dbgaufs.c
5906--- /usr/share/empty/fs/aufs/dbgaufs.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 5907+++ linux/fs/aufs/dbgaufs.c 2015-09-24 10:47:58.248052907 +0200
523b37e3 5908@@ -0,0 +1,432 @@
1facf9fc 5909+/*
2000de60 5910+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 5911+ *
5912+ * This program, aufs is free software; you can redistribute it and/or modify
5913+ * it under the terms of the GNU General Public License as published by
5914+ * the Free Software Foundation; either version 2 of the License, or
5915+ * (at your option) any later version.
dece6358
AM
5916+ *
5917+ * This program is distributed in the hope that it will be useful,
5918+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5919+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5920+ * GNU General Public License for more details.
5921+ *
5922+ * You should have received a copy of the GNU General Public License
523b37e3 5923+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 5924+ */
5925+
5926+/*
5927+ * debugfs interface
5928+ */
5929+
5930+#include <linux/debugfs.h>
5931+#include "aufs.h"
5932+
5933+#ifndef CONFIG_SYSFS
5934+#error DEBUG_FS depends upon SYSFS
5935+#endif
5936+
5937+static struct dentry *dbgaufs;
5938+static const mode_t dbgaufs_mode = S_IRUSR | S_IRGRP | S_IROTH;
5939+
/*
 * pre-formatted text served by the xino files.
 * 20 is max digits length of ulong 64, and room for four of them is reserved.
 */
struct dbgaufs_arg {
	int	n;		/* number of valid bytes in a[] */
	char	a[4 * 20];
};
5945+
5946+/*
5947+ * common function for all XINO files
5948+ */
5949+static int dbgaufs_xi_release(struct inode *inode __maybe_unused,
5950+ struct file *file)
5951+{
5952+ kfree(file->private_data);
5953+ return 0;
5954+}
5955+
5956+static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt)
5957+{
5958+ int err;
5959+ struct kstat st;
5960+ struct dbgaufs_arg *p;
5961+
5962+ err = -ENOMEM;
5963+ p = kmalloc(sizeof(*p), GFP_NOFS);
5964+ if (unlikely(!p))
5965+ goto out;
5966+
5967+ err = 0;
5968+ p->n = 0;
5969+ file->private_data = p;
5970+ if (!xf)
5971+ goto out;
5972+
c06a8ce3 5973+ err = vfs_getattr(&xf->f_path, &st);
1facf9fc 5974+ if (!err) {
5975+ if (do_fcnt)
5976+ p->n = snprintf
5977+ (p->a, sizeof(p->a), "%ld, %llux%lu %lld\n",
5978+ (long)file_count(xf), st.blocks, st.blksize,
5979+ (long long)st.size);
5980+ else
5981+ p->n = snprintf(p->a, sizeof(p->a), "%llux%lu %lld\n",
5982+ st.blocks, st.blksize,
5983+ (long long)st.size);
5984+ AuDebugOn(p->n >= sizeof(p->a));
5985+ } else {
5986+ p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err);
5987+ err = 0;
5988+ }
5989+
4f0767ce 5990+out:
1facf9fc 5991+ return err;
5992+
5993+}
5994+
5995+static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf,
5996+ size_t count, loff_t *ppos)
5997+{
5998+ struct dbgaufs_arg *p;
5999+
6000+ p = file->private_data;
6001+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
6002+}
6003+
6004+/* ---------------------------------------------------------------------- */
6005+
86dc4139
AM
/* header of the page which holds the text of the "plink" file */
struct dbgaufs_plink_arg {
	int	n;	/* number of valid bytes in a[] */
	char	a[];	/* the text, filling the rest of the page */
};
6010+
6011+static int dbgaufs_plink_release(struct inode *inode __maybe_unused,
6012+ struct file *file)
6013+{
6014+ free_page((unsigned long)file->private_data);
6015+ return 0;
6016+}
6017+
6018+static int dbgaufs_plink_open(struct inode *inode, struct file *file)
6019+{
6020+ int err, i, limit;
6021+ unsigned long n, sum;
6022+ struct dbgaufs_plink_arg *p;
6023+ struct au_sbinfo *sbinfo;
6024+ struct super_block *sb;
6025+ struct au_sphlhead *sphl;
6026+
6027+ err = -ENOMEM;
6028+ p = (void *)get_zeroed_page(GFP_NOFS);
6029+ if (unlikely(!p))
6030+ goto out;
6031+
6032+ err = -EFBIG;
6033+ sbinfo = inode->i_private;
6034+ sb = sbinfo->si_sb;
6035+ si_noflush_read_lock(sb);
6036+ if (au_opt_test(au_mntflags(sb), PLINK)) {
6037+ limit = PAGE_SIZE - sizeof(p->n);
6038+
6039+ /* the number of buckets */
6040+ n = snprintf(p->a + p->n, limit, "%d\n", AuPlink_NHASH);
6041+ p->n += n;
6042+ limit -= n;
6043+
6044+ sum = 0;
6045+ for (i = 0, sphl = sbinfo->si_plink;
6046+ i < AuPlink_NHASH;
6047+ i++, sphl++) {
6048+ n = au_sphl_count(sphl);
6049+ sum += n;
6050+
6051+ n = snprintf(p->a + p->n, limit, "%lu ", n);
6052+ p->n += n;
6053+ limit -= n;
6054+ if (unlikely(limit <= 0))
6055+ goto out_free;
6056+ }
6057+ p->a[p->n - 1] = '\n';
6058+
6059+ /* the sum of plinks */
6060+ n = snprintf(p->a + p->n, limit, "%lu\n", sum);
6061+ p->n += n;
6062+ limit -= n;
6063+ if (unlikely(limit <= 0))
6064+ goto out_free;
6065+ } else {
6066+#define str "1\n0\n0\n"
6067+ p->n = sizeof(str) - 1;
6068+ strcpy(p->a, str);
6069+#undef str
6070+ }
6071+ si_read_unlock(sb);
6072+
6073+ err = 0;
6074+ file->private_data = p;
6075+ goto out; /* success */
6076+
6077+out_free:
6078+ free_page((unsigned long)p);
6079+out:
6080+ return err;
6081+}
6082+
6083+static ssize_t dbgaufs_plink_read(struct file *file, char __user *buf,
6084+ size_t count, loff_t *ppos)
6085+{
6086+ struct dbgaufs_plink_arg *p;
6087+
6088+ p = file->private_data;
6089+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
6090+}
6091+
6092+static const struct file_operations dbgaufs_plink_fop = {
6093+ .owner = THIS_MODULE,
6094+ .open = dbgaufs_plink_open,
6095+ .release = dbgaufs_plink_release,
6096+ .read = dbgaufs_plink_read
6097+};
6098+
6099+/* ---------------------------------------------------------------------- */
6100+
1facf9fc 6101+static int dbgaufs_xib_open(struct inode *inode, struct file *file)
6102+{
6103+ int err;
6104+ struct au_sbinfo *sbinfo;
6105+ struct super_block *sb;
6106+
6107+ sbinfo = inode->i_private;
6108+ sb = sbinfo->si_sb;
6109+ si_noflush_read_lock(sb);
6110+ err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0);
6111+ si_read_unlock(sb);
6112+ return err;
6113+}
6114+
6115+static const struct file_operations dbgaufs_xib_fop = {
4a4d8108 6116+ .owner = THIS_MODULE,
1facf9fc 6117+ .open = dbgaufs_xib_open,
6118+ .release = dbgaufs_xi_release,
6119+ .read = dbgaufs_xi_read
6120+};
6121+
6122+/* ---------------------------------------------------------------------- */
6123+
6124+#define DbgaufsXi_PREFIX "xi"
6125+
6126+static int dbgaufs_xino_open(struct inode *inode, struct file *file)
6127+{
6128+ int err;
6129+ long l;
6130+ struct au_sbinfo *sbinfo;
6131+ struct super_block *sb;
6132+ struct file *xf;
6133+ struct qstr *name;
6134+
6135+ err = -ENOENT;
6136+ xf = NULL;
2000de60 6137+ name = &file->f_path.dentry->d_name;
1facf9fc 6138+ if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX)
6139+ || memcmp(name->name, DbgaufsXi_PREFIX,
6140+ sizeof(DbgaufsXi_PREFIX) - 1)))
6141+ goto out;
9dbd164d 6142+ err = kstrtol(name->name + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l);
1facf9fc 6143+ if (unlikely(err))
6144+ goto out;
6145+
6146+ sbinfo = inode->i_private;
6147+ sb = sbinfo->si_sb;
6148+ si_noflush_read_lock(sb);
6149+ if (l <= au_sbend(sb)) {
6150+ xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file;
6151+ err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1);
6152+ } else
6153+ err = -ENOENT;
6154+ si_read_unlock(sb);
6155+
4f0767ce 6156+out:
1facf9fc 6157+ return err;
6158+}
6159+
6160+static const struct file_operations dbgaufs_xino_fop = {
4a4d8108 6161+ .owner = THIS_MODULE,
1facf9fc 6162+ .open = dbgaufs_xino_open,
6163+ .release = dbgaufs_xi_release,
6164+ .read = dbgaufs_xi_read
6165+};
6166+
6167+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
6168+{
6169+ aufs_bindex_t bend;
6170+ struct au_branch *br;
6171+ struct au_xino_file *xi;
6172+
6173+ if (!au_sbi(sb)->si_dbgaufs)
6174+ return;
6175+
6176+ bend = au_sbend(sb);
6177+ for (; bindex <= bend; bindex++) {
6178+ br = au_sbr(sb, bindex);
6179+ xi = &br->br_xino;
c06a8ce3
AM
6180+ debugfs_remove(xi->xi_dbgaufs);
6181+ xi->xi_dbgaufs = NULL;
1facf9fc 6182+ }
6183+}
6184+
6185+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
6186+{
6187+ struct au_sbinfo *sbinfo;
6188+ struct dentry *parent;
6189+ struct au_branch *br;
6190+ struct au_xino_file *xi;
6191+ aufs_bindex_t bend;
6192+ char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" bindex NULL */
6193+
6194+ sbinfo = au_sbi(sb);
6195+ parent = sbinfo->si_dbgaufs;
6196+ if (!parent)
6197+ return;
6198+
6199+ bend = au_sbend(sb);
6200+ for (; bindex <= bend; bindex++) {
6201+ snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex);
6202+ br = au_sbr(sb, bindex);
6203+ xi = &br->br_xino;
6204+ AuDebugOn(xi->xi_dbgaufs);
6205+ xi->xi_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent,
6206+ sbinfo, &dbgaufs_xino_fop);
6207+ /* ignore an error */
6208+ if (unlikely(!xi->xi_dbgaufs))
6209+ AuWarn1("failed %s under debugfs\n", name);
6210+ }
6211+}
6212+
6213+/* ---------------------------------------------------------------------- */
6214+
#ifdef CONFIG_AUFS_EXPORT
/* ->open() of "xigen": describe sbinfo->si_xigen under the si read lock */
static int dbgaufs_xigen_open(struct inode *inode, struct file *file)
{
	struct au_sbinfo *sbinfo = inode->i_private;
	struct super_block *sb = sbinfo->si_sb;
	int err;

	si_noflush_read_lock(sb);
	err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0);
	si_read_unlock(sb);

	return err;
}

/* file operations of the per-superblock "xigen" file */
static const struct file_operations dbgaufs_xigen_fop = {
	.owner		= THIS_MODULE,
	.open		= dbgaufs_xigen_open,
	.read		= dbgaufs_xi_read,
	.release	= dbgaufs_xi_release,
};

/* create the "xigen" file; returns 0, or -EIO when debugfs refuses */
static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
{
	/*
	 * This function is a dynamic '__init' function actually,
	 * so the tiny check for si_rwsem is unnecessary.
	 */
	/* AuRwMustWriteLock(&sbinfo->si_rwsem); */

	sbinfo->si_dbgaufs_xigen = debugfs_create_file("xigen", dbgaufs_mode,
						       sbinfo->si_dbgaufs,
						       sbinfo,
						       &dbgaufs_xigen_fop);
	if (!sbinfo->si_dbgaufs_xigen)
		return -EIO;

	return 0;
}
#else
/* without CONFIG_AUFS_EXPORT there is no "xigen" file to create */
static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
{
	return 0;
}
#endif /* CONFIG_AUFS_EXPORT */
6262+
6263+/* ---------------------------------------------------------------------- */
6264+
6265+void dbgaufs_si_fin(struct au_sbinfo *sbinfo)
6266+{
dece6358 6267+ /*
7e9cd9fe 6268+ * This function is a dynamic '__fin' function actually,
dece6358
AM
6269+ * so the tiny check for si_rwsem is unnecessary.
6270+ */
6271+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
6272+
1facf9fc 6273+ debugfs_remove_recursive(sbinfo->si_dbgaufs);
6274+ sbinfo->si_dbgaufs = NULL;
6275+ kobject_put(&sbinfo->si_kobj);
6276+}
6277+
6278+int dbgaufs_si_init(struct au_sbinfo *sbinfo)
6279+{
6280+ int err;
6281+ char name[SysaufsSiNameLen];
6282+
dece6358 6283+ /*
c1595e42 6284+ * This function is a dynamic '__init' function actually,
dece6358
AM
6285+ * so the tiny check for si_rwsem is unnecessary.
6286+ */
6287+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
6288+
1facf9fc 6289+ err = -ENOENT;
6290+ if (!dbgaufs) {
6291+ AuErr1("/debug/aufs is uninitialized\n");
6292+ goto out;
6293+ }
6294+
6295+ err = -EIO;
6296+ sysaufs_name(sbinfo, name);
6297+ sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs);
6298+ if (unlikely(!sbinfo->si_dbgaufs))
6299+ goto out;
6300+ kobject_get(&sbinfo->si_kobj);
6301+
6302+ sbinfo->si_dbgaufs_xib = debugfs_create_file
6303+ ("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
6304+ &dbgaufs_xib_fop);
6305+ if (unlikely(!sbinfo->si_dbgaufs_xib))
6306+ goto out_dir;
6307+
86dc4139
AM
6308+ sbinfo->si_dbgaufs_plink = debugfs_create_file
6309+ ("plink", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
6310+ &dbgaufs_plink_fop);
6311+ if (unlikely(!sbinfo->si_dbgaufs_plink))
6312+ goto out_dir;
6313+
1facf9fc 6314+ err = dbgaufs_xigen_init(sbinfo);
6315+ if (!err)
6316+ goto out; /* success */
6317+
4f0767ce 6318+out_dir:
1facf9fc 6319+ dbgaufs_si_fin(sbinfo);
4f0767ce 6320+out:
1facf9fc 6321+ return err;
6322+}
6323+
6324+/* ---------------------------------------------------------------------- */
6325+
6326+void dbgaufs_fin(void)
6327+{
6328+ debugfs_remove(dbgaufs);
6329+}
6330+
6331+int __init dbgaufs_init(void)
6332+{
6333+ int err;
6334+
6335+ err = -EIO;
6336+ dbgaufs = debugfs_create_dir(AUFS_NAME, NULL);
6337+ if (dbgaufs)
6338+ err = 0;
6339+ return err;
6340+}
7f207e10
AM
6341diff -urN /usr/share/empty/fs/aufs/dbgaufs.h linux/fs/aufs/dbgaufs.h
6342--- /usr/share/empty/fs/aufs/dbgaufs.h 1970-01-01 01:00:00.000000000 +0100
ab036dbd 6343+++ linux/fs/aufs/dbgaufs.h 2015-09-24 10:47:58.248052907 +0200
523b37e3 6344@@ -0,0 +1,48 @@
1facf9fc 6345+/*
2000de60 6346+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 6347+ *
6348+ * This program, aufs is free software; you can redistribute it and/or modify
6349+ * it under the terms of the GNU General Public License as published by
6350+ * the Free Software Foundation; either version 2 of the License, or
6351+ * (at your option) any later version.
dece6358
AM
6352+ *
6353+ * This program is distributed in the hope that it will be useful,
6354+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6355+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6356+ * GNU General Public License for more details.
6357+ *
6358+ * You should have received a copy of the GNU General Public License
523b37e3 6359+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6360+ */
6361+
6362+/*
6363+ * debugfs interface
6364+ */
6365+
6366+#ifndef __DBGAUFS_H__
6367+#define __DBGAUFS_H__
6368+
6369+#ifdef __KERNEL__
6370+
dece6358 6371+struct super_block;
1facf9fc 6372+struct au_sbinfo;
dece6358 6373+
1facf9fc 6374+#ifdef CONFIG_DEBUG_FS
6375+/* dbgaufs.c */
6376+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
6377+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
6378+void dbgaufs_si_fin(struct au_sbinfo *sbinfo);
6379+int dbgaufs_si_init(struct au_sbinfo *sbinfo);
6380+void dbgaufs_fin(void);
6381+int __init dbgaufs_init(void);
1facf9fc 6382+#else
4a4d8108
AM
6383+AuStubVoid(dbgaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
6384+AuStubVoid(dbgaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
6385+AuStubVoid(dbgaufs_si_fin, struct au_sbinfo *sbinfo)
6386+AuStubInt0(dbgaufs_si_init, struct au_sbinfo *sbinfo)
6387+AuStubVoid(dbgaufs_fin, void)
6388+AuStubInt0(__init dbgaufs_init, void)
1facf9fc 6389+#endif /* CONFIG_DEBUG_FS */
6390+
6391+#endif /* __KERNEL__ */
6392+#endif /* __DBGAUFS_H__ */
7f207e10
AM
6393diff -urN /usr/share/empty/fs/aufs/dcsub.c linux/fs/aufs/dcsub.c
6394--- /usr/share/empty/fs/aufs/dcsub.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 6395+++ linux/fs/aufs/dcsub.c 2015-09-24 10:47:58.248052907 +0200
c1595e42 6396@@ -0,0 +1,224 @@
1facf9fc 6397+/*
2000de60 6398+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 6399+ *
6400+ * This program, aufs is free software; you can redistribute it and/or modify
6401+ * it under the terms of the GNU General Public License as published by
6402+ * the Free Software Foundation; either version 2 of the License, or
6403+ * (at your option) any later version.
dece6358
AM
6404+ *
6405+ * This program is distributed in the hope that it will be useful,
6406+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6407+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6408+ * GNU General Public License for more details.
6409+ *
6410+ * You should have received a copy of the GNU General Public License
523b37e3 6411+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6412+ */
6413+
6414+/*
6415+ * sub-routines for dentry cache
6416+ */
6417+
6418+#include "aufs.h"
6419+
6420+static void au_dpage_free(struct au_dpage *dpage)
6421+{
6422+ int i;
6423+ struct dentry **p;
6424+
6425+ p = dpage->dentries;
6426+ for (i = 0; i < dpage->ndentry; i++)
6427+ dput(*p++);
6428+ free_page((unsigned long)dpage->dentries);
6429+}
6430+
6431+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp)
6432+{
6433+ int err;
6434+ void *p;
6435+
6436+ err = -ENOMEM;
6437+ dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp);
6438+ if (unlikely(!dpages->dpages))
6439+ goto out;
6440+
6441+ p = (void *)__get_free_page(gfp);
6442+ if (unlikely(!p))
6443+ goto out_dpages;
6444+
6445+ dpages->dpages[0].ndentry = 0;
6446+ dpages->dpages[0].dentries = p;
6447+ dpages->ndpage = 1;
6448+ return 0; /* success */
6449+
4f0767ce 6450+out_dpages:
1facf9fc 6451+ kfree(dpages->dpages);
4f0767ce 6452+out:
1facf9fc 6453+ return err;
6454+}
6455+
6456+void au_dpages_free(struct au_dcsub_pages *dpages)
6457+{
6458+ int i;
6459+ struct au_dpage *p;
6460+
6461+ p = dpages->dpages;
6462+ for (i = 0; i < dpages->ndpage; i++)
6463+ au_dpage_free(p++);
6464+ kfree(dpages->dpages);
6465+}
6466+
6467+static int au_dpages_append(struct au_dcsub_pages *dpages,
6468+ struct dentry *dentry, gfp_t gfp)
6469+{
6470+ int err, sz;
6471+ struct au_dpage *dpage;
6472+ void *p;
6473+
6474+ dpage = dpages->dpages + dpages->ndpage - 1;
6475+ sz = PAGE_SIZE / sizeof(dentry);
6476+ if (unlikely(dpage->ndentry >= sz)) {
6477+ AuLabel(new dpage);
6478+ err = -ENOMEM;
6479+ sz = dpages->ndpage * sizeof(*dpages->dpages);
6480+ p = au_kzrealloc(dpages->dpages, sz,
6481+ sz + sizeof(*dpages->dpages), gfp);
6482+ if (unlikely(!p))
6483+ goto out;
6484+
6485+ dpages->dpages = p;
6486+ dpage = dpages->dpages + dpages->ndpage;
6487+ p = (void *)__get_free_page(gfp);
6488+ if (unlikely(!p))
6489+ goto out;
6490+
6491+ dpage->ndentry = 0;
6492+ dpage->dentries = p;
6493+ dpages->ndpage++;
6494+ }
6495+
c1595e42 6496+ AuDebugOn(au_dcount(dentry) <= 0);
027c5e7a 6497+ dpage->dentries[dpage->ndentry++] = dget_dlock(dentry);
1facf9fc 6498+ return 0; /* success */
6499+
4f0767ce 6500+out:
1facf9fc 6501+ return err;
6502+}
6503+
c1595e42
JR
/*
 * todo: BAD approach
 * The enum and the prototype below are copied from linux/fs/dcache.c.
 */
enum d_walk_ret {
	D_WALK_CONTINUE,
	D_WALK_QUIT,
	D_WALK_NORETRY,
	D_WALK_SKIP,
};

extern void d_walk(struct dentry *parent, void *data,
		   enum d_walk_ret (*enter)(void *, struct dentry *),
		   void (*finish)(void *));
6516+
6517+struct ac_dpages_arg {
1facf9fc 6518+ int err;
c1595e42
JR
6519+ struct au_dcsub_pages *dpages;
6520+ struct super_block *sb;
6521+ au_dpages_test test;
6522+ void *arg;
6523+};
1facf9fc 6524+
c1595e42
JR
6525+static enum d_walk_ret au_call_dpages_append(void *_arg, struct dentry *dentry)
6526+{
6527+ enum d_walk_ret ret;
6528+ struct ac_dpages_arg *arg = _arg;
1facf9fc 6529+
c1595e42
JR
6530+ ret = D_WALK_CONTINUE;
6531+ if (dentry->d_sb == arg->sb
6532+ && !IS_ROOT(dentry)
6533+ && au_dcount(dentry) > 0
6534+ && au_di(dentry)
6535+ && (!arg->test || arg->test(dentry, arg->arg))) {
6536+ arg->err = au_dpages_append(arg->dpages, dentry, GFP_ATOMIC);
6537+ if (unlikely(arg->err))
6538+ ret = D_WALK_QUIT;
1facf9fc 6539+ }
6540+
c1595e42
JR
6541+ return ret;
6542+}
027c5e7a 6543+
c1595e42
JR
6544+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
6545+ au_dpages_test test, void *arg)
6546+{
6547+ struct ac_dpages_arg args = {
6548+ .err = 0,
6549+ .dpages = dpages,
6550+ .sb = root->d_sb,
6551+ .test = test,
6552+ .arg = arg
6553+ };
027c5e7a 6554+
c1595e42
JR
6555+ d_walk(root, &args, au_call_dpages_append, NULL);
6556+
6557+ return args.err;
1facf9fc 6558+}
6559+
6560+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
6561+ int do_include, au_dpages_test test, void *arg)
6562+{
6563+ int err;
6564+
6565+ err = 0;
027c5e7a
AM
6566+ write_seqlock(&rename_lock);
6567+ spin_lock(&dentry->d_lock);
6568+ if (do_include
c1595e42 6569+ && au_dcount(dentry) > 0
027c5e7a 6570+ && (!test || test(dentry, arg)))
1facf9fc 6571+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
027c5e7a
AM
6572+ spin_unlock(&dentry->d_lock);
6573+ if (unlikely(err))
6574+ goto out;
6575+
6576+ /*
523b37e3 6577+ * RCU for vfsmount is unnecessary since this is a traverse in a single
027c5e7a
AM
6578+ * mount
6579+ */
1facf9fc 6580+ while (!IS_ROOT(dentry)) {
027c5e7a
AM
6581+ dentry = dentry->d_parent; /* rename_lock is locked */
6582+ spin_lock(&dentry->d_lock);
c1595e42 6583+ if (au_dcount(dentry) > 0
027c5e7a 6584+ && (!test || test(dentry, arg)))
1facf9fc 6585+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
027c5e7a
AM
6586+ spin_unlock(&dentry->d_lock);
6587+ if (unlikely(err))
6588+ break;
1facf9fc 6589+ }
6590+
4f0767ce 6591+out:
027c5e7a 6592+ write_sequnlock(&rename_lock);
1facf9fc 6593+ return err;
6594+}
6595+
027c5e7a
AM
6596+static inline int au_dcsub_dpages_aufs(struct dentry *dentry, void *arg)
6597+{
6598+ return au_di(dentry) && dentry->d_sb == arg;
6599+}
6600+
6601+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
6602+ struct dentry *dentry, int do_include)
6603+{
6604+ return au_dcsub_pages_rev(dpages, dentry, do_include,
6605+ au_dcsub_dpages_aufs, dentry->d_sb);
6606+}
6607+
4a4d8108 6608+int au_test_subdir(struct dentry *d1, struct dentry *d2)
1facf9fc 6609+{
4a4d8108
AM
6610+ struct path path[2] = {
6611+ {
6612+ .dentry = d1
6613+ },
6614+ {
6615+ .dentry = d2
6616+ }
6617+ };
1facf9fc 6618+
4a4d8108 6619+ return path_is_under(path + 0, path + 1);
1facf9fc 6620+}
7f207e10
AM
6621diff -urN /usr/share/empty/fs/aufs/dcsub.h linux/fs/aufs/dcsub.h
6622--- /usr/share/empty/fs/aufs/dcsub.h 1970-01-01 01:00:00.000000000 +0100
ab036dbd 6623+++ linux/fs/aufs/dcsub.h 2015-09-24 10:47:58.251386326 +0200
5527c038 6624@@ -0,0 +1,136 @@
1facf9fc 6625+/*
2000de60 6626+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 6627+ *
6628+ * This program, aufs is free software; you can redistribute it and/or modify
6629+ * it under the terms of the GNU General Public License as published by
6630+ * the Free Software Foundation; either version 2 of the License, or
6631+ * (at your option) any later version.
dece6358
AM
6632+ *
6633+ * This program is distributed in the hope that it will be useful,
6634+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6635+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6636+ * GNU General Public License for more details.
6637+ *
6638+ * You should have received a copy of the GNU General Public License
523b37e3 6639+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6640+ */
6641+
6642+/*
6643+ * sub-routines for dentry cache
6644+ */
6645+
6646+#ifndef __AUFS_DCSUB_H__
6647+#define __AUFS_DCSUB_H__
6648+
6649+#ifdef __KERNEL__
6650+
7f207e10 6651+#include <linux/dcache.h>
027c5e7a 6652+#include <linux/fs.h>
dece6358 6653+
/* one page used as an array of dentry pointers */
struct au_dpage {
	int		ndentry;	/* number of stored dentries */
	struct dentry	**dentries;
};

/* a growable array of au_dpage */
struct au_dcsub_pages {
	int		ndpage;		/* number of pages in dpages[] */
	struct au_dpage	*dpages;
};
6663+
6664+/* ---------------------------------------------------------------------- */
6665+
7f207e10 6666+/* dcsub.c */
1facf9fc 6667+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp);
6668+void au_dpages_free(struct au_dcsub_pages *dpages);
6669+typedef int (*au_dpages_test)(struct dentry *dentry, void *arg);
6670+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
6671+ au_dpages_test test, void *arg);
6672+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
6673+ int do_include, au_dpages_test test, void *arg);
027c5e7a
AM
6674+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
6675+ struct dentry *dentry, int do_include);
4a4d8108 6676+int au_test_subdir(struct dentry *d1, struct dentry *d2);
1facf9fc 6677+
7f207e10
AM
6678+/* ---------------------------------------------------------------------- */
6679+
523b37e3
AM
6680+/*
6681+ * todo: in linux-3.13, several similar (but faster) helpers are added to
6682+ * include/linux/dcache.h. Try them (in the future).
6683+ */
6684+
027c5e7a
AM
6685+static inline int au_d_hashed_positive(struct dentry *d)
6686+{
6687+ int err;
5527c038 6688+ struct inode *inode = d_inode(d);
076b876e 6689+
027c5e7a 6690+ err = 0;
5527c038
JR
6691+ if (unlikely(d_unhashed(d)
6692+ || d_is_negative(d)
6693+ || !inode->i_nlink))
027c5e7a
AM
6694+ err = -ENOENT;
6695+ return err;
6696+}
6697+
38d290e6
JR
6698+static inline int au_d_linkable(struct dentry *d)
6699+{
6700+ int err;
5527c038 6701+ struct inode *inode = d_inode(d);
076b876e 6702+
38d290e6
JR
6703+ err = au_d_hashed_positive(d);
6704+ if (err
5527c038 6705+ && d_is_positive(d)
38d290e6
JR
6706+ && (inode->i_state & I_LINKABLE))
6707+ err = 0;
6708+ return err;
6709+}
6710+
027c5e7a
AM
6711+static inline int au_d_alive(struct dentry *d)
6712+{
6713+ int err;
6714+ struct inode *inode;
076b876e 6715+
027c5e7a
AM
6716+ err = 0;
6717+ if (!IS_ROOT(d))
6718+ err = au_d_hashed_positive(d);
6719+ else {
5527c038
JR
6720+ inode = d_inode(d);
6721+ if (unlikely(d_unlinked(d)
6722+ || d_is_negative(d)
6723+ || !inode->i_nlink))
027c5e7a
AM
6724+ err = -ENOENT;
6725+ }
6726+ return err;
6727+}
6728+
6729+static inline int au_alive_dir(struct dentry *d)
7f207e10 6730+{
027c5e7a 6731+ int err;
076b876e 6732+
027c5e7a 6733+ err = au_d_alive(d);
5527c038 6734+ if (unlikely(err || IS_DEADDIR(d_inode(d))))
027c5e7a
AM
6735+ err = -ENOENT;
6736+ return err;
7f207e10
AM
6737+}
6738+
38d290e6
JR
6739+static inline int au_qstreq(struct qstr *a, struct qstr *b)
6740+{
6741+ return a->len == b->len
6742+ && !memcmp(a->name, b->name, a->len);
6743+}
6744+
7e9cd9fe
AM
6745+/*
6746+ * by the commit
6747+ * 360f547 2015-01-25 dcache: let the dentry count go down to zero without
6748+ * taking d_lock
6749+ * the type of d_lockref.count became int, but the inlined function d_count()
6750+ * still returns unsigned int.
6751+ * I don't know why. Maybe it is for every d_count() users?
6752+ * Anyway au_dcount() lives on.
6753+ */
c1595e42
JR
/* d_count() converted to a signed int */
static inline int au_dcount(struct dentry *d)
{
	int count = (int)d_count(d);

	return count;
}
6758+
1facf9fc 6759+#endif /* __KERNEL__ */
6760+#endif /* __AUFS_DCSUB_H__ */
7f207e10
AM
6761diff -urN /usr/share/empty/fs/aufs/debug.c linux/fs/aufs/debug.c
6762--- /usr/share/empty/fs/aufs/debug.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 6763+++ linux/fs/aufs/debug.c 2015-09-24 10:47:58.251386326 +0200
5527c038 6764@@ -0,0 +1,440 @@
1facf9fc 6765+/*
2000de60 6766+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 6767+ *
6768+ * This program, aufs is free software; you can redistribute it and/or modify
6769+ * it under the terms of the GNU General Public License as published by
6770+ * the Free Software Foundation; either version 2 of the License, or
6771+ * (at your option) any later version.
dece6358
AM
6772+ *
6773+ * This program is distributed in the hope that it will be useful,
6774+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6775+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6776+ * GNU General Public License for more details.
6777+ *
6778+ * You should have received a copy of the GNU General Public License
523b37e3 6779+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6780+ */
6781+
6782+/*
6783+ * debug print functions
6784+ */
6785+
6786+#include "aufs.h"
6787+
392086de
AM
6788+/* Returns 0, or -errno. arg is in kp->arg. */
6789+static int param_atomic_t_set(const char *val, const struct kernel_param *kp)
6790+{
6791+ int err, n;
6792+
6793+ err = kstrtoint(val, 0, &n);
6794+ if (!err) {
6795+ if (n > 0)
6796+ au_debug_on();
6797+ else
6798+ au_debug_off();
6799+ }
6800+ return err;
6801+}
6802+
6803+/* Returns length written or -errno. Buffer is 4k (ie. be short!) */
6804+static int param_atomic_t_get(char *buffer, const struct kernel_param *kp)
6805+{
6806+ atomic_t *a;
6807+
6808+ a = kp->arg;
6809+ return sprintf(buffer, "%d", atomic_read(a));
6810+}
6811+
6812+static struct kernel_param_ops param_ops_atomic_t = {
6813+ .set = param_atomic_t_set,
6814+ .get = param_atomic_t_get
6815+ /* void (*free)(void *arg) */
6816+};
6817+
6818+atomic_t aufs_debug = ATOMIC_INIT(0);
1facf9fc 6819+MODULE_PARM_DESC(debug, "debug print");
392086de 6820+module_param_named(debug, aufs_debug, atomic_t, S_IRUGO | S_IWUSR | S_IWGRP);
1facf9fc 6821+
c1595e42 6822+DEFINE_MUTEX(au_dbg_mtx); /* just to serialize the dbg msgs */
1facf9fc 6823+char *au_plevel = KERN_DEBUG;
e49829fe
JR
6824+#define dpri(fmt, ...) do { \
6825+ if ((au_plevel \
6826+ && strcmp(au_plevel, KERN_DEBUG)) \
6827+ || au_debug_test()) \
6828+ printk("%s" fmt, au_plevel, ##__VA_ARGS__); \
1facf9fc 6829+} while (0)
6830+
6831+/* ---------------------------------------------------------------------- */
6832+
6833+void au_dpri_whlist(struct au_nhash *whlist)
6834+{
6835+ unsigned long ul, n;
6836+ struct hlist_head *head;
c06a8ce3 6837+ struct au_vdir_wh *pos;
1facf9fc 6838+
6839+ n = whlist->nh_num;
6840+ head = whlist->nh_head;
6841+ for (ul = 0; ul < n; ul++) {
c06a8ce3 6842+ hlist_for_each_entry(pos, head, wh_hash)
1facf9fc 6843+ dpri("b%d, %.*s, %d\n",
c06a8ce3
AM
6844+ pos->wh_bindex,
6845+ pos->wh_str.len, pos->wh_str.name,
6846+ pos->wh_str.len);
1facf9fc 6847+ head++;
6848+ }
6849+}
6850+
6851+void au_dpri_vdir(struct au_vdir *vdir)
6852+{
6853+ unsigned long ul;
6854+ union au_vdir_deblk_p p;
6855+ unsigned char *o;
6856+
6857+ if (!vdir || IS_ERR(vdir)) {
6858+ dpri("err %ld\n", PTR_ERR(vdir));
6859+ return;
6860+ }
6861+
6862+ dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %lu\n",
6863+ vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk,
6864+ vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version);
6865+ for (ul = 0; ul < vdir->vd_nblk; ul++) {
6866+ p.deblk = vdir->vd_deblk[ul];
6867+ o = p.deblk;
6868+ dpri("[%lu]: %p\n", ul, o);
6869+ }
6870+}
6871+
53392da6 6872+static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode, int hn,
1facf9fc 6873+ struct dentry *wh)
6874+{
6875+ char *n = NULL;
6876+ int l = 0;
6877+
6878+ if (!inode || IS_ERR(inode)) {
6879+ dpri("i%d: err %ld\n", bindex, PTR_ERR(inode));
6880+ return -1;
6881+ }
6882+
c2b27bf2 6883+ /* the type of i_blocks depends upon CONFIG_LBDAF */
1facf9fc 6884+ BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long)
6885+ && sizeof(inode->i_blocks) != sizeof(u64));
6886+ if (wh) {
6887+ n = (void *)wh->d_name.name;
6888+ l = wh->d_name.len;
6889+ }
6890+
53392da6
AM
6891+ dpri("i%d: %p, i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu,"
6892+ " hn %d, ct %lld, np %lu, st 0x%lx, f 0x%x, v %llu, g %x%s%.*s\n",
6893+ bindex, inode,
1facf9fc 6894+ inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??",
6895+ atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode,
6896+ i_size_read(inode), (unsigned long long)inode->i_blocks,
53392da6 6897+ hn, (long long)timespec_to_ns(&inode->i_ctime) & 0x0ffff,
1facf9fc 6898+ inode->i_mapping ? inode->i_mapping->nrpages : 0,
b752ccd1
AM
6899+ inode->i_state, inode->i_flags, inode->i_version,
6900+ inode->i_generation,
1facf9fc 6901+ l ? ", wh " : "", l, n);
6902+ return 0;
6903+}
6904+
6905+void au_dpri_inode(struct inode *inode)
6906+{
6907+ struct au_iinfo *iinfo;
6908+ aufs_bindex_t bindex;
53392da6 6909+ int err, hn;
1facf9fc 6910+
53392da6 6911+ err = do_pri_inode(-1, inode, -1, NULL);
1facf9fc 6912+ if (err || !au_test_aufs(inode->i_sb))
6913+ return;
6914+
6915+ iinfo = au_ii(inode);
6916+ if (!iinfo)
6917+ return;
6918+ dpri("i-1: bstart %d, bend %d, gen %d\n",
537831f9 6919+ iinfo->ii_bstart, iinfo->ii_bend, au_iigen(inode, NULL));
1facf9fc 6920+ if (iinfo->ii_bstart < 0)
6921+ return;
53392da6
AM
6922+ hn = 0;
6923+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; bindex++) {
6924+ hn = !!au_hn(iinfo->ii_hinode + bindex);
6925+ do_pri_inode(bindex, iinfo->ii_hinode[0 + bindex].hi_inode, hn,
1facf9fc 6926+ iinfo->ii_hinode[0 + bindex].hi_whdentry);
53392da6 6927+ }
1facf9fc 6928+}
6929+
2cbb1c4b
JR
6930+void au_dpri_dalias(struct inode *inode)
6931+{
6932+ struct dentry *d;
6933+
6934+ spin_lock(&inode->i_lock);
c1595e42 6935+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias)
2cbb1c4b
JR
6936+ au_dpri_dentry(d);
6937+ spin_unlock(&inode->i_lock);
6938+}
6939+
1facf9fc 6940+static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry)
6941+{
6942+ struct dentry *wh = NULL;
53392da6 6943+ int hn;
076b876e 6944+ struct au_iinfo *iinfo;
1facf9fc 6945+
6946+ if (!dentry || IS_ERR(dentry)) {
6947+ dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry));
6948+ return -1;
6949+ }
6950+ /* do not call dget_parent() here */
027c5e7a 6951+ /* note: access d_xxx without d_lock */
523b37e3
AM
6952+ dpri("d%d: %p, %pd2?, %s, cnt %d, flags 0x%x, %shashed\n",
6953+ bindex, dentry, dentry,
1facf9fc 6954+ dentry->d_sb ? au_sbtype(dentry->d_sb) : "??",
c1595e42 6955+ au_dcount(dentry), dentry->d_flags,
523b37e3 6956+ d_unhashed(dentry) ? "un" : "");
53392da6 6957+ hn = -1;
5527c038
JR
6958+ if (bindex >= 0
6959+ && d_is_positive(dentry)
6960+ && au_test_aufs(dentry->d_sb)) {
6961+ iinfo = au_ii(d_inode(dentry));
53392da6
AM
6962+ if (iinfo) {
6963+ hn = !!au_hn(iinfo->ii_hinode + bindex);
1facf9fc 6964+ wh = iinfo->ii_hinode[0 + bindex].hi_whdentry;
53392da6 6965+ }
1facf9fc 6966+ }
5527c038 6967+ do_pri_inode(bindex, d_inode(dentry), hn, wh);
1facf9fc 6968+ return 0;
6969+}
6970+
6971+void au_dpri_dentry(struct dentry *dentry)
6972+{
6973+ struct au_dinfo *dinfo;
6974+ aufs_bindex_t bindex;
6975+ int err;
4a4d8108 6976+ struct au_hdentry *hdp;
1facf9fc 6977+
6978+ err = do_pri_dentry(-1, dentry);
6979+ if (err || !au_test_aufs(dentry->d_sb))
6980+ return;
6981+
6982+ dinfo = au_di(dentry);
6983+ if (!dinfo)
6984+ return;
38d290e6 6985+ dpri("d-1: bstart %d, bend %d, bwh %d, bdiropq %d, gen %d, tmp %d\n",
1facf9fc 6986+ dinfo->di_bstart, dinfo->di_bend,
38d290e6
JR
6987+ dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry),
6988+ dinfo->di_tmpfile);
1facf9fc 6989+ if (dinfo->di_bstart < 0)
6990+ return;
4a4d8108 6991+ hdp = dinfo->di_hdentry;
1facf9fc 6992+ for (bindex = dinfo->di_bstart; bindex <= dinfo->di_bend; bindex++)
4a4d8108 6993+ do_pri_dentry(bindex, hdp[0 + bindex].hd_dentry);
1facf9fc 6994+}
6995+
6996+static int do_pri_file(aufs_bindex_t bindex, struct file *file)
6997+{
6998+ char a[32];
6999+
7000+ if (!file || IS_ERR(file)) {
7001+ dpri("f%d: err %ld\n", bindex, PTR_ERR(file));
7002+ return -1;
7003+ }
7004+ a[0] = 0;
7005+ if (bindex < 0
b912730e 7006+ && !IS_ERR_OR_NULL(file->f_path.dentry)
2000de60 7007+ && au_test_aufs(file->f_path.dentry->d_sb)
1facf9fc 7008+ && au_fi(file))
e49829fe 7009+ snprintf(a, sizeof(a), ", gen %d, mmapped %d",
2cbb1c4b 7010+ au_figen(file), atomic_read(&au_fi(file)->fi_mmapped));
b752ccd1 7011+ dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, v %llu, pos %llu%s\n",
1facf9fc 7012+ bindex, file->f_mode, file->f_flags, (long)file_count(file),
b752ccd1 7013+ file->f_version, file->f_pos, a);
b912730e 7014+ if (!IS_ERR_OR_NULL(file->f_path.dentry))
2000de60 7015+ do_pri_dentry(bindex, file->f_path.dentry);
1facf9fc 7016+ return 0;
7017+}
7018+
7019+void au_dpri_file(struct file *file)
7020+{
7021+ struct au_finfo *finfo;
4a4d8108
AM
7022+ struct au_fidir *fidir;
7023+ struct au_hfile *hfile;
1facf9fc 7024+ aufs_bindex_t bindex;
7025+ int err;
7026+
7027+ err = do_pri_file(-1, file);
2000de60 7028+ if (err
b912730e 7029+ || IS_ERR_OR_NULL(file->f_path.dentry)
2000de60 7030+ || !au_test_aufs(file->f_path.dentry->d_sb))
1facf9fc 7031+ return;
7032+
7033+ finfo = au_fi(file);
7034+ if (!finfo)
7035+ return;
4a4d8108 7036+ if (finfo->fi_btop < 0)
1facf9fc 7037+ return;
4a4d8108
AM
7038+ fidir = finfo->fi_hdir;
7039+ if (!fidir)
7040+ do_pri_file(finfo->fi_btop, finfo->fi_htop.hf_file);
7041+ else
e49829fe
JR
7042+ for (bindex = finfo->fi_btop;
7043+ bindex >= 0 && bindex <= fidir->fd_bbot;
4a4d8108
AM
7044+ bindex++) {
7045+ hfile = fidir->fd_hfile + bindex;
7046+ do_pri_file(bindex, hfile ? hfile->hf_file : NULL);
7047+ }
1facf9fc 7048+}
7049+
7050+static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br)
7051+{
7052+ struct vfsmount *mnt;
7053+ struct super_block *sb;
7054+
7055+ if (!br || IS_ERR(br))
7056+ goto out;
86dc4139 7057+ mnt = au_br_mnt(br);
1facf9fc 7058+ if (!mnt || IS_ERR(mnt))
7059+ goto out;
7060+ sb = mnt->mnt_sb;
7061+ if (!sb || IS_ERR(sb))
7062+ goto out;
7063+
1e00d052 7064+ dpri("s%d: {perm 0x%x, id %d, cnt %d, wbr %p}, "
b752ccd1 7065+ "%s, dev 0x%02x%02x, flags 0x%lx, cnt %d, active %d, "
1facf9fc 7066+ "xino %d\n",
1e00d052
AM
7067+ bindex, br->br_perm, br->br_id, atomic_read(&br->br_count),
7068+ br->br_wbr, au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev),
b752ccd1 7069+ sb->s_flags, sb->s_count,
1facf9fc 7070+ atomic_read(&sb->s_active), !!br->br_xino.xi_file);
7071+ return 0;
7072+
4f0767ce 7073+out:
1facf9fc 7074+ dpri("s%d: err %ld\n", bindex, PTR_ERR(br));
7075+ return -1;
7076+}
7077+
7078+void au_dpri_sb(struct super_block *sb)
7079+{
7080+ struct au_sbinfo *sbinfo;
7081+ aufs_bindex_t bindex;
7082+ int err;
7083+ /* to reduce stack size */
7084+ struct {
7085+ struct vfsmount mnt;
7086+ struct au_branch fake;
7087+ } *a;
7088+
7089+ /* this function can be called from magic sysrq */
7090+ a = kzalloc(sizeof(*a), GFP_ATOMIC);
7091+ if (unlikely(!a)) {
7092+ dpri("no memory\n");
7093+ return;
7094+ }
7095+
7096+ a->mnt.mnt_sb = sb;
7097+ a->fake.br_perm = 0;
86dc4139 7098+ a->fake.br_path.mnt = &a->mnt;
1facf9fc 7099+ a->fake.br_xino.xi_file = NULL;
7100+ atomic_set(&a->fake.br_count, 0);
7101+ smp_mb(); /* atomic_set */
7102+ err = do_pri_br(-1, &a->fake);
7103+ kfree(a);
7104+ dpri("dev 0x%x\n", sb->s_dev);
7105+ if (err || !au_test_aufs(sb))
7106+ return;
7107+
7108+ sbinfo = au_sbi(sb);
7109+ if (!sbinfo)
7110+ return;
7111+ dpri("nw %d, gen %u, kobj %d\n",
7112+ atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation,
7113+ atomic_read(&sbinfo->si_kobj.kref.refcount));
7114+ for (bindex = 0; bindex <= sbinfo->si_bend; bindex++)
7115+ do_pri_br(bindex, sbinfo->si_branch[0 + bindex]);
7116+}
7117+
7118+/* ---------------------------------------------------------------------- */
7119+
027c5e7a
AM
7120+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line)
7121+{
5527c038 7122+ struct inode *h_inode, *inode = d_inode(dentry);
027c5e7a
AM
7123+ struct dentry *h_dentry;
7124+ aufs_bindex_t bindex, bend, bi;
7125+
7126+ if (!inode /* || au_di(dentry)->di_lsc == AuLsc_DI_TMP */)
7127+ return;
7128+
7129+ bend = au_dbend(dentry);
7130+ bi = au_ibend(inode);
7131+ if (bi < bend)
7132+ bend = bi;
7133+ bindex = au_dbstart(dentry);
7134+ bi = au_ibstart(inode);
7135+ if (bi > bindex)
7136+ bindex = bi;
7137+
7138+ for (; bindex <= bend; bindex++) {
7139+ h_dentry = au_h_dptr(dentry, bindex);
7140+ if (!h_dentry)
7141+ continue;
7142+ h_inode = au_h_iptr(inode, bindex);
5527c038 7143+ if (unlikely(h_inode != d_inode(h_dentry))) {
392086de 7144+ au_debug_on();
027c5e7a
AM
7145+ AuDbg("b%d, %s:%d\n", bindex, func, line);
7146+ AuDbgDentry(dentry);
7147+ AuDbgInode(inode);
392086de 7148+ au_debug_off();
027c5e7a
AM
7149+ BUG();
7150+ }
7151+ }
7152+}
7153+
1facf9fc 7154+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen)
7155+{
7156+ int err, i, j;
7157+ struct au_dcsub_pages dpages;
7158+ struct au_dpage *dpage;
7159+ struct dentry **dentries;
7160+
7161+ err = au_dpages_init(&dpages, GFP_NOFS);
7162+ AuDebugOn(err);
027c5e7a 7163+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/1);
1facf9fc 7164+ AuDebugOn(err);
7165+ for (i = dpages.ndpage - 1; !err && i >= 0; i--) {
7166+ dpage = dpages.dpages + i;
7167+ dentries = dpage->dentries;
7168+ for (j = dpage->ndentry - 1; !err && j >= 0; j--)
027c5e7a 7169+ AuDebugOn(au_digen_test(dentries[j], sigen));
1facf9fc 7170+ }
7171+ au_dpages_free(&dpages);
7172+}
7173+
1facf9fc 7174+void au_dbg_verify_kthread(void)
7175+{
53392da6 7176+ if (au_wkq_test()) {
1facf9fc 7177+ au_dbg_blocked();
1e00d052
AM
7178+ /*
7179+ * It may be recursive, but udba=notify between two aufs mounts,
7180+ * where a single ro branch is shared, is not a problem.
7181+ */
7182+ /* WARN_ON(1); */
1facf9fc 7183+ }
7184+}
7185+
7186+/* ---------------------------------------------------------------------- */
7187+
1facf9fc 7188+int __init au_debug_init(void)
7189+{
7190+ aufs_bindex_t bindex;
7191+ struct au_vdir_destr destr;
7192+
7193+ bindex = -1;
7194+ AuDebugOn(bindex >= 0);
7195+
7196+ destr.len = -1;
7197+ AuDebugOn(destr.len < NAME_MAX);
7198+
7199+#ifdef CONFIG_4KSTACKS
0c3ec466 7200+ pr_warn("CONFIG_4KSTACKS is defined.\n");
1facf9fc 7201+#endif
7202+
1facf9fc 7203+ return 0;
7204+}
7f207e10
AM
7205diff -urN /usr/share/empty/fs/aufs/debug.h linux/fs/aufs/debug.h
7206--- /usr/share/empty/fs/aufs/debug.h 1970-01-01 01:00:00.000000000 +0100
ab036dbd 7207+++ linux/fs/aufs/debug.h 2015-09-24 10:47:58.251386326 +0200
5527c038 7208@@ -0,0 +1,225 @@
1facf9fc 7209+/*
2000de60 7210+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 7211+ *
7212+ * This program, aufs is free software; you can redistribute it and/or modify
7213+ * it under the terms of the GNU General Public License as published by
7214+ * the Free Software Foundation; either version 2 of the License, or
7215+ * (at your option) any later version.
dece6358
AM
7216+ *
7217+ * This program is distributed in the hope that it will be useful,
7218+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7219+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7220+ * GNU General Public License for more details.
7221+ *
7222+ * You should have received a copy of the GNU General Public License
523b37e3 7223+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 7224+ */
7225+
7226+/*
7227+ * debug print functions
7228+ */
7229+
7230+#ifndef __AUFS_DEBUG_H__
7231+#define __AUFS_DEBUG_H__
7232+
7233+#ifdef __KERNEL__
7234+
392086de 7235+#include <linux/atomic.h>
4a4d8108
AM
7236+#include <linux/module.h>
7237+#include <linux/kallsyms.h>
1facf9fc 7238+#include <linux/sysrq.h>
4a4d8108 7239+
1facf9fc 7240+#ifdef CONFIG_AUFS_DEBUG
7241+#define AuDebugOn(a) BUG_ON(a)
7242+
7243+/* module parameter */
392086de
AM
7244+extern atomic_t aufs_debug;
7245+static inline void au_debug_on(void)
1facf9fc 7246+{
392086de
AM
7247+ atomic_inc(&aufs_debug);
7248+}
7249+static inline void au_debug_off(void)
7250+{
7251+ atomic_dec_if_positive(&aufs_debug);
1facf9fc 7252+}
7253+
7254+static inline int au_debug_test(void)
7255+{
392086de 7256+ return atomic_read(&aufs_debug) > 0;
1facf9fc 7257+}
7258+#else
7259+#define AuDebugOn(a) do {} while (0)
392086de
AM
7260+AuStubVoid(au_debug_on, void)
7261+AuStubVoid(au_debug_off, void)
4a4d8108 7262+AuStubInt0(au_debug_test, void)
1facf9fc 7263+#endif /* CONFIG_AUFS_DEBUG */
7264+
392086de
AM
7265+#define param_check_atomic_t(name, p) __param_check(name, p, atomic_t)
7266+
1facf9fc 7267+/* ---------------------------------------------------------------------- */
7268+
7269+/* debug print */
7270+
4a4d8108 7271+#define AuDbg(fmt, ...) do { \
1facf9fc 7272+ if (au_debug_test()) \
4a4d8108 7273+ pr_debug("DEBUG: " fmt, ##__VA_ARGS__); \
1facf9fc 7274+} while (0)
4a4d8108
AM
7275+#define AuLabel(l) AuDbg(#l "\n")
7276+#define AuIOErr(fmt, ...) pr_err("I/O Error, " fmt, ##__VA_ARGS__)
7277+#define AuWarn1(fmt, ...) do { \
1facf9fc 7278+ static unsigned char _c; \
7279+ if (!_c++) \
0c3ec466 7280+ pr_warn(fmt, ##__VA_ARGS__); \
1facf9fc 7281+} while (0)
7282+
4a4d8108 7283+#define AuErr1(fmt, ...) do { \
1facf9fc 7284+ static unsigned char _c; \
7285+ if (!_c++) \
4a4d8108 7286+ pr_err(fmt, ##__VA_ARGS__); \
1facf9fc 7287+} while (0)
7288+
4a4d8108 7289+#define AuIOErr1(fmt, ...) do { \
1facf9fc 7290+ static unsigned char _c; \
7291+ if (!_c++) \
4a4d8108 7292+ AuIOErr(fmt, ##__VA_ARGS__); \
1facf9fc 7293+} while (0)
7294+
7295+#define AuUnsupportMsg "This operation is not supported." \
7296+ " Please report this application to aufs-users ML."
4a4d8108
AM
7297+#define AuUnsupport(fmt, ...) do { \
7298+ pr_err(AuUnsupportMsg "\n" fmt, ##__VA_ARGS__); \
1facf9fc 7299+ dump_stack(); \
7300+} while (0)
7301+
7302+#define AuTraceErr(e) do { \
7303+ if (unlikely((e) < 0)) \
7304+ AuDbg("err %d\n", (int)(e)); \
7305+} while (0)
7306+
7307+#define AuTraceErrPtr(p) do { \
7308+ if (IS_ERR(p)) \
7309+ AuDbg("err %ld\n", PTR_ERR(p)); \
7310+} while (0)
7311+
7312+/* dirty macros for debug print, use with "%.*s" and caution */
7313+#define AuLNPair(qstr) (qstr)->len, (qstr)->name
1facf9fc 7314+
7315+/* ---------------------------------------------------------------------- */
7316+
dece6358 7317+struct dentry;
1facf9fc 7318+#ifdef CONFIG_AUFS_DEBUG
c1595e42 7319+extern struct mutex au_dbg_mtx;
1facf9fc 7320+extern char *au_plevel;
7321+struct au_nhash;
7322+void au_dpri_whlist(struct au_nhash *whlist);
7323+struct au_vdir;
7324+void au_dpri_vdir(struct au_vdir *vdir);
dece6358 7325+struct inode;
1facf9fc 7326+void au_dpri_inode(struct inode *inode);
2cbb1c4b 7327+void au_dpri_dalias(struct inode *inode);
1facf9fc 7328+void au_dpri_dentry(struct dentry *dentry);
dece6358 7329+struct file;
1facf9fc 7330+void au_dpri_file(struct file *filp);
dece6358 7331+struct super_block;
1facf9fc 7332+void au_dpri_sb(struct super_block *sb);
7333+
027c5e7a
AM
7334+#define au_dbg_verify_dinode(d) __au_dbg_verify_dinode(d, __func__, __LINE__)
7335+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line);
1facf9fc 7336+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen);
1facf9fc 7337+void au_dbg_verify_kthread(void);
7338+
7339+int __init au_debug_init(void);
7e9cd9fe 7340+
1facf9fc 7341+#define AuDbgWhlist(w) do { \
c1595e42 7342+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7343+ AuDbg(#w "\n"); \
7344+ au_dpri_whlist(w); \
c1595e42 7345+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7346+} while (0)
7347+
7348+#define AuDbgVdir(v) do { \
c1595e42 7349+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7350+ AuDbg(#v "\n"); \
7351+ au_dpri_vdir(v); \
c1595e42 7352+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7353+} while (0)
7354+
7355+#define AuDbgInode(i) do { \
c1595e42 7356+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7357+ AuDbg(#i "\n"); \
7358+ au_dpri_inode(i); \
c1595e42 7359+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7360+} while (0)
7361+
2cbb1c4b 7362+#define AuDbgDAlias(i) do { \
c1595e42 7363+ mutex_lock(&au_dbg_mtx); \
2cbb1c4b
JR
7364+ AuDbg(#i "\n"); \
7365+ au_dpri_dalias(i); \
c1595e42 7366+ mutex_unlock(&au_dbg_mtx); \
2cbb1c4b
JR
7367+} while (0)
7368+
1facf9fc 7369+#define AuDbgDentry(d) do { \
c1595e42 7370+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7371+ AuDbg(#d "\n"); \
7372+ au_dpri_dentry(d); \
c1595e42 7373+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7374+} while (0)
7375+
7376+#define AuDbgFile(f) do { \
c1595e42 7377+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7378+ AuDbg(#f "\n"); \
7379+ au_dpri_file(f); \
c1595e42 7380+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7381+} while (0)
7382+
7383+#define AuDbgSb(sb) do { \
c1595e42 7384+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7385+ AuDbg(#sb "\n"); \
7386+ au_dpri_sb(sb); \
c1595e42 7387+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7388+} while (0)
7389+
4a4d8108
AM
7390+#define AuDbgSym(addr) do { \
7391+ char sym[KSYM_SYMBOL_LEN]; \
7392+ sprint_symbol(sym, (unsigned long)addr); \
7393+ AuDbg("%s\n", sym); \
7394+} while (0)
1facf9fc 7395+#else
027c5e7a 7396+AuStubVoid(au_dbg_verify_dinode, struct dentry *dentry)
4a4d8108
AM
7397+AuStubVoid(au_dbg_verify_gen, struct dentry *parent, unsigned int sigen)
7398+AuStubVoid(au_dbg_verify_kthread, void)
7399+AuStubInt0(__init au_debug_init, void)
1facf9fc 7400+
1facf9fc 7401+#define AuDbgWhlist(w) do {} while (0)
7402+#define AuDbgVdir(v) do {} while (0)
7403+#define AuDbgInode(i) do {} while (0)
2cbb1c4b 7404+#define AuDbgDAlias(i) do {} while (0)
1facf9fc 7405+#define AuDbgDentry(d) do {} while (0)
7406+#define AuDbgFile(f) do {} while (0)
7407+#define AuDbgSb(sb) do {} while (0)
4a4d8108 7408+#define AuDbgSym(addr) do {} while (0)
1facf9fc 7409+#endif /* CONFIG_AUFS_DEBUG */
7410+
7411+/* ---------------------------------------------------------------------- */
7412+
7413+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
7414+int __init au_sysrq_init(void);
7415+void au_sysrq_fin(void);
7416+
7417+#ifdef CONFIG_HW_CONSOLE
7418+#define au_dbg_blocked() do { \
7419+ WARN_ON(1); \
0c5527e5 7420+ handle_sysrq('w'); \
1facf9fc 7421+} while (0)
7422+#else
4a4d8108 7423+AuStubVoid(au_dbg_blocked, void)
1facf9fc 7424+#endif
7425+
7426+#else
4a4d8108
AM
7427+AuStubInt0(__init au_sysrq_init, void)
7428+AuStubVoid(au_sysrq_fin, void)
7429+AuStubVoid(au_dbg_blocked, void)
1facf9fc 7430+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
7431+
7432+#endif /* __KERNEL__ */
7433+#endif /* __AUFS_DEBUG_H__ */
7f207e10
AM
7434diff -urN /usr/share/empty/fs/aufs/dentry.c linux/fs/aufs/dentry.c
7435--- /usr/share/empty/fs/aufs/dentry.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd
AM
7436+++ linux/fs/aufs/dentry.c 2015-11-11 17:21:46.918863802 +0100
7437@@ -0,0 +1,1136 @@
1facf9fc 7438+/*
2000de60 7439+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 7440+ *
7441+ * This program, aufs is free software; you can redistribute it and/or modify
7442+ * it under the terms of the GNU General Public License as published by
7443+ * the Free Software Foundation; either version 2 of the License, or
7444+ * (at your option) any later version.
dece6358
AM
7445+ *
7446+ * This program is distributed in the hope that it will be useful,
7447+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7448+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7449+ * GNU General Public License for more details.
7450+ *
7451+ * You should have received a copy of the GNU General Public License
523b37e3 7452+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 7453+ */
7454+
7455+/*
7456+ * lookup and dentry operations
7457+ */
7458+
dece6358 7459+#include <linux/namei.h>
1facf9fc 7460+#include "aufs.h"
7461+
1facf9fc 7462+#define AuLkup_ALLOW_NEG 1
076b876e 7463+#define AuLkup_IGNORE_PERM (1 << 1)
1facf9fc 7464+#define au_ftest_lkup(flags, name) ((flags) & AuLkup_##name)
7f207e10
AM
7465+#define au_fset_lkup(flags, name) \
7466+ do { (flags) |= AuLkup_##name; } while (0)
7467+#define au_fclr_lkup(flags, name) \
7468+ do { (flags) &= ~AuLkup_##name; } while (0)
1facf9fc 7469+
7470+struct au_do_lookup_args {
7471+ unsigned int flags;
7472+ mode_t type;
1facf9fc 7473+};
7474+
7475+/*
7476+ * returns positive/negative dentry, NULL or an error.
7477+ * NULL means whiteout-ed or not-found.
7478+ */
7479+static struct dentry*
7480+au_do_lookup(struct dentry *h_parent, struct dentry *dentry,
7481+ aufs_bindex_t bindex, struct qstr *wh_name,
7482+ struct au_do_lookup_args *args)
7483+{
7484+ struct dentry *h_dentry;
2000de60 7485+ struct inode *h_inode;
1facf9fc 7486+ struct au_branch *br;
7487+ int wh_found, opq;
7488+ unsigned char wh_able;
7489+ const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG);
076b876e
AM
7490+ const unsigned char ignore_perm = !!au_ftest_lkup(args->flags,
7491+ IGNORE_PERM);
1facf9fc 7492+
1facf9fc 7493+ wh_found = 0;
7494+ br = au_sbr(dentry->d_sb, bindex);
7495+ wh_able = !!au_br_whable(br->br_perm);
7496+ if (wh_able)
076b876e 7497+ wh_found = au_wh_test(h_parent, wh_name, /*try_sio*/0);
1facf9fc 7498+ h_dentry = ERR_PTR(wh_found);
7499+ if (!wh_found)
7500+ goto real_lookup;
7501+ if (unlikely(wh_found < 0))
7502+ goto out;
7503+
7504+ /* We found a whiteout */
7505+ /* au_set_dbend(dentry, bindex); */
7506+ au_set_dbwh(dentry, bindex);
7507+ if (!allow_neg)
7508+ return NULL; /* success */
7509+
4f0767ce 7510+real_lookup:
076b876e
AM
7511+ if (!ignore_perm)
7512+ h_dentry = vfsub_lkup_one(&dentry->d_name, h_parent);
7513+ else
7514+ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent);
2000de60
JR
7515+ if (IS_ERR(h_dentry)) {
7516+ if (PTR_ERR(h_dentry) == -ENAMETOOLONG
7517+ && !allow_neg)
7518+ h_dentry = NULL;
1facf9fc 7519+ goto out;
2000de60 7520+ }
1facf9fc 7521+
5527c038
JR
7522+ h_inode = d_inode(h_dentry);
7523+ if (d_is_negative(h_dentry)) {
1facf9fc 7524+ if (!allow_neg)
7525+ goto out_neg;
7526+ } else if (wh_found
7527+ || (args->type && args->type != (h_inode->i_mode & S_IFMT)))
7528+ goto out_neg;
7529+
7530+ if (au_dbend(dentry) <= bindex)
7531+ au_set_dbend(dentry, bindex);
7532+ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
7533+ au_set_dbstart(dentry, bindex);
7534+ au_set_h_dptr(dentry, bindex, h_dentry);
7535+
2000de60
JR
7536+ if (!d_is_dir(h_dentry)
7537+ || !wh_able
5527c038 7538+ || (d_really_is_positive(dentry) && !d_is_dir(dentry)))
1facf9fc 7539+ goto out; /* success */
7540+
7541+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
076b876e 7542+ opq = au_diropq_test(h_dentry);
1facf9fc 7543+ mutex_unlock(&h_inode->i_mutex);
7544+ if (opq > 0)
7545+ au_set_dbdiropq(dentry, bindex);
7546+ else if (unlikely(opq < 0)) {
7547+ au_set_h_dptr(dentry, bindex, NULL);
7548+ h_dentry = ERR_PTR(opq);
7549+ }
7550+ goto out;
7551+
4f0767ce 7552+out_neg:
1facf9fc 7553+ dput(h_dentry);
7554+ h_dentry = NULL;
4f0767ce 7555+out:
1facf9fc 7556+ return h_dentry;
7557+}
7558+
dece6358
AM
7559+static int au_test_shwh(struct super_block *sb, const struct qstr *name)
7560+{
7561+ if (unlikely(!au_opt_test(au_mntflags(sb), SHWH)
7562+ && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)))
7563+ return -EPERM;
7564+ return 0;
7565+}
7566+
1facf9fc 7567+/*
7568+ * returns the number of lower positive dentries,
7569+ * otherwise an error.
7570+ * can be called at unlinking with @type set to zero.
7571+ */
537831f9 7572+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type)
1facf9fc 7573+{
7574+ int npositive, err;
7575+ aufs_bindex_t bindex, btail, bdiropq;
076b876e 7576+ unsigned char isdir, dirperm1;
1facf9fc 7577+ struct qstr whname;
7578+ struct au_do_lookup_args args = {
b4510431 7579+ .flags = 0,
537831f9 7580+ .type = type
1facf9fc 7581+ };
7582+ const struct qstr *name = &dentry->d_name;
7583+ struct dentry *parent;
076b876e 7584+ struct super_block *sb;
1facf9fc 7585+
076b876e
AM
7586+ sb = dentry->d_sb;
7587+ err = au_test_shwh(sb, name);
dece6358 7588+ if (unlikely(err))
1facf9fc 7589+ goto out;
7590+
7591+ err = au_wh_name_alloc(&whname, name);
7592+ if (unlikely(err))
7593+ goto out;
7594+
2000de60 7595+ isdir = !!d_is_dir(dentry);
1facf9fc 7596+ if (!type)
7597+ au_fset_lkup(args.flags, ALLOW_NEG);
076b876e 7598+ dirperm1 = !!au_opt_test(au_mntflags(sb), DIRPERM1);
1facf9fc 7599+
7600+ npositive = 0;
4a4d8108 7601+ parent = dget_parent(dentry);
1facf9fc 7602+ btail = au_dbtaildir(parent);
7603+ for (bindex = bstart; bindex <= btail; bindex++) {
7604+ struct dentry *h_parent, *h_dentry;
7605+ struct inode *h_inode, *h_dir;
7606+
7607+ h_dentry = au_h_dptr(dentry, bindex);
7608+ if (h_dentry) {
5527c038 7609+ if (d_is_positive(h_dentry))
1facf9fc 7610+ npositive++;
7611+ if (type != S_IFDIR)
7612+ break;
7613+ continue;
7614+ }
7615+ h_parent = au_h_dptr(parent, bindex);
2000de60 7616+ if (!h_parent || !d_is_dir(h_parent))
1facf9fc 7617+ continue;
7618+
5527c038 7619+ h_dir = d_inode(h_parent);
1facf9fc 7620+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
7621+ h_dentry = au_do_lookup(h_parent, dentry, bindex, &whname,
7622+ &args);
7623+ mutex_unlock(&h_dir->i_mutex);
7624+ err = PTR_ERR(h_dentry);
7625+ if (IS_ERR(h_dentry))
4a4d8108 7626+ goto out_parent;
2000de60
JR
7627+ if (h_dentry)
7628+ au_fclr_lkup(args.flags, ALLOW_NEG);
076b876e
AM
7629+ if (dirperm1)
7630+ au_fset_lkup(args.flags, IGNORE_PERM);
1facf9fc 7631+
ab036dbd 7632+ if (au_dbwh(dentry) == bindex)
1facf9fc 7633+ break;
7634+ if (!h_dentry)
7635+ continue;
5527c038 7636+ if (d_is_negative(h_dentry))
1facf9fc 7637+ continue;
5527c038 7638+ h_inode = d_inode(h_dentry);
1facf9fc 7639+ npositive++;
7640+ if (!args.type)
7641+ args.type = h_inode->i_mode & S_IFMT;
7642+ if (args.type != S_IFDIR)
7643+ break;
7644+ else if (isdir) {
7645+ /* the type of lower may be different */
7646+ bdiropq = au_dbdiropq(dentry);
7647+ if (bdiropq >= 0 && bdiropq <= bindex)
7648+ break;
7649+ }
7650+ }
7651+
7652+ if (npositive) {
7653+ AuLabel(positive);
7654+ au_update_dbstart(dentry);
7655+ }
7656+ err = npositive;
076b876e 7657+ if (unlikely(!au_opt_test(au_mntflags(sb), UDBA_NONE)
027c5e7a 7658+ && au_dbstart(dentry) < 0)) {
1facf9fc 7659+ err = -EIO;
523b37e3
AM
7660+ AuIOErr("both of real entry and whiteout found, %pd, err %d\n",
7661+ dentry, err);
027c5e7a 7662+ }
1facf9fc 7663+
4f0767ce 7664+out_parent:
4a4d8108 7665+ dput(parent);
1facf9fc 7666+ kfree(whname.name);
4f0767ce 7667+out:
1facf9fc 7668+ return err;
7669+}
7670+
076b876e 7671+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent)
1facf9fc 7672+{
7673+ struct dentry *dentry;
7674+ int wkq_err;
7675+
5527c038 7676+ if (!au_test_h_perm_sio(d_inode(parent), MAY_EXEC))
b4510431 7677+ dentry = vfsub_lkup_one(name, parent);
1facf9fc 7678+ else {
b4510431
AM
7679+ struct vfsub_lkup_one_args args = {
7680+ .errp = &dentry,
7681+ .name = name,
7682+ .parent = parent
1facf9fc 7683+ };
7684+
b4510431 7685+ wkq_err = au_wkq_wait(vfsub_call_lkup_one, &args);
1facf9fc 7686+ if (unlikely(wkq_err))
7687+ dentry = ERR_PTR(wkq_err);
7688+ }
7689+
7690+ return dentry;
7691+}
7692+
7693+/*
7694+ * lookup @dentry on @bindex which should be negative.
7695+ */
86dc4139 7696+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh)
1facf9fc 7697+{
7698+ int err;
7699+ struct dentry *parent, *h_parent, *h_dentry;
86dc4139 7700+ struct au_branch *br;
1facf9fc 7701+
1facf9fc 7702+ parent = dget_parent(dentry);
7703+ h_parent = au_h_dptr(parent, bindex);
86dc4139
AM
7704+ br = au_sbr(dentry->d_sb, bindex);
7705+ if (wh)
7706+ h_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
7707+ else
076b876e 7708+ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent);
1facf9fc 7709+ err = PTR_ERR(h_dentry);
7710+ if (IS_ERR(h_dentry))
7711+ goto out;
5527c038 7712+ if (unlikely(d_is_positive(h_dentry))) {
1facf9fc 7713+ err = -EIO;
523b37e3 7714+ AuIOErr("%pd should be negative on b%d.\n", h_dentry, bindex);
1facf9fc 7715+ dput(h_dentry);
7716+ goto out;
7717+ }
7718+
4a4d8108 7719+ err = 0;
1facf9fc 7720+ if (bindex < au_dbstart(dentry))
7721+ au_set_dbstart(dentry, bindex);
7722+ if (au_dbend(dentry) < bindex)
7723+ au_set_dbend(dentry, bindex);
7724+ au_set_h_dptr(dentry, bindex, h_dentry);
1facf9fc 7725+
4f0767ce 7726+out:
1facf9fc 7727+ dput(parent);
7728+ return err;
7729+}
7730+
7731+/* ---------------------------------------------------------------------- */
7732+
7733+/* subset of struct inode */
7734+struct au_iattr {
7735+ unsigned long i_ino;
7736+ /* unsigned int i_nlink; */
0c3ec466
AM
7737+ kuid_t i_uid;
7738+ kgid_t i_gid;
1facf9fc 7739+ u64 i_version;
7740+/*
7741+ loff_t i_size;
7742+ blkcnt_t i_blocks;
7743+*/
7744+ umode_t i_mode;
7745+};
7746+
7747+static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode)
7748+{
7749+ ia->i_ino = h_inode->i_ino;
7750+ /* ia->i_nlink = h_inode->i_nlink; */
7751+ ia->i_uid = h_inode->i_uid;
7752+ ia->i_gid = h_inode->i_gid;
7753+ ia->i_version = h_inode->i_version;
7754+/*
7755+ ia->i_size = h_inode->i_size;
7756+ ia->i_blocks = h_inode->i_blocks;
7757+*/
7758+ ia->i_mode = (h_inode->i_mode & S_IFMT);
7759+}
7760+
7761+static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode)
7762+{
7763+ return ia->i_ino != h_inode->i_ino
7764+ /* || ia->i_nlink != h_inode->i_nlink */
0c3ec466 7765+ || !uid_eq(ia->i_uid, h_inode->i_uid)
2dfbb274 7766+ || !gid_eq(ia->i_gid, h_inode->i_gid)
1facf9fc 7767+ || ia->i_version != h_inode->i_version
7768+/*
7769+ || ia->i_size != h_inode->i_size
7770+ || ia->i_blocks != h_inode->i_blocks
7771+*/
7772+ || ia->i_mode != (h_inode->i_mode & S_IFMT);
7773+}
7774+
7775+static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent,
7776+ struct au_branch *br)
7777+{ /* verify by a fresh lookup that h_dentry is still what its name resolves to under h_parent; @br is not referenced here */
7778+ int err;
7779+ struct au_iattr ia;
7780+ struct inode *h_inode;
7781+ struct dentry *h_d;
7782+ struct super_block *h_sb;
7783+
7784+ err = 0;
7785+ memset(&ia, -1, sizeof(ia)); /* poison; ia is only compared after au_iattr_save() filled it */
7786+ h_sb = h_dentry->d_sb;
5527c038
JR
7787+ h_inode = NULL;
7788+ if (d_is_positive(h_dentry)) {
7789+ h_inode = d_inode(h_dentry);
1facf9fc 7790+ au_iattr_save(&ia, h_inode);
5527c038 7791+ } else if (au_test_nfs(h_sb) || au_test_fuse(h_sb))
1facf9fc 7792+ /* nfs d_revalidate may return 0 for a negative dentry, so skip the re-lookup */
7793+ /* fuse d_revalidate always returns 0 for a negative dentry */
7794+ goto out;
7795+
7796+ /* look the name up again; main purpose is namei.c:cached_lookup() and d_revalidate */
b4510431 7797+ h_d = vfsub_lkup_one(&h_dentry->d_name, h_parent);
1facf9fc 7798+ err = PTR_ERR(h_d);
7799+ if (IS_ERR(h_d))
7800+ goto out;
7801+
7802+ err = 0;
7803+ if (unlikely(h_d != h_dentry
5527c038 7804+ || d_inode(h_d) != h_inode
1facf9fc 7805+ || (h_inode && au_iattr_test(&ia, h_inode))))
7806+ err = au_busy_or_stale(); /* re-lookup gave another dentry/inode, or the saved attributes changed */
7807+ dput(h_d);
7808+
4f0767ce 7809+out:
1facf9fc 7810+ AuTraceErr(err);
7811+ return err;
7812+}
7813+
7814+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
7815+ struct dentry *h_parent, struct au_branch *br)
7816+{
7817+ int err;
7818+
7819+ err = 0;
027c5e7a
AM
7820+ if (udba == AuOpt_UDBA_REVAL
7821+ && !au_test_fs_remote(h_dentry->d_sb)) {
1facf9fc 7822+ IMustLock(h_dir);
5527c038 7823+ err = (d_inode(h_dentry->d_parent) != h_dir);
027c5e7a 7824+ } else if (udba != AuOpt_UDBA_NONE)
1facf9fc 7825+ err = au_h_verify_dentry(h_dentry, h_parent, br);
7826+
7827+ return err;
7828+}
7829+
7830+/* ---------------------------------------------------------------------- */
7831+
027c5e7a 7832+static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent)
1facf9fc 7833+{
027c5e7a 7834+ int err;
1facf9fc 7835+ aufs_bindex_t new_bindex, bindex, bend, bwh, bdiropq;
027c5e7a
AM
7836+ struct au_hdentry tmp, *p, *q;
7837+ struct au_dinfo *dinfo;
7838+ struct super_block *sb;
1facf9fc 7839+
027c5e7a 7840+ DiMustWriteLock(dentry);
1308ab2a 7841+
027c5e7a
AM
7842+ sb = dentry->d_sb;
7843+ dinfo = au_di(dentry);
1facf9fc 7844+ bend = dinfo->di_bend;
7845+ bwh = dinfo->di_bwh;
7846+ bdiropq = dinfo->di_bdiropq;
027c5e7a 7847+ p = dinfo->di_hdentry + dinfo->di_bstart;
1facf9fc 7848+ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++, p++) {
027c5e7a 7849+ if (!p->hd_dentry)
1facf9fc 7850+ continue;
7851+
027c5e7a
AM
7852+ new_bindex = au_br_index(sb, p->hd_id);
7853+ if (new_bindex == bindex)
1facf9fc 7854+ continue;
1facf9fc 7855+
1facf9fc 7856+ if (dinfo->di_bwh == bindex)
7857+ bwh = new_bindex;
7858+ if (dinfo->di_bdiropq == bindex)
7859+ bdiropq = new_bindex;
7860+ if (new_bindex < 0) {
7861+ au_hdput(p);
7862+ p->hd_dentry = NULL;
7863+ continue;
7864+ }
7865+
7866+ /* swap two lower dentries, and loop again */
7867+ q = dinfo->di_hdentry + new_bindex;
7868+ tmp = *q;
7869+ *q = *p;
7870+ *p = tmp;
7871+ if (tmp.hd_dentry) {
7872+ bindex--;
7873+ p--;
7874+ }
7875+ }
7876+
1facf9fc 7877+ dinfo->di_bwh = -1;
7878+ if (bwh >= 0 && bwh <= au_sbend(sb) && au_sbr_whable(sb, bwh))
7879+ dinfo->di_bwh = bwh;
7880+
7881+ dinfo->di_bdiropq = -1;
7882+ if (bdiropq >= 0
7883+ && bdiropq <= au_sbend(sb)
7884+ && au_sbr_whable(sb, bdiropq))
7885+ dinfo->di_bdiropq = bdiropq;
7886+
027c5e7a
AM
7887+ err = -EIO;
7888+ dinfo->di_bstart = -1;
7889+ dinfo->di_bend = -1;
1facf9fc 7890+ bend = au_dbend(parent);
7891+ p = dinfo->di_hdentry;
7892+ for (bindex = 0; bindex <= bend; bindex++, p++)
7893+ if (p->hd_dentry) {
7894+ dinfo->di_bstart = bindex;
7895+ break;
7896+ }
7897+
027c5e7a
AM
7898+ if (dinfo->di_bstart >= 0) {
7899+ p = dinfo->di_hdentry + bend;
7900+ for (bindex = bend; bindex >= 0; bindex--, p--)
7901+ if (p->hd_dentry) {
7902+ dinfo->di_bend = bindex;
7903+ err = 0;
7904+ break;
7905+ }
7906+ }
7907+
7908+ return err;
1facf9fc 7909+}
7910+
027c5e7a 7911+static void au_do_hide(struct dentry *dentry)
1facf9fc 7912+{
027c5e7a 7913+ struct inode *inode;
1facf9fc 7914+
5527c038
JR
7915+ if (d_really_is_positive(dentry)) {
7916+ inode = d_inode(dentry);
7917+ if (!d_is_dir(dentry)) {
027c5e7a
AM
7918+ if (inode->i_nlink && !d_unhashed(dentry))
7919+ drop_nlink(inode);
7920+ } else {
7921+ clear_nlink(inode);
7922+ /* stop next lookup */
7923+ inode->i_flags |= S_DEAD;
7924+ }
7925+ smp_mb(); /* necessary? */
7926+ }
7927+ d_drop(dentry);
7928+}
1308ab2a 7929+
027c5e7a
AM
7930+static int au_hide_children(struct dentry *parent)
7931+{
7932+ int err, i, j, ndentry;
7933+ struct au_dcsub_pages dpages;
7934+ struct au_dpage *dpage;
7935+ struct dentry *dentry;
1facf9fc 7936+
027c5e7a 7937+ err = au_dpages_init(&dpages, GFP_NOFS);
1facf9fc 7938+ if (unlikely(err))
7939+ goto out;
027c5e7a
AM
7940+ err = au_dcsub_pages(&dpages, parent, NULL, NULL);
7941+ if (unlikely(err))
7942+ goto out_dpages;
1facf9fc 7943+
027c5e7a
AM
7944+ /* in reverse order */
7945+ for (i = dpages.ndpage - 1; i >= 0; i--) {
7946+ dpage = dpages.dpages + i;
7947+ ndentry = dpage->ndentry;
7948+ for (j = ndentry - 1; j >= 0; j--) {
7949+ dentry = dpage->dentries[j];
7950+ if (dentry != parent)
7951+ au_do_hide(dentry);
7952+ }
7953+ }
1facf9fc 7954+
027c5e7a
AM
7955+out_dpages:
7956+ au_dpages_free(&dpages);
4f0767ce 7957+out:
027c5e7a 7958+ return err;
1facf9fc 7959+}
7960+
027c5e7a 7961+static void au_hide(struct dentry *dentry)
1facf9fc 7962+{
027c5e7a 7963+ int err;
1facf9fc 7964+
027c5e7a 7965+ AuDbgDentry(dentry);
2000de60 7966+ if (d_is_dir(dentry)) {
027c5e7a
AM
7967+ /* shrink_dcache_parent(dentry); */
7968+ err = au_hide_children(dentry);
7969+ if (unlikely(err))
523b37e3
AM
7970+ AuIOErr("%pd, failed hiding children, ignored %d\n",
7971+ dentry, err);
027c5e7a
AM
7972+ }
7973+ au_do_hide(dentry);
7974+}
1facf9fc 7975+
027c5e7a
AM
7976+/*
7977+ * By adding a dirty branch, a cached dentry may be affected in various ways.
7978+ *
7979+ * a dirty branch is added
7980+ * - on the top of layers
7981+ * - in the middle of layers
7982+ * - to the bottom of layers
7983+ *
7984+ * on the added branch there exists
7985+ * - a whiteout
7986+ * - a diropq
7987+ * - a same named entry
7988+ * + exist
7989+ * * negative --> positive
7990+ * * positive --> positive
7991+ * - type is unchanged
7992+ * - type is changed
7993+ * + doesn't exist
7994+ * * negative --> negative
7995+ * * positive --> negative (rejected by au_br_del() for non-dir case)
7996+ * - none
7997+ */
7998+static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo,
7999+ struct au_dinfo *tmp)
8000+{
8001+ int err;
8002+ aufs_bindex_t bindex, bend;
8003+ struct {
8004+ struct dentry *dentry;
8005+ struct inode *inode;
8006+ mode_t mode;
8007+ } orig_h, tmp_h;
8008+ struct au_hdentry *hd;
8009+ struct inode *inode, *h_inode;
8010+ struct dentry *h_dentry;
8011+
8012+ err = 0;
8013+ AuDebugOn(dinfo->di_bstart < 0);
027c5e7a 8014+ orig_h.mode = 0;
5527c038
JR
8015+ orig_h.dentry = dinfo->di_hdentry[dinfo->di_bstart].hd_dentry;
8016+ orig_h.inode = NULL;
8017+ if (d_is_positive(orig_h.dentry)) {
8018+ orig_h.inode = d_inode(orig_h.dentry);
027c5e7a 8019+ orig_h.mode = orig_h.inode->i_mode & S_IFMT;
5527c038 8020+ }
027c5e7a
AM
8021+ memset(&tmp_h, 0, sizeof(tmp_h));
8022+ if (tmp->di_bstart >= 0) {
8023+ tmp_h.dentry = tmp->di_hdentry[tmp->di_bstart].hd_dentry;
5527c038
JR
8024+ tmp_h.inode = NULL;
8025+ if (d_is_positive(tmp_h.dentry)) {
8026+ tmp_h.inode = d_inode(tmp_h.dentry);
027c5e7a 8027+ tmp_h.mode = tmp_h.inode->i_mode & S_IFMT;
5527c038 8028+ }
027c5e7a
AM
8029+ }
8030+
5527c038
JR
8031+ inode = NULL;
8032+ if (d_really_is_positive(dentry))
8033+ inode = d_inode(dentry);
027c5e7a
AM
8034+ if (!orig_h.inode) {
8035+ AuDbg("nagative originally\n");
8036+ if (inode) {
8037+ au_hide(dentry);
8038+ goto out;
8039+ }
8040+ AuDebugOn(inode);
8041+ AuDebugOn(dinfo->di_bstart != dinfo->di_bend);
8042+ AuDebugOn(dinfo->di_bdiropq != -1);
8043+
8044+ if (!tmp_h.inode) {
8045+ AuDbg("negative --> negative\n");
8046+ /* should have only one negative lower */
8047+ if (tmp->di_bstart >= 0
8048+ && tmp->di_bstart < dinfo->di_bstart) {
8049+ AuDebugOn(tmp->di_bstart != tmp->di_bend);
8050+ AuDebugOn(dinfo->di_bstart != dinfo->di_bend);
8051+ au_set_h_dptr(dentry, dinfo->di_bstart, NULL);
8052+ au_di_cp(dinfo, tmp);
8053+ hd = tmp->di_hdentry + tmp->di_bstart;
8054+ au_set_h_dptr(dentry, tmp->di_bstart,
8055+ dget(hd->hd_dentry));
8056+ }
8057+ au_dbg_verify_dinode(dentry);
8058+ } else {
8059+ AuDbg("negative --> positive\n");
8060+ /*
8061+ * similar to the behaviour of creating with bypassing
8062+ * aufs.
8063+ * unhash it in order to force an error in the
8064+ * succeeding create operation.
8065+ * we should not set S_DEAD here.
8066+ */
8067+ d_drop(dentry);
8068+ /* au_di_swap(tmp, dinfo); */
8069+ au_dbg_verify_dinode(dentry);
8070+ }
8071+ } else {
8072+ AuDbg("positive originally\n");
8073+ /* inode may be NULL */
8074+ AuDebugOn(inode && (inode->i_mode & S_IFMT) != orig_h.mode);
8075+ if (!tmp_h.inode) {
8076+ AuDbg("positive --> negative\n");
8077+ /* or bypassing aufs */
8078+ au_hide(dentry);
8079+ if (tmp->di_bwh >= 0 && tmp->di_bwh <= dinfo->di_bstart)
8080+ dinfo->di_bwh = tmp->di_bwh;
8081+ if (inode)
8082+ err = au_refresh_hinode_self(inode);
8083+ au_dbg_verify_dinode(dentry);
8084+ } else if (orig_h.mode == tmp_h.mode) {
8085+ AuDbg("positive --> positive, same type\n");
8086+ if (!S_ISDIR(orig_h.mode)
8087+ && dinfo->di_bstart > tmp->di_bstart) {
8088+ /*
8089+ * similar to the behaviour of removing and
8090+ * creating.
8091+ */
8092+ au_hide(dentry);
8093+ if (inode)
8094+ err = au_refresh_hinode_self(inode);
8095+ au_dbg_verify_dinode(dentry);
8096+ } else {
8097+ /* fill empty slots */
8098+ if (dinfo->di_bstart > tmp->di_bstart)
8099+ dinfo->di_bstart = tmp->di_bstart;
8100+ if (dinfo->di_bend < tmp->di_bend)
8101+ dinfo->di_bend = tmp->di_bend;
8102+ dinfo->di_bwh = tmp->di_bwh;
8103+ dinfo->di_bdiropq = tmp->di_bdiropq;
8104+ hd = tmp->di_hdentry;
8105+ bend = dinfo->di_bend;
8106+ for (bindex = tmp->di_bstart; bindex <= bend;
8107+ bindex++) {
8108+ if (au_h_dptr(dentry, bindex))
8109+ continue;
8110+ h_dentry = hd[bindex].hd_dentry;
8111+ if (!h_dentry)
8112+ continue;
5527c038
JR
8113+ AuDebugOn(d_is_negative(h_dentry));
8114+ h_inode = d_inode(h_dentry);
027c5e7a
AM
8115+ AuDebugOn(orig_h.mode
8116+ != (h_inode->i_mode
8117+ & S_IFMT));
8118+ au_set_h_dptr(dentry, bindex,
8119+ dget(h_dentry));
8120+ }
8121+ err = au_refresh_hinode(inode, dentry);
8122+ au_dbg_verify_dinode(dentry);
8123+ }
8124+ } else {
8125+ AuDbg("positive --> positive, different type\n");
8126+ /* similar to the behaviour of removing and creating */
8127+ au_hide(dentry);
8128+ if (inode)
8129+ err = au_refresh_hinode_self(inode);
8130+ au_dbg_verify_dinode(dentry);
8131+ }
8132+ }
8133+
8134+out:
8135+ return err;
8136+}
8137+
ab036dbd
AM
8138+void au_refresh_dop(struct dentry *dentry, int force_reval)
8139+{
8140+ const struct dentry_operations *dop
8141+ = force_reval ? &aufs_dop : dentry->d_sb->s_d_op;
8142+ static const unsigned int mask
8143+ = DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE;
8144+
8145+ BUILD_BUG_ON(sizeof(mask) != sizeof(dentry->d_flags));
8146+
8147+ if (dentry->d_op == dop)
8148+ return;
8149+
8150+ AuDbg("%pd\n", dentry);
8151+ spin_lock(&dentry->d_lock);
8152+ if (dop == &aufs_dop)
8153+ dentry->d_flags |= mask;
8154+ else
8155+ dentry->d_flags &= ~mask;
8156+ dentry->d_op = dop;
8157+ spin_unlock(&dentry->d_lock);
8158+}
8159+
027c5e7a
AM
8160+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent)
8161+{
8162+ int err, ebrange;
8163+ unsigned int sigen;
8164+ struct au_dinfo *dinfo, *tmp;
8165+ struct super_block *sb;
8166+ struct inode *inode;
8167+
8168+ DiMustWriteLock(dentry);
8169+ AuDebugOn(IS_ROOT(dentry));
5527c038 8170+ AuDebugOn(d_really_is_negative(parent));
027c5e7a
AM
8171+
8172+ sb = dentry->d_sb;
027c5e7a
AM
8173+ sigen = au_sigen(sb);
8174+ err = au_digen_test(parent, sigen);
8175+ if (unlikely(err))
8176+ goto out;
8177+
8178+ dinfo = au_di(dentry);
8179+ err = au_di_realloc(dinfo, au_sbend(sb) + 1);
8180+ if (unlikely(err))
8181+ goto out;
8182+ ebrange = au_dbrange_test(dentry);
8183+ if (!ebrange)
8184+ ebrange = au_do_refresh_hdentry(dentry, parent);
8185+
38d290e6 8186+ if (d_unhashed(dentry) || ebrange /* || dinfo->di_tmpfile */) {
027c5e7a 8187+ AuDebugOn(au_dbstart(dentry) < 0 && au_dbend(dentry) >= 0);
5527c038
JR
8188+ if (d_really_is_positive(dentry)) {
8189+ inode = d_inode(dentry);
027c5e7a 8190+ err = au_refresh_hinode_self(inode);
5527c038 8191+ }
027c5e7a
AM
8192+ au_dbg_verify_dinode(dentry);
8193+ if (!err)
8194+ goto out_dgen; /* success */
8195+ goto out;
8196+ }
8197+
8198+ /* temporary dinfo */
8199+ AuDbgDentry(dentry);
8200+ err = -ENOMEM;
8201+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
8202+ if (unlikely(!tmp))
8203+ goto out;
8204+ au_di_swap(tmp, dinfo);
8205+ /* returns the number of positive dentries */
8206+ /*
8207+ * if current working dir is removed, it returns an error.
8208+ * but the dentry is legal.
8209+ */
537831f9 8210+ err = au_lkup_dentry(dentry, /*bstart*/0, /*type*/0);
027c5e7a
AM
8211+ AuDbgDentry(dentry);
8212+ au_di_swap(tmp, dinfo);
8213+ if (err == -ENOENT)
8214+ err = 0;
8215+ if (err >= 0) {
8216+ /* compare/refresh by dinfo */
8217+ AuDbgDentry(dentry);
8218+ err = au_refresh_by_dinfo(dentry, dinfo, tmp);
8219+ au_dbg_verify_dinode(dentry);
8220+ AuTraceErr(err);
8221+ }
8222+ au_rw_write_unlock(&tmp->di_rwsem);
8223+ au_di_free(tmp);
8224+ if (unlikely(err))
8225+ goto out;
8226+
8227+out_dgen:
8228+ au_update_digen(dentry);
8229+out:
8230+ if (unlikely(err && !(dentry->d_flags & DCACHE_NFSFS_RENAMED))) {
523b37e3 8231+ AuIOErr("failed refreshing %pd, %d\n", dentry, err);
027c5e7a
AM
8232+ AuDbgDentry(dentry);
8233+ }
8234+ AuTraceErr(err);
8235+ return err;
8236+}
8237+
b4510431
AM
8238+static int au_do_h_d_reval(struct dentry *h_dentry, unsigned int flags,
8239+ struct dentry *dentry, aufs_bindex_t bindex)
027c5e7a
AM
8240+{
8241+ int err, valid;
027c5e7a
AM
8242+
8243+ err = 0;
8244+ if (!(h_dentry->d_flags & DCACHE_OP_REVALIDATE))
8245+ goto out;
027c5e7a
AM
8246+
8247+ AuDbg("b%d\n", bindex);
b4510431
AM
8248+ /*
8249+ * gave up supporting LOOKUP_CREATE/OPEN for lower fs,
8250+ * due to whiteout and branch permission.
8251+ */
8252+ flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE
8253+ | LOOKUP_FOLLOW | LOOKUP_EXCL);
8254+ /* it may return tri-state */
8255+ valid = h_dentry->d_op->d_revalidate(h_dentry, flags);
1facf9fc 8256+
8257+ if (unlikely(valid < 0))
8258+ err = valid;
8259+ else if (!valid)
8260+ err = -EINVAL;
8261+
4f0767ce 8262+out:
1facf9fc 8263+ AuTraceErr(err);
8264+ return err;
8265+}
8266+
8267+/* todo: remove this */
8268+static int h_d_revalidate(struct dentry *dentry, struct inode *inode,
b4510431 8269+ unsigned int flags, int do_udba)
1facf9fc 8270+{
8271+ int err;
8272+ umode_t mode, h_mode;
8273+ aufs_bindex_t bindex, btail, bstart, ibs, ibe;
38d290e6 8274+ unsigned char plus, unhashed, is_root, h_plus, h_nfs, tmpfile;
4a4d8108 8275+ struct inode *h_inode, *h_cached_inode;
1facf9fc 8276+ struct dentry *h_dentry;
8277+ struct qstr *name, *h_name;
8278+
8279+ err = 0;
8280+ plus = 0;
8281+ mode = 0;
1facf9fc 8282+ ibs = -1;
8283+ ibe = -1;
8284+ unhashed = !!d_unhashed(dentry);
8285+ is_root = !!IS_ROOT(dentry);
8286+ name = &dentry->d_name;
38d290e6 8287+ tmpfile = au_di(dentry)->di_tmpfile;
1facf9fc 8288+
8289+ /*
7f207e10
AM
8290+ * Theoretically, REVAL test should be unnecessary in case of
8291+ * {FS,I}NOTIFY.
8292+ * But {fs,i}notify doesn't fire some necessary events,
1facf9fc 8293+ * IN_ATTRIB for atime/nlink/pageio
1facf9fc 8294+ * Let's do REVAL test too.
8295+ */
8296+ if (do_udba && inode) {
8297+ mode = (inode->i_mode & S_IFMT);
8298+ plus = (inode->i_nlink > 0);
1facf9fc 8299+ ibs = au_ibstart(inode);
8300+ ibe = au_ibend(inode);
8301+ }
8302+
8303+ bstart = au_dbstart(dentry);
8304+ btail = bstart;
8305+ if (inode && S_ISDIR(inode->i_mode))
8306+ btail = au_dbtaildir(dentry);
8307+ for (bindex = bstart; bindex <= btail; bindex++) {
8308+ h_dentry = au_h_dptr(dentry, bindex);
8309+ if (!h_dentry)
8310+ continue;
8311+
523b37e3
AM
8312+ AuDbg("b%d, %pd\n", bindex, h_dentry);
8313+ h_nfs = !!au_test_nfs(h_dentry->d_sb);
027c5e7a 8314+ spin_lock(&h_dentry->d_lock);
1facf9fc 8315+ h_name = &h_dentry->d_name;
8316+ if (unlikely(do_udba
8317+ && !is_root
523b37e3
AM
8318+ && ((!h_nfs
8319+ && (unhashed != !!d_unhashed(h_dentry)
38d290e6
JR
8320+ || (!tmpfile
8321+ && !au_qstreq(name, h_name))
8322+ ))
523b37e3
AM
8323+ || (h_nfs
8324+ && !(flags & LOOKUP_OPEN)
8325+ && (h_dentry->d_flags
8326+ & DCACHE_NFSFS_RENAMED)))
1facf9fc 8327+ )) {
38d290e6
JR
8328+ int h_unhashed;
8329+
8330+ h_unhashed = d_unhashed(h_dentry);
027c5e7a 8331+ spin_unlock(&h_dentry->d_lock);
38d290e6
JR
8332+ AuDbg("unhash 0x%x 0x%x, %pd %pd\n",
8333+ unhashed, h_unhashed, dentry, h_dentry);
1facf9fc 8334+ goto err;
8335+ }
027c5e7a 8336+ spin_unlock(&h_dentry->d_lock);
1facf9fc 8337+
b4510431 8338+ err = au_do_h_d_reval(h_dentry, flags, dentry, bindex);
1facf9fc 8339+ if (unlikely(err))
8340+ /* do not goto err, to keep the errno */
8341+ break;
8342+
8343+ /* todo: plink too? */
8344+ if (!do_udba)
8345+ continue;
8346+
8347+ /* UDBA tests */
5527c038 8348+ if (unlikely(!!inode != d_is_positive(h_dentry)))
1facf9fc 8349+ goto err;
8350+
5527c038
JR
8351+ h_inode = NULL;
8352+ if (d_is_positive(h_dentry))
8353+ h_inode = d_inode(h_dentry);
1facf9fc 8354+ h_plus = plus;
8355+ h_mode = mode;
8356+ h_cached_inode = h_inode;
8357+ if (h_inode) {
8358+ h_mode = (h_inode->i_mode & S_IFMT);
8359+ h_plus = (h_inode->i_nlink > 0);
8360+ }
8361+ if (inode && ibs <= bindex && bindex <= ibe)
8362+ h_cached_inode = au_h_iptr(inode, bindex);
8363+
523b37e3 8364+ if (!h_nfs) {
38d290e6 8365+ if (unlikely(plus != h_plus && !tmpfile))
523b37e3
AM
8366+ goto err;
8367+ } else {
8368+ if (unlikely(!(h_dentry->d_flags & DCACHE_NFSFS_RENAMED)
8369+ && !is_root
8370+ && !IS_ROOT(h_dentry)
8371+ && unhashed != d_unhashed(h_dentry)))
8372+ goto err;
8373+ }
8374+ if (unlikely(mode != h_mode
1facf9fc 8375+ || h_cached_inode != h_inode))
8376+ goto err;
8377+ continue;
8378+
f6b6e03d 8379+err:
1facf9fc 8380+ err = -EINVAL;
8381+ break;
8382+ }
8383+
523b37e3 8384+ AuTraceErr(err);
1facf9fc 8385+ return err;
8386+}
8387+
027c5e7a 8388+/* todo: consolidate with do_refresh() and au_reval_for_attr() */
1facf9fc 8389+static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen)
8390+{
8391+ int err;
8392+ struct dentry *parent;
1facf9fc 8393+
027c5e7a 8394+ if (!au_digen_test(dentry, sigen))
1facf9fc 8395+ return 0;
8396+
8397+ parent = dget_parent(dentry);
8398+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 8399+ AuDebugOn(au_digen_test(parent, sigen));
1facf9fc 8400+ au_dbg_verify_gen(parent, sigen);
027c5e7a 8401+ err = au_refresh_dentry(dentry, parent);
1facf9fc 8402+ di_read_unlock(parent, AuLock_IR);
8403+ dput(parent);
027c5e7a 8404+ AuTraceErr(err);
1facf9fc 8405+ return err;
8406+}
8407+
8408+int au_reval_dpath(struct dentry *dentry, unsigned int sigen)
8409+{
8410+ int err;
8411+ struct dentry *d, *parent;
1facf9fc 8412+
027c5e7a 8413+ if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR))
1facf9fc 8414+ return simple_reval_dpath(dentry, sigen);
8415+
8416+ /* slow loop, keep it simple and stupid */
8417+ /* cf: au_cpup_dirs() */
8418+ err = 0;
8419+ parent = NULL;
027c5e7a 8420+ while (au_digen_test(dentry, sigen)) {
1facf9fc 8421+ d = dentry;
8422+ while (1) {
8423+ dput(parent);
8424+ parent = dget_parent(d);
027c5e7a 8425+ if (!au_digen_test(parent, sigen))
1facf9fc 8426+ break;
8427+ d = parent;
8428+ }
8429+
1facf9fc 8430+ if (d != dentry)
027c5e7a 8431+ di_write_lock_child2(d);
1facf9fc 8432+
8433+ /* someone might update our dentry while we were sleeping */
027c5e7a
AM
8434+ if (au_digen_test(d, sigen)) {
8435+ /*
8436+ * todo: consolidate with simple_reval_dpath(),
8437+ * do_refresh() and au_reval_for_attr().
8438+ */
1facf9fc 8439+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 8440+ err = au_refresh_dentry(d, parent);
1facf9fc 8441+ di_read_unlock(parent, AuLock_IR);
8442+ }
8443+
8444+ if (d != dentry)
8445+ di_write_unlock(d);
8446+ dput(parent);
8447+ if (unlikely(err))
8448+ break;
8449+ }
8450+
8451+ return err;
8452+}
8453+
8454+/*
8455+ * returns 1 if valid and 0 if not; -ECHILD for rcu-walk and the errno of a failed aufs_read_lock() are passed through.
8456+ */
b4510431 8457+static int aufs_d_revalidate(struct dentry *dentry, unsigned int flags)
1facf9fc 8458+{
8459+ int valid, err;
8460+ unsigned int sigen;
8461+ unsigned char do_udba;
8462+ struct super_block *sb;
8463+ struct inode *inode;
8464+
027c5e7a 8465+ /* todo: support rcu-walk? */
b4510431 8466+ if (flags & LOOKUP_RCU)
027c5e7a
AM
8467+ return -ECHILD;
8468+
8469+ valid = 0;
8470+ if (unlikely(!au_di(dentry)))
8471+ goto out;
8472+
e49829fe 8473+ valid = 1;
1facf9fc 8474+ sb = dentry->d_sb;
e49829fe
JR
8475+ /*
8476+ * todo: very ugly
8477+ * i_mutex of parent dir may be held,
8478+ * but we should not return 'invalid' due to busy.
8479+ */
8480+ err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW | AuLock_NOPLM);
8481+ if (unlikely(err)) {
8482+ valid = err;
027c5e7a 8483+ AuTraceErr(err);
e49829fe
JR
8484+ goto out;
8485+ }
5527c038
JR
8486+ inode = NULL;
8487+ if (d_really_is_positive(dentry))
8488+ inode = d_inode(dentry);
c1595e42
JR
8489+ if (unlikely(inode && is_bad_inode(inode))) {
8490+ err = -EINVAL;
8491+ AuTraceErr(err);
8492+ goto out_dgrade;
8493+ }
027c5e7a
AM
8494+ if (unlikely(au_dbrange_test(dentry))) {
8495+ err = -EINVAL;
8496+ AuTraceErr(err);
8497+ goto out_dgrade;
1facf9fc 8498+ }
027c5e7a
AM
8499+
8500+ sigen = au_sigen(sb);
8501+ if (au_digen_test(dentry, sigen)) {
1facf9fc 8502+ AuDebugOn(IS_ROOT(dentry));
027c5e7a
AM
8503+ err = au_reval_dpath(dentry, sigen);
8504+ if (unlikely(err)) {
8505+ AuTraceErr(err);
1facf9fc 8506+ goto out_dgrade;
027c5e7a 8507+ }
1facf9fc 8508+ }
8509+ di_downgrade_lock(dentry, AuLock_IR);
8510+
1facf9fc 8511+ err = -EINVAL;
c1595e42 8512+ if (!(flags & (LOOKUP_OPEN | LOOKUP_EMPTY))
523b37e3 8513+ && inode
38d290e6 8514+ && !(inode->i_state & I_LINKABLE) /* flag test: needs '&', '&&' was true for any non-zero i_state */
ab036dbd
AM
8515+ && (IS_DEADDIR(inode) || !inode->i_nlink)) {
8516+ AuTraceErr(err);
027c5e7a 8517+ goto out_inval;
ab036dbd 8518+ }
027c5e7a 8519+
1facf9fc 8520+ do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE);
8521+ if (do_udba && inode) {
8522+ aufs_bindex_t bstart = au_ibstart(inode);
027c5e7a 8523+ struct inode *h_inode;
1facf9fc 8524+
027c5e7a
AM
8525+ if (bstart >= 0) {
8526+ h_inode = au_h_iptr(inode, bstart);
ab036dbd
AM
8527+ if (h_inode && au_test_higen(inode, h_inode)) {
8528+ AuTraceErr(err);
027c5e7a 8529+ goto out_inval;
ab036dbd 8530+ }
027c5e7a 8531+ }
1facf9fc 8532+ }
8533+
b4510431 8534+ err = h_d_revalidate(dentry, inode, flags, do_udba);
027c5e7a 8535+ if (unlikely(!err && do_udba && au_dbstart(dentry) < 0)) {
1facf9fc 8536+ err = -EIO;
523b37e3
AM
8537+ AuDbg("both of real entry and whiteout found, %p, err %d\n",
8538+ dentry, err);
027c5e7a 8539+ }
e49829fe 8540+ goto out_inval;
1facf9fc 8541+
4f0767ce 8542+out_dgrade:
1facf9fc 8543+ di_downgrade_lock(dentry, AuLock_IR);
e49829fe 8544+out_inval:
1facf9fc 8545+ aufs_read_unlock(dentry, AuLock_IR);
8546+ AuTraceErr(err);
8547+ valid = !err;
e49829fe 8548+out:
027c5e7a 8549+ if (!valid) {
523b37e3 8550+ AuDbg("%pd invalid, %d\n", dentry, valid);
027c5e7a
AM
8551+ d_drop(dentry);
8552+ }
1facf9fc 8553+ return valid;
8554+}
8555+
8556+static void aufs_d_release(struct dentry *dentry)
8557+{
027c5e7a 8558+ if (au_di(dentry)) {
4a4d8108
AM
8559+ au_di_fin(dentry);
8560+ au_hn_di_reinit(dentry);
1facf9fc 8561+ }
1facf9fc 8562+}
8563+
4a4d8108 8564+const struct dentry_operations aufs_dop = {
c06a8ce3
AM
8565+ .d_revalidate = aufs_d_revalidate,
8566+ .d_weak_revalidate = aufs_d_revalidate,
8567+ .d_release = aufs_d_release
1facf9fc 8568+};
ab036dbd
AM
8569+
8570+/* aufs_dop without d_revalidate */
8571+const struct dentry_operations aufs_dop_noreval = {
8572+ .d_release = aufs_d_release
8573+};
7f207e10
AM
8574diff -urN /usr/share/empty/fs/aufs/dentry.h linux/fs/aufs/dentry.h
8575--- /usr/share/empty/fs/aufs/dentry.h 1970-01-01 01:00:00.000000000 +0100
ab036dbd
AM
8576+++ linux/fs/aufs/dentry.h 2015-11-11 17:21:46.918863802 +0100
8577@@ -0,0 +1,234 @@
1facf9fc 8578+/*
2000de60 8579+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 8580+ *
8581+ * This program, aufs is free software; you can redistribute it and/or modify
8582+ * it under the terms of the GNU General Public License as published by
8583+ * the Free Software Foundation; either version 2 of the License, or
8584+ * (at your option) any later version.
dece6358
AM
8585+ *
8586+ * This program is distributed in the hope that it will be useful,
8587+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8588+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8589+ * GNU General Public License for more details.
8590+ *
8591+ * You should have received a copy of the GNU General Public License
523b37e3 8592+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 8593+ */
8594+
8595+/*
8596+ * lookup and dentry operations
8597+ */
8598+
8599+#ifndef __AUFS_DENTRY_H__
8600+#define __AUFS_DENTRY_H__
8601+
8602+#ifdef __KERNEL__
8603+
dece6358 8604+#include <linux/dcache.h>
1facf9fc 8605+#include "rwsem.h"
8606+
1facf9fc 8607+struct au_hdentry {
8608+ struct dentry *hd_dentry;
027c5e7a 8609+ aufs_bindex_t hd_id;
1facf9fc 8610+};
8611+
8612+struct au_dinfo {
8613+ atomic_t di_generation;
8614+
dece6358 8615+ struct au_rwsem di_rwsem;
1facf9fc 8616+ aufs_bindex_t di_bstart, di_bend, di_bwh, di_bdiropq;
38d290e6 8617+ unsigned char di_tmpfile; /* to allow the different name */
1facf9fc 8618+ struct au_hdentry *di_hdentry;
4a4d8108 8619+} ____cacheline_aligned_in_smp;
1facf9fc 8620+
8621+/* ---------------------------------------------------------------------- */
8622+
8623+/* dentry.c */
ab036dbd 8624+extern const struct dentry_operations aufs_dop, aufs_dop_noreval;
1facf9fc 8625+struct au_branch;
076b876e 8626+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent);
1facf9fc 8627+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
8628+ struct dentry *h_parent, struct au_branch *br);
8629+
537831f9 8630+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type);
86dc4139 8631+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh);
027c5e7a 8632+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent);
1facf9fc 8633+int au_reval_dpath(struct dentry *dentry, unsigned int sigen);
ab036dbd 8634+void au_refresh_dop(struct dentry *dentry, int force_reval);
1facf9fc 8635+
8636+/* dinfo.c */
4a4d8108 8637+void au_di_init_once(void *_di);
027c5e7a
AM
8638+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc);
8639+void au_di_free(struct au_dinfo *dinfo);
8640+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b);
8641+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src);
4a4d8108
AM
8642+int au_di_init(struct dentry *dentry);
8643+void au_di_fin(struct dentry *dentry);
1facf9fc 8644+int au_di_realloc(struct au_dinfo *dinfo, int nbr);
8645+
8646+void di_read_lock(struct dentry *d, int flags, unsigned int lsc);
8647+void di_read_unlock(struct dentry *d, int flags);
8648+void di_downgrade_lock(struct dentry *d, int flags);
8649+void di_write_lock(struct dentry *d, unsigned int lsc);
8650+void di_write_unlock(struct dentry *d);
8651+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir);
8652+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir);
8653+void di_write_unlock2(struct dentry *d1, struct dentry *d2);
8654+
8655+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex);
2cbb1c4b 8656+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex);
1facf9fc 8657+aufs_bindex_t au_dbtail(struct dentry *dentry);
8658+aufs_bindex_t au_dbtaildir(struct dentry *dentry);
8659+
8660+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
8661+ struct dentry *h_dentry);
027c5e7a
AM
8662+int au_digen_test(struct dentry *dentry, unsigned int sigen);
8663+int au_dbrange_test(struct dentry *dentry);
1facf9fc 8664+void au_update_digen(struct dentry *dentry);
8665+void au_update_dbrange(struct dentry *dentry, int do_put_zero);
8666+void au_update_dbstart(struct dentry *dentry);
8667+void au_update_dbend(struct dentry *dentry);
8668+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry);
8669+
8670+/* ---------------------------------------------------------------------- */
8671+
8672+static inline struct au_dinfo *au_di(struct dentry *dentry)
8673+{
8674+ return dentry->d_fsdata;
8675+}
8676+
8677+/* ---------------------------------------------------------------------- */
8678+
8679+/* lock subclass for dinfo */
8680+enum {
8681+ AuLsc_DI_CHILD, /* child first */
4a4d8108 8682+ AuLsc_DI_CHILD2, /* rename(2), link(2), and cpup at hnotify */
1facf9fc 8683+ AuLsc_DI_CHILD3, /* copyup dirs */
8684+ AuLsc_DI_PARENT,
8685+ AuLsc_DI_PARENT2,
027c5e7a
AM
8686+ AuLsc_DI_PARENT3,
8687+ AuLsc_DI_TMP /* temp for replacing dinfo */
1facf9fc 8688+};
8689+
8690+/*
8691+ * di_read_lock_child, di_write_lock_child,
8692+ * di_read_lock_child2, di_write_lock_child2,
8693+ * di_read_lock_child3, di_write_lock_child3,
8694+ * di_read_lock_parent, di_write_lock_parent,
8695+ * di_read_lock_parent2, di_write_lock_parent2,
8696+ * di_read_lock_parent3, di_write_lock_parent3,
8697+ */
8698+#define AuReadLockFunc(name, lsc) \
8699+static inline void di_read_lock_##name(struct dentry *d, int flags) \
8700+{ di_read_lock(d, flags, AuLsc_DI_##lsc); }
8701+
8702+#define AuWriteLockFunc(name, lsc) \
8703+static inline void di_write_lock_##name(struct dentry *d) \
8704+{ di_write_lock(d, AuLsc_DI_##lsc); }
8705+
8706+#define AuRWLockFuncs(name, lsc) \
8707+ AuReadLockFunc(name, lsc) \
8708+ AuWriteLockFunc(name, lsc)
8709+
8710+AuRWLockFuncs(child, CHILD);
8711+AuRWLockFuncs(child2, CHILD2);
8712+AuRWLockFuncs(child3, CHILD3);
8713+AuRWLockFuncs(parent, PARENT);
8714+AuRWLockFuncs(parent2, PARENT2);
8715+AuRWLockFuncs(parent3, PARENT3);
8716+
8717+#undef AuReadLockFunc
8718+#undef AuWriteLockFunc
8719+#undef AuRWLockFuncs
8720+
8721+#define DiMustNoWaiters(d) AuRwMustNoWaiters(&au_di(d)->di_rwsem)
dece6358
AM
8722+#define DiMustAnyLock(d) AuRwMustAnyLock(&au_di(d)->di_rwsem)
8723+#define DiMustWriteLock(d) AuRwMustWriteLock(&au_di(d)->di_rwsem)
1facf9fc 8724+
8725+/* ---------------------------------------------------------------------- */
8726+
8727+/* todo: memory barrier? */
8728+static inline unsigned int au_digen(struct dentry *d)
8729+{
8730+ return atomic_read(&au_di(d)->di_generation);
8731+}
8732+
8733+static inline void au_h_dentry_init(struct au_hdentry *hdentry)
8734+{
8735+ hdentry->hd_dentry = NULL;
8736+}
8737+
8738+static inline void au_hdput(struct au_hdentry *hd)
8739+{
4a4d8108
AM
8740+ if (hd)
8741+ dput(hd->hd_dentry);
1facf9fc 8742+}
8743+
8744+static inline aufs_bindex_t au_dbstart(struct dentry *dentry)
8745+{
1308ab2a 8746+ DiMustAnyLock(dentry);
1facf9fc 8747+ return au_di(dentry)->di_bstart;
8748+}
8749+
8750+static inline aufs_bindex_t au_dbend(struct dentry *dentry)
8751+{
1308ab2a 8752+ DiMustAnyLock(dentry);
1facf9fc 8753+ return au_di(dentry)->di_bend;
8754+}
8755+
8756+static inline aufs_bindex_t au_dbwh(struct dentry *dentry)
8757+{
1308ab2a 8758+ DiMustAnyLock(dentry);
1facf9fc 8759+ return au_di(dentry)->di_bwh;
8760+}
8761+
8762+static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry)
8763+{
1308ab2a 8764+ DiMustAnyLock(dentry);
1facf9fc 8765+ return au_di(dentry)->di_bdiropq;
8766+}
8767+
8768+/* todo: hard/soft set? */
8769+static inline void au_set_dbstart(struct dentry *dentry, aufs_bindex_t bindex)
8770+{
1308ab2a 8771+ DiMustWriteLock(dentry);
1facf9fc 8772+ au_di(dentry)->di_bstart = bindex;
8773+}
8774+
8775+static inline void au_set_dbend(struct dentry *dentry, aufs_bindex_t bindex)
8776+{
1308ab2a 8777+ DiMustWriteLock(dentry);
1facf9fc 8778+ au_di(dentry)->di_bend = bindex;
8779+}
8780+
8781+static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex)
8782+{
1308ab2a 8783+ DiMustWriteLock(dentry);
1facf9fc 8784+ /* dbwh can be outside of bstart - bend range */
8785+ au_di(dentry)->di_bwh = bindex;
8786+}
8787+
8788+static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex)
8789+{
1308ab2a 8790+ DiMustWriteLock(dentry);
1facf9fc 8791+ au_di(dentry)->di_bdiropq = bindex;
8792+}
8793+
8794+/* ---------------------------------------------------------------------- */
8795+
4a4d8108 8796+#ifdef CONFIG_AUFS_HNOTIFY
1facf9fc 8797+static inline void au_digen_dec(struct dentry *d)
8798+{
e49829fe 8799+ atomic_dec(&au_di(d)->di_generation);
1facf9fc 8800+}
8801+
4a4d8108 8802+static inline void au_hn_di_reinit(struct dentry *dentry)
1facf9fc 8803+{
8804+ dentry->d_fsdata = NULL;
8805+}
8806+#else
4a4d8108
AM
8807+AuStubVoid(au_hn_di_reinit, struct dentry *dentry __maybe_unused)
8808+#endif /* CONFIG_AUFS_HNOTIFY */
1facf9fc 8809+
8810+#endif /* __KERNEL__ */
8811+#endif /* __AUFS_DENTRY_H__ */
7f207e10
AM
8812diff -urN /usr/share/empty/fs/aufs/dinfo.c linux/fs/aufs/dinfo.c
8813--- /usr/share/empty/fs/aufs/dinfo.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 8814+++ linux/fs/aufs/dinfo.c 2015-09-24 10:47:58.251386326 +0200
5527c038 8815@@ -0,0 +1,550 @@
1facf9fc 8816+/*
2000de60 8817+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 8818+ *
8819+ * This program, aufs is free software; you can redistribute it and/or modify
8820+ * it under the terms of the GNU General Public License as published by
8821+ * the Free Software Foundation; either version 2 of the License, or
8822+ * (at your option) any later version.
dece6358
AM
8823+ *
8824+ * This program is distributed in the hope that it will be useful,
8825+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8826+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8827+ * GNU General Public License for more details.
8828+ *
8829+ * You should have received a copy of the GNU General Public License
523b37e3 8830+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 8831+ */
8832+
8833+/*
8834+ * dentry private data
8835+ */
8836+
8837+#include "aufs.h"
8838+
e49829fe 8839+void au_di_init_once(void *_dinfo)
4a4d8108 8840+{
e49829fe
JR
8841+ struct au_dinfo *dinfo = _dinfo;
8842+ static struct lock_class_key aufs_di;
4a4d8108 8843+
e49829fe
JR
8844+ au_rw_init(&dinfo->di_rwsem);
8845+ au_rw_class(&dinfo->di_rwsem, &aufs_di);
4a4d8108
AM
8846+}
8847+
027c5e7a 8848+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc)
1facf9fc 8849+{
8850+ struct au_dinfo *dinfo;
027c5e7a 8851+ int nbr, i;
1facf9fc 8852+
8853+ dinfo = au_cache_alloc_dinfo();
8854+ if (unlikely(!dinfo))
8855+ goto out;
8856+
1facf9fc 8857+ nbr = au_sbend(sb) + 1;
8858+ if (nbr <= 0)
8859+ nbr = 1;
8860+ dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS);
027c5e7a
AM
8861+ if (dinfo->di_hdentry) {
8862+ au_rw_write_lock_nested(&dinfo->di_rwsem, lsc);
8863+ dinfo->di_bstart = -1;
8864+ dinfo->di_bend = -1;
8865+ dinfo->di_bwh = -1;
8866+ dinfo->di_bdiropq = -1;
38d290e6 8867+ dinfo->di_tmpfile = 0;
027c5e7a
AM
8868+ for (i = 0; i < nbr; i++)
8869+ dinfo->di_hdentry[i].hd_id = -1;
8870+ goto out;
8871+ }
1facf9fc 8872+
1facf9fc 8873+ au_cache_free_dinfo(dinfo);
027c5e7a
AM
8874+ dinfo = NULL;
8875+
4f0767ce 8876+out:
027c5e7a 8877+ return dinfo;
1facf9fc 8878+}
8879+
027c5e7a 8880+void au_di_free(struct au_dinfo *dinfo)
4a4d8108 8881+{
4a4d8108
AM
8882+ struct au_hdentry *p;
8883+ aufs_bindex_t bend, bindex;
8884+
8885+ /* dentry may not be revalidated */
027c5e7a 8886+ bindex = dinfo->di_bstart;
4a4d8108 8887+ if (bindex >= 0) {
027c5e7a
AM
8888+ bend = dinfo->di_bend;
8889+ p = dinfo->di_hdentry + bindex;
4a4d8108
AM
8890+ while (bindex++ <= bend)
8891+ au_hdput(p++);
8892+ }
027c5e7a
AM
8893+ kfree(dinfo->di_hdentry);
8894+ au_cache_free_dinfo(dinfo);
8895+}
8896+
8897+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b)
8898+{
8899+ struct au_hdentry *p;
8900+ aufs_bindex_t bi;
8901+
8902+ AuRwMustWriteLock(&a->di_rwsem);
8903+ AuRwMustWriteLock(&b->di_rwsem);
8904+
8905+#define DiSwap(v, name) \
8906+ do { \
8907+ v = a->di_##name; \
8908+ a->di_##name = b->di_##name; \
8909+ b->di_##name = v; \
8910+ } while (0)
8911+
8912+ DiSwap(p, hdentry);
8913+ DiSwap(bi, bstart);
8914+ DiSwap(bi, bend);
8915+ DiSwap(bi, bwh);
8916+ DiSwap(bi, bdiropq);
8917+ /* smp_mb(); */
8918+
8919+#undef DiSwap
8920+}
8921+
8922+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src)
8923+{
8924+ AuRwMustWriteLock(&dst->di_rwsem);
8925+ AuRwMustWriteLock(&src->di_rwsem);
8926+
8927+ dst->di_bstart = src->di_bstart;
8928+ dst->di_bend = src->di_bend;
8929+ dst->di_bwh = src->di_bwh;
8930+ dst->di_bdiropq = src->di_bdiropq;
8931+ /* smp_mb(); */
8932+}
8933+
8934+int au_di_init(struct dentry *dentry)
8935+{
8936+ int err;
8937+ struct super_block *sb;
8938+ struct au_dinfo *dinfo;
8939+
8940+ err = 0;
8941+ sb = dentry->d_sb;
8942+ dinfo = au_di_alloc(sb, AuLsc_DI_CHILD);
8943+ if (dinfo) {
8944+ atomic_set(&dinfo->di_generation, au_sigen(sb));
8945+ /* smp_mb(); */ /* atomic_set */
8946+ dentry->d_fsdata = dinfo;
8947+ } else
8948+ err = -ENOMEM;
8949+
8950+ return err;
8951+}
8952+
8953+void au_di_fin(struct dentry *dentry)
8954+{
8955+ struct au_dinfo *dinfo;
8956+
8957+ dinfo = au_di(dentry);
8958+ AuRwDestroy(&dinfo->di_rwsem);
8959+ au_di_free(dinfo);
4a4d8108
AM
8960+}
8961+
1facf9fc 8962+int au_di_realloc(struct au_dinfo *dinfo, int nbr)
8963+{
8964+ int err, sz;
8965+ struct au_hdentry *hdp;
8966+
1308ab2a 8967+ AuRwMustWriteLock(&dinfo->di_rwsem);
8968+
1facf9fc 8969+ err = -ENOMEM;
8970+ sz = sizeof(*hdp) * (dinfo->di_bend + 1);
8971+ if (!sz)
8972+ sz = sizeof(*hdp);
8973+ hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS);
8974+ if (hdp) {
8975+ dinfo->di_hdentry = hdp;
8976+ err = 0;
8977+ }
8978+
8979+ return err;
8980+}
8981+
8982+/* ---------------------------------------------------------------------- */
8983+
8984+static void do_ii_write_lock(struct inode *inode, unsigned int lsc)
8985+{
8986+ switch (lsc) {
8987+ case AuLsc_DI_CHILD:
8988+ ii_write_lock_child(inode);
8989+ break;
8990+ case AuLsc_DI_CHILD2:
8991+ ii_write_lock_child2(inode);
8992+ break;
8993+ case AuLsc_DI_CHILD3:
8994+ ii_write_lock_child3(inode);
8995+ break;
8996+ case AuLsc_DI_PARENT:
8997+ ii_write_lock_parent(inode);
8998+ break;
8999+ case AuLsc_DI_PARENT2:
9000+ ii_write_lock_parent2(inode);
9001+ break;
9002+ case AuLsc_DI_PARENT3:
9003+ ii_write_lock_parent3(inode);
9004+ break;
9005+ default:
9006+ BUG();
9007+ }
9008+}
9009+
9010+static void do_ii_read_lock(struct inode *inode, unsigned int lsc)
9011+{
9012+ switch (lsc) {
9013+ case AuLsc_DI_CHILD:
9014+ ii_read_lock_child(inode);
9015+ break;
9016+ case AuLsc_DI_CHILD2:
9017+ ii_read_lock_child2(inode);
9018+ break;
9019+ case AuLsc_DI_CHILD3:
9020+ ii_read_lock_child3(inode);
9021+ break;
9022+ case AuLsc_DI_PARENT:
9023+ ii_read_lock_parent(inode);
9024+ break;
9025+ case AuLsc_DI_PARENT2:
9026+ ii_read_lock_parent2(inode);
9027+ break;
9028+ case AuLsc_DI_PARENT3:
9029+ ii_read_lock_parent3(inode);
9030+ break;
9031+ default:
9032+ BUG();
9033+ }
9034+}
9035+
9036+void di_read_lock(struct dentry *d, int flags, unsigned int lsc)
9037+{
5527c038
JR
9038+ struct inode *inode;
9039+
dece6358 9040+ au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc);
5527c038
JR
9041+ if (d_really_is_positive(d)) {
9042+ inode = d_inode(d);
1facf9fc 9043+ if (au_ftest_lock(flags, IW))
5527c038 9044+ do_ii_write_lock(inode, lsc);
1facf9fc 9045+ else if (au_ftest_lock(flags, IR))
5527c038 9046+ do_ii_read_lock(inode, lsc);
1facf9fc 9047+ }
9048+}
9049+
9050+void di_read_unlock(struct dentry *d, int flags)
9051+{
5527c038
JR
9052+ struct inode *inode;
9053+
9054+ if (d_really_is_positive(d)) {
9055+ inode = d_inode(d);
027c5e7a
AM
9056+ if (au_ftest_lock(flags, IW)) {
9057+ au_dbg_verify_dinode(d);
5527c038 9058+ ii_write_unlock(inode);
027c5e7a
AM
9059+ } else if (au_ftest_lock(flags, IR)) {
9060+ au_dbg_verify_dinode(d);
5527c038 9061+ ii_read_unlock(inode);
027c5e7a 9062+ }
1facf9fc 9063+ }
dece6358 9064+ au_rw_read_unlock(&au_di(d)->di_rwsem);
1facf9fc 9065+}
9066+
9067+void di_downgrade_lock(struct dentry *d, int flags)
9068+{
5527c038
JR
9069+ if (d_really_is_positive(d) && au_ftest_lock(flags, IR))
9070+ ii_downgrade_lock(d_inode(d));
dece6358 9071+ au_rw_dgrade_lock(&au_di(d)->di_rwsem);
1facf9fc 9072+}
9073+
9074+void di_write_lock(struct dentry *d, unsigned int lsc)
9075+{
dece6358 9076+ au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc);
5527c038
JR
9077+ if (d_really_is_positive(d))
9078+ do_ii_write_lock(d_inode(d), lsc);
1facf9fc 9079+}
9080+
9081+void di_write_unlock(struct dentry *d)
9082+{
027c5e7a 9083+ au_dbg_verify_dinode(d);
5527c038
JR
9084+ if (d_really_is_positive(d))
9085+ ii_write_unlock(d_inode(d));
dece6358 9086+ au_rw_write_unlock(&au_di(d)->di_rwsem);
1facf9fc 9087+}
9088+
9089+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir)
9090+{
9091+ AuDebugOn(d1 == d2
5527c038 9092+ || d_inode(d1) == d_inode(d2)
1facf9fc 9093+ || d1->d_sb != d2->d_sb);
9094+
9095+ if (isdir && au_test_subdir(d1, d2)) {
9096+ di_write_lock_child(d1);
9097+ di_write_lock_child2(d2);
9098+ } else {
9099+ /* there should be no races */
9100+ di_write_lock_child(d2);
9101+ di_write_lock_child2(d1);
9102+ }
9103+}
9104+
9105+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir)
9106+{
9107+ AuDebugOn(d1 == d2
5527c038 9108+ || d_inode(d1) == d_inode(d2)
1facf9fc 9109+ || d1->d_sb != d2->d_sb);
9110+
9111+ if (isdir && au_test_subdir(d1, d2)) {
9112+ di_write_lock_parent(d1);
9113+ di_write_lock_parent2(d2);
9114+ } else {
9115+ /* there should be no races */
9116+ di_write_lock_parent(d2);
9117+ di_write_lock_parent2(d1);
9118+ }
9119+}
9120+
9121+void di_write_unlock2(struct dentry *d1, struct dentry *d2)
9122+{
9123+ di_write_unlock(d1);
5527c038 9124+ if (d_inode(d1) == d_inode(d2))
dece6358 9125+ au_rw_write_unlock(&au_di(d2)->di_rwsem);
1facf9fc 9126+ else
9127+ di_write_unlock(d2);
9128+}
9129+
9130+/* ---------------------------------------------------------------------- */
9131+
9132+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex)
9133+{
9134+ struct dentry *d;
9135+
1308ab2a 9136+ DiMustAnyLock(dentry);
9137+
1facf9fc 9138+ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
9139+ return NULL;
9140+ AuDebugOn(bindex < 0);
9141+ d = au_di(dentry)->di_hdentry[0 + bindex].hd_dentry;
c1595e42 9142+ AuDebugOn(d && au_dcount(d) <= 0);
1facf9fc 9143+ return d;
9144+}
9145+
2cbb1c4b
JR
9146+/*
9147+ * extended version of au_h_dptr().
38d290e6
JR
9148+ * returns a hashed and positive (or linkable) h_dentry in bindex, NULL, or
9149+ * error.
2cbb1c4b
JR
9150+ */
9151+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex)
9152+{
9153+ struct dentry *h_dentry;
9154+ struct inode *inode, *h_inode;
9155+
5527c038 9156+ AuDebugOn(d_really_is_negative(dentry));
2cbb1c4b
JR
9157+
9158+ h_dentry = NULL;
9159+ if (au_dbstart(dentry) <= bindex
9160+ && bindex <= au_dbend(dentry))
9161+ h_dentry = au_h_dptr(dentry, bindex);
38d290e6 9162+ if (h_dentry && !au_d_linkable(h_dentry)) {
2cbb1c4b
JR
9163+ dget(h_dentry);
9164+ goto out; /* success */
9165+ }
9166+
5527c038 9167+ inode = d_inode(dentry);
2cbb1c4b
JR
9168+ AuDebugOn(bindex < au_ibstart(inode));
9169+ AuDebugOn(au_ibend(inode) < bindex);
9170+ h_inode = au_h_iptr(inode, bindex);
9171+ h_dentry = d_find_alias(h_inode);
9172+ if (h_dentry) {
9173+ if (!IS_ERR(h_dentry)) {
38d290e6 9174+ if (!au_d_linkable(h_dentry))
2cbb1c4b
JR
9175+ goto out; /* success */
9176+ dput(h_dentry);
9177+ } else
9178+ goto out;
9179+ }
9180+
9181+ if (au_opt_test(au_mntflags(dentry->d_sb), PLINK)) {
9182+ h_dentry = au_plink_lkup(inode, bindex);
9183+ AuDebugOn(!h_dentry);
9184+ if (!IS_ERR(h_dentry)) {
9185+ if (!au_d_hashed_positive(h_dentry))
9186+ goto out; /* success */
9187+ dput(h_dentry);
9188+ h_dentry = NULL;
9189+ }
9190+ }
9191+
9192+out:
9193+ AuDbgDentry(h_dentry);
9194+ return h_dentry;
9195+}
9196+
1facf9fc 9197+aufs_bindex_t au_dbtail(struct dentry *dentry)
9198+{
9199+ aufs_bindex_t bend, bwh;
9200+
9201+ bend = au_dbend(dentry);
9202+ if (0 <= bend) {
9203+ bwh = au_dbwh(dentry);
9204+ if (!bwh)
9205+ return bwh;
9206+ if (0 < bwh && bwh < bend)
9207+ return bwh - 1;
9208+ }
9209+ return bend;
9210+}
9211+
9212+aufs_bindex_t au_dbtaildir(struct dentry *dentry)
9213+{
9214+ aufs_bindex_t bend, bopq;
9215+
9216+ bend = au_dbtail(dentry);
9217+ if (0 <= bend) {
9218+ bopq = au_dbdiropq(dentry);
9219+ if (0 <= bopq && bopq < bend)
9220+ bend = bopq;
9221+ }
9222+ return bend;
9223+}
9224+
9225+/* ---------------------------------------------------------------------- */
9226+
9227+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
9228+ struct dentry *h_dentry)
9229+{
9230+ struct au_hdentry *hd = au_di(dentry)->di_hdentry + bindex;
027c5e7a 9231+ struct au_branch *br;
1facf9fc 9232+
1308ab2a 9233+ DiMustWriteLock(dentry);
9234+
4a4d8108 9235+ au_hdput(hd);
1facf9fc 9236+ hd->hd_dentry = h_dentry;
027c5e7a
AM
9237+ if (h_dentry) {
9238+ br = au_sbr(dentry->d_sb, bindex);
9239+ hd->hd_id = br->br_id;
9240+ }
9241+}
9242+
9243+int au_dbrange_test(struct dentry *dentry)
9244+{
9245+ int err;
9246+ aufs_bindex_t bstart, bend;
9247+
9248+ err = 0;
9249+ bstart = au_dbstart(dentry);
9250+ bend = au_dbend(dentry);
9251+ if (bstart >= 0)
9252+ AuDebugOn(bend < 0 && bstart > bend);
9253+ else {
9254+ err = -EIO;
9255+ AuDebugOn(bend >= 0);
9256+ }
9257+
9258+ return err;
9259+}
9260+
9261+int au_digen_test(struct dentry *dentry, unsigned int sigen)
9262+{
9263+ int err;
9264+
9265+ err = 0;
9266+ if (unlikely(au_digen(dentry) != sigen
5527c038 9267+ || au_iigen_test(d_inode(dentry), sigen)))
027c5e7a
AM
9268+ err = -EIO;
9269+
9270+ return err;
1facf9fc 9271+}
9272+
9273+void au_update_digen(struct dentry *dentry)
9274+{
9275+ atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb));
9276+ /* smp_mb(); */ /* atomic_set */
9277+}
9278+
9279+void au_update_dbrange(struct dentry *dentry, int do_put_zero)
9280+{
9281+ struct au_dinfo *dinfo;
9282+ struct dentry *h_d;
4a4d8108 9283+ struct au_hdentry *hdp;
1facf9fc 9284+
1308ab2a 9285+ DiMustWriteLock(dentry);
9286+
1facf9fc 9287+ dinfo = au_di(dentry);
9288+ if (!dinfo || dinfo->di_bstart < 0)
9289+ return;
9290+
4a4d8108 9291+ hdp = dinfo->di_hdentry;
1facf9fc 9292+ if (do_put_zero) {
9293+ aufs_bindex_t bindex, bend;
9294+
9295+ bend = dinfo->di_bend;
9296+ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++) {
4a4d8108 9297+ h_d = hdp[0 + bindex].hd_dentry;
5527c038 9298+ if (h_d && d_is_negative(h_d))
1facf9fc 9299+ au_set_h_dptr(dentry, bindex, NULL);
9300+ }
9301+ }
9302+
9303+ dinfo->di_bstart = -1;
9304+ while (++dinfo->di_bstart <= dinfo->di_bend)
4a4d8108 9305+ if (hdp[0 + dinfo->di_bstart].hd_dentry)
1facf9fc 9306+ break;
9307+ if (dinfo->di_bstart > dinfo->di_bend) {
9308+ dinfo->di_bstart = -1;
9309+ dinfo->di_bend = -1;
9310+ return;
9311+ }
9312+
9313+ dinfo->di_bend++;
9314+ while (0 <= --dinfo->di_bend)
4a4d8108 9315+ if (hdp[0 + dinfo->di_bend].hd_dentry)
1facf9fc 9316+ break;
9317+ AuDebugOn(dinfo->di_bstart > dinfo->di_bend || dinfo->di_bend < 0);
9318+}
9319+
9320+void au_update_dbstart(struct dentry *dentry)
9321+{
9322+ aufs_bindex_t bindex, bend;
9323+ struct dentry *h_dentry;
9324+
9325+ bend = au_dbend(dentry);
9326+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
9327+ h_dentry = au_h_dptr(dentry, bindex);
9328+ if (!h_dentry)
9329+ continue;
5527c038 9330+ if (d_is_positive(h_dentry)) {
1facf9fc 9331+ au_set_dbstart(dentry, bindex);
9332+ return;
9333+ }
9334+ au_set_h_dptr(dentry, bindex, NULL);
9335+ }
9336+}
9337+
9338+void au_update_dbend(struct dentry *dentry)
9339+{
9340+ aufs_bindex_t bindex, bstart;
9341+ struct dentry *h_dentry;
9342+
9343+ bstart = au_dbstart(dentry);
7f207e10 9344+ for (bindex = au_dbend(dentry); bindex >= bstart; bindex--) {
1facf9fc 9345+ h_dentry = au_h_dptr(dentry, bindex);
9346+ if (!h_dentry)
9347+ continue;
5527c038 9348+ if (d_is_positive(h_dentry)) {
1facf9fc 9349+ au_set_dbend(dentry, bindex);
9350+ return;
9351+ }
9352+ au_set_h_dptr(dentry, bindex, NULL);
9353+ }
9354+}
9355+
9356+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry)
9357+{
9358+ aufs_bindex_t bindex, bend;
9359+
9360+ bend = au_dbend(dentry);
9361+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++)
9362+ if (au_h_dptr(dentry, bindex) == h_dentry)
9363+ return bindex;
9364+ return -1;
9365+}
7f207e10
AM
9366diff -urN /usr/share/empty/fs/aufs/dir.c linux/fs/aufs/dir.c
9367--- /usr/share/empty/fs/aufs/dir.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 9368+++ linux/fs/aufs/dir.c 2015-12-10 17:59:16.836166410 +0100
5527c038 9369@@ -0,0 +1,753 @@
1facf9fc 9370+/*
2000de60 9371+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 9372+ *
9373+ * This program, aufs is free software; you can redistribute it and/or modify
9374+ * it under the terms of the GNU General Public License as published by
9375+ * the Free Software Foundation; either version 2 of the License, or
9376+ * (at your option) any later version.
dece6358
AM
9377+ *
9378+ * This program is distributed in the hope that it will be useful,
9379+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9380+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9381+ * GNU General Public License for more details.
9382+ *
9383+ * You should have received a copy of the GNU General Public License
523b37e3 9384+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 9385+ */
9386+
9387+/*
9388+ * directory operations
9389+ */
9390+
9391+#include <linux/fs_stack.h>
9392+#include "aufs.h"
9393+
9394+void au_add_nlink(struct inode *dir, struct inode *h_dir)
9395+{
9dbd164d
AM
9396+ unsigned int nlink;
9397+
1facf9fc 9398+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
9399+
9dbd164d
AM
9400+ nlink = dir->i_nlink;
9401+ nlink += h_dir->i_nlink - 2;
1facf9fc 9402+ if (h_dir->i_nlink < 2)
9dbd164d 9403+ nlink += 2;
f6b6e03d 9404+ smp_mb(); /* for i_nlink */
7eafdf33 9405+ /* 0 can happen in revaliding */
92d182d2 9406+ set_nlink(dir, nlink);
1facf9fc 9407+}
9408+
9409+void au_sub_nlink(struct inode *dir, struct inode *h_dir)
9410+{
9dbd164d
AM
9411+ unsigned int nlink;
9412+
1facf9fc 9413+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
9414+
9dbd164d
AM
9415+ nlink = dir->i_nlink;
9416+ nlink -= h_dir->i_nlink - 2;
1facf9fc 9417+ if (h_dir->i_nlink < 2)
9dbd164d 9418+ nlink -= 2;
f6b6e03d 9419+ smp_mb(); /* for i_nlink */
92d182d2 9420+ /* nlink == 0 means the branch-fs is broken */
9dbd164d 9421+ set_nlink(dir, nlink);
1facf9fc 9422+}
9423+
1308ab2a 9424+loff_t au_dir_size(struct file *file, struct dentry *dentry)
9425+{
9426+ loff_t sz;
9427+ aufs_bindex_t bindex, bend;
9428+ struct file *h_file;
9429+ struct dentry *h_dentry;
9430+
9431+ sz = 0;
9432+ if (file) {
2000de60 9433+ AuDebugOn(!d_is_dir(file->f_path.dentry));
1308ab2a 9434+
4a4d8108 9435+ bend = au_fbend_dir(file);
1308ab2a 9436+ for (bindex = au_fbstart(file);
9437+ bindex <= bend && sz < KMALLOC_MAX_SIZE;
9438+ bindex++) {
4a4d8108 9439+ h_file = au_hf_dir(file, bindex);
c06a8ce3
AM
9440+ if (h_file && file_inode(h_file))
9441+ sz += vfsub_f_size_read(h_file);
1308ab2a 9442+ }
9443+ } else {
9444+ AuDebugOn(!dentry);
2000de60 9445+ AuDebugOn(!d_is_dir(dentry));
1308ab2a 9446+
9447+ bend = au_dbtaildir(dentry);
9448+ for (bindex = au_dbstart(dentry);
9449+ bindex <= bend && sz < KMALLOC_MAX_SIZE;
9450+ bindex++) {
9451+ h_dentry = au_h_dptr(dentry, bindex);
5527c038
JR
9452+ if (h_dentry && d_is_positive(h_dentry))
9453+ sz += i_size_read(d_inode(h_dentry));
1308ab2a 9454+ }
9455+ }
9456+ if (sz < KMALLOC_MAX_SIZE)
9457+ sz = roundup_pow_of_two(sz);
9458+ if (sz > KMALLOC_MAX_SIZE)
9459+ sz = KMALLOC_MAX_SIZE;
9460+ else if (sz < NAME_MAX) {
9461+ BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX);
9462+ sz = AUFS_RDBLK_DEF;
9463+ }
9464+ return sz;
9465+}
9466+
b912730e
AM
9467+struct au_dir_ts_arg {
9468+ struct dentry *dentry;
9469+ aufs_bindex_t brid;
9470+};
9471+
9472+static void au_do_dir_ts(void *arg)
9473+{
9474+ struct au_dir_ts_arg *a = arg;
9475+ struct au_dtime dt;
9476+ struct path h_path;
9477+ struct inode *dir, *h_dir;
9478+ struct super_block *sb;
9479+ struct au_branch *br;
9480+ struct au_hinode *hdir;
9481+ int err;
9482+ aufs_bindex_t bstart, bindex;
9483+
9484+ sb = a->dentry->d_sb;
5527c038 9485+ if (d_really_is_negative(a->dentry))
b912730e 9486+ goto out;
5527c038 9487+ /* no dir->i_mutex lock */
ab036dbd
AM
9488+ aufs_read_lock(a->dentry, AuLock_DW); /* noflush */
9489+
5527c038 9490+ dir = d_inode(a->dentry);
b912730e
AM
9491+ bstart = au_ibstart(dir);
9492+ bindex = au_br_index(sb, a->brid);
9493+ if (bindex < bstart)
9494+ goto out_unlock;
9495+
9496+ br = au_sbr(sb, bindex);
9497+ h_path.dentry = au_h_dptr(a->dentry, bindex);
9498+ if (!h_path.dentry)
9499+ goto out_unlock;
9500+ h_path.mnt = au_br_mnt(br);
9501+ au_dtime_store(&dt, a->dentry, &h_path);
9502+
9503+ br = au_sbr(sb, bstart);
9504+ if (!au_br_writable(br->br_perm))
9505+ goto out_unlock;
9506+ h_path.dentry = au_h_dptr(a->dentry, bstart);
9507+ h_path.mnt = au_br_mnt(br);
9508+ err = vfsub_mnt_want_write(h_path.mnt);
9509+ if (err)
9510+ goto out_unlock;
9511+ hdir = au_hi(dir, bstart);
9512+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
9513+ h_dir = au_h_iptr(dir, bstart);
9514+ if (h_dir->i_nlink
9515+ && timespec_compare(&h_dir->i_mtime, &dt.dt_mtime) < 0) {
9516+ dt.dt_h_path = h_path;
9517+ au_dtime_revert(&dt);
9518+ }
9519+ au_hn_imtx_unlock(hdir);
9520+ vfsub_mnt_drop_write(h_path.mnt);
9521+ au_cpup_attr_timesizes(dir);
9522+
9523+out_unlock:
9524+ aufs_read_unlock(a->dentry, AuLock_DW);
9525+out:
9526+ dput(a->dentry);
9527+ au_nwt_done(&au_sbi(sb)->si_nowait);
9528+ kfree(arg);
9529+}
9530+
9531+void au_dir_ts(struct inode *dir, aufs_bindex_t bindex)
9532+{
9533+ int perm, wkq_err;
9534+ aufs_bindex_t bstart;
9535+ struct au_dir_ts_arg *arg;
9536+ struct dentry *dentry;
9537+ struct super_block *sb;
9538+
9539+ IMustLock(dir);
9540+
9541+ dentry = d_find_any_alias(dir);
9542+ AuDebugOn(!dentry);
9543+ sb = dentry->d_sb;
9544+ bstart = au_ibstart(dir);
9545+ if (bstart == bindex) {
9546+ au_cpup_attr_timesizes(dir);
9547+ goto out;
9548+ }
9549+
9550+ perm = au_sbr_perm(sb, bstart);
9551+ if (!au_br_writable(perm))
9552+ goto out;
9553+
9554+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
9555+ if (!arg)
9556+ goto out;
9557+
9558+ arg->dentry = dget(dentry); /* will be dput-ted by au_do_dir_ts() */
9559+ arg->brid = au_sbr_id(sb, bindex);
9560+ wkq_err = au_wkq_nowait(au_do_dir_ts, arg, sb, /*flags*/0);
9561+ if (unlikely(wkq_err)) {
9562+ pr_err("wkq %d\n", wkq_err);
9563+ dput(dentry);
9564+ kfree(arg);
9565+ }
9566+
9567+out:
9568+ dput(dentry);
9569+}
9570+
1facf9fc 9571+/* ---------------------------------------------------------------------- */
9572+
9573+static int reopen_dir(struct file *file)
9574+{
9575+ int err;
9576+ unsigned int flags;
9577+ aufs_bindex_t bindex, btail, bstart;
9578+ struct dentry *dentry, *h_dentry;
9579+ struct file *h_file;
9580+
9581+ /* open all lower dirs */
2000de60 9582+ dentry = file->f_path.dentry;
1facf9fc 9583+ bstart = au_dbstart(dentry);
9584+ for (bindex = au_fbstart(file); bindex < bstart; bindex++)
9585+ au_set_h_fptr(file, bindex, NULL);
9586+ au_set_fbstart(file, bstart);
9587+
9588+ btail = au_dbtaildir(dentry);
4a4d8108 9589+ for (bindex = au_fbend_dir(file); btail < bindex; bindex--)
1facf9fc 9590+ au_set_h_fptr(file, bindex, NULL);
4a4d8108 9591+ au_set_fbend_dir(file, btail);
1facf9fc 9592+
4a4d8108 9593+ flags = vfsub_file_flags(file);
1facf9fc 9594+ for (bindex = bstart; bindex <= btail; bindex++) {
9595+ h_dentry = au_h_dptr(dentry, bindex);
9596+ if (!h_dentry)
9597+ continue;
4a4d8108 9598+ h_file = au_hf_dir(file, bindex);
1facf9fc 9599+ if (h_file)
9600+ continue;
9601+
392086de 9602+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
1facf9fc 9603+ err = PTR_ERR(h_file);
9604+ if (IS_ERR(h_file))
9605+ goto out; /* close all? */
9606+ au_set_h_fptr(file, bindex, h_file);
9607+ }
9608+ au_update_figen(file);
9609+ /* todo: necessary? */
9610+ /* file->f_ra = h_file->f_ra; */
9611+ err = 0;
9612+
4f0767ce 9613+out:
1facf9fc 9614+ return err;
9615+}
9616+
b912730e 9617+static int do_open_dir(struct file *file, int flags, struct file *h_file)
1facf9fc 9618+{
9619+ int err;
9620+ aufs_bindex_t bindex, btail;
9621+ struct dentry *dentry, *h_dentry;
1facf9fc 9622+
1308ab2a 9623+ FiMustWriteLock(file);
b912730e 9624+ AuDebugOn(h_file);
1308ab2a 9625+
523b37e3 9626+ err = 0;
2000de60 9627+ dentry = file->f_path.dentry;
5527c038 9628+ file->f_version = d_inode(dentry)->i_version;
1facf9fc 9629+ bindex = au_dbstart(dentry);
9630+ au_set_fbstart(file, bindex);
9631+ btail = au_dbtaildir(dentry);
4a4d8108 9632+ au_set_fbend_dir(file, btail);
1facf9fc 9633+ for (; !err && bindex <= btail; bindex++) {
9634+ h_dentry = au_h_dptr(dentry, bindex);
9635+ if (!h_dentry)
9636+ continue;
9637+
392086de 9638+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
1facf9fc 9639+ if (IS_ERR(h_file)) {
9640+ err = PTR_ERR(h_file);
9641+ break;
9642+ }
9643+ au_set_h_fptr(file, bindex, h_file);
9644+ }
9645+ au_update_figen(file);
9646+ /* todo: necessary? */
9647+ /* file->f_ra = h_file->f_ra; */
9648+ if (!err)
9649+ return 0; /* success */
9650+
9651+ /* close all */
9652+ for (bindex = au_fbstart(file); bindex <= btail; bindex++)
9653+ au_set_h_fptr(file, bindex, NULL);
9654+ au_set_fbstart(file, -1);
4a4d8108
AM
9655+ au_set_fbend_dir(file, -1);
9656+
1facf9fc 9657+ return err;
9658+}
9659+
9660+static int aufs_open_dir(struct inode *inode __maybe_unused,
9661+ struct file *file)
9662+{
4a4d8108
AM
9663+ int err;
9664+ struct super_block *sb;
9665+ struct au_fidir *fidir;
9666+
9667+ err = -ENOMEM;
2000de60 9668+ sb = file->f_path.dentry->d_sb;
4a4d8108 9669+ si_read_lock(sb, AuLock_FLUSH);
e49829fe 9670+ fidir = au_fidir_alloc(sb);
4a4d8108 9671+ if (fidir) {
b912730e
AM
9672+ struct au_do_open_args args = {
9673+ .open = do_open_dir,
9674+ .fidir = fidir
9675+ };
9676+ err = au_do_open(file, &args);
4a4d8108
AM
9677+ if (unlikely(err))
9678+ kfree(fidir);
9679+ }
9680+ si_read_unlock(sb);
9681+ return err;
1facf9fc 9682+}
9683+
9684+static int aufs_release_dir(struct inode *inode __maybe_unused,
9685+ struct file *file)
9686+{
9687+ struct au_vdir *vdir_cache;
4a4d8108
AM
9688+ struct au_finfo *finfo;
9689+ struct au_fidir *fidir;
9690+ aufs_bindex_t bindex, bend;
1facf9fc 9691+
4a4d8108
AM
9692+ finfo = au_fi(file);
9693+ fidir = finfo->fi_hdir;
9694+ if (fidir) {
076b876e 9695+ au_sphl_del(&finfo->fi_hlist,
2000de60 9696+ &au_sbi(file->f_path.dentry->d_sb)->si_files);
4a4d8108
AM
9697+ vdir_cache = fidir->fd_vdir_cache; /* lock-free */
9698+ if (vdir_cache)
9699+ au_vdir_free(vdir_cache);
9700+
9701+ bindex = finfo->fi_btop;
9702+ if (bindex >= 0) {
9703+ /*
9704+ * calls fput() instead of filp_close(),
9705+ * since no dnotify or lock for the lower file.
9706+ */
9707+ bend = fidir->fd_bbot;
9708+ for (; bindex <= bend; bindex++)
9709+ au_set_h_fptr(file, bindex, NULL);
9710+ }
9711+ kfree(fidir);
9712+ finfo->fi_hdir = NULL;
1facf9fc 9713+ }
1facf9fc 9714+ au_finfo_fin(file);
1facf9fc 9715+ return 0;
9716+}
9717+
9718+/* ---------------------------------------------------------------------- */
9719+
4a4d8108
AM
9720+static int au_do_flush_dir(struct file *file, fl_owner_t id)
9721+{
9722+ int err;
9723+ aufs_bindex_t bindex, bend;
9724+ struct file *h_file;
9725+
9726+ err = 0;
9727+ bend = au_fbend_dir(file);
9728+ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
9729+ h_file = au_hf_dir(file, bindex);
9730+ if (h_file)
9731+ err = vfsub_flush(h_file, id);
9732+ }
9733+ return err;
9734+}
9735+
9736+static int aufs_flush_dir(struct file *file, fl_owner_t id)
9737+{
9738+ return au_do_flush(file, id, au_do_flush_dir);
9739+}
9740+
9741+/* ---------------------------------------------------------------------- */
9742+
1facf9fc 9743+static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync)
9744+{
9745+ int err;
9746+ aufs_bindex_t bend, bindex;
9747+ struct inode *inode;
9748+ struct super_block *sb;
9749+
9750+ err = 0;
9751+ sb = dentry->d_sb;
5527c038 9752+ inode = d_inode(dentry);
1facf9fc 9753+ IMustLock(inode);
9754+ bend = au_dbend(dentry);
9755+ for (bindex = au_dbstart(dentry); !err && bindex <= bend; bindex++) {
9756+ struct path h_path;
1facf9fc 9757+
9758+ if (au_test_ro(sb, bindex, inode))
9759+ continue;
9760+ h_path.dentry = au_h_dptr(dentry, bindex);
9761+ if (!h_path.dentry)
9762+ continue;
1facf9fc 9763+
1facf9fc 9764+ h_path.mnt = au_sbr_mnt(sb, bindex);
53392da6 9765+ err = vfsub_fsync(NULL, &h_path, datasync);
1facf9fc 9766+ }
9767+
9768+ return err;
9769+}
9770+
9771+static int au_do_fsync_dir(struct file *file, int datasync)
9772+{
9773+ int err;
9774+ aufs_bindex_t bend, bindex;
9775+ struct file *h_file;
9776+ struct super_block *sb;
9777+ struct inode *inode;
1facf9fc 9778+
9779+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
9780+ if (unlikely(err))
9781+ goto out;
9782+
c06a8ce3 9783+ inode = file_inode(file);
b912730e 9784+ sb = inode->i_sb;
4a4d8108 9785+ bend = au_fbend_dir(file);
1facf9fc 9786+ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
4a4d8108 9787+ h_file = au_hf_dir(file, bindex);
1facf9fc 9788+ if (!h_file || au_test_ro(sb, bindex, inode))
9789+ continue;
9790+
53392da6 9791+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
1facf9fc 9792+ }
9793+
4f0767ce 9794+out:
1facf9fc 9795+ return err;
9796+}
9797+
9798+/*
9799+ * @file may be NULL
9800+ */
1e00d052
AM
9801+static int aufs_fsync_dir(struct file *file, loff_t start, loff_t end,
9802+ int datasync)
1facf9fc 9803+{
9804+ int err;
b752ccd1 9805+ struct dentry *dentry;
5527c038 9806+ struct inode *inode;
1facf9fc 9807+ struct super_block *sb;
1e00d052 9808+ struct mutex *mtx;
1facf9fc 9809+
9810+ err = 0;
2000de60 9811+ dentry = file->f_path.dentry;
5527c038
JR
9812+ inode = d_inode(dentry);
9813+ mtx = &inode->i_mutex;
1e00d052 9814+ mutex_lock(mtx);
1facf9fc 9815+ sb = dentry->d_sb;
9816+ si_noflush_read_lock(sb);
9817+ if (file)
9818+ err = au_do_fsync_dir(file, datasync);
9819+ else {
9820+ di_write_lock_child(dentry);
9821+ err = au_do_fsync_dir_no_file(dentry, datasync);
9822+ }
5527c038 9823+ au_cpup_attr_timesizes(inode);
1facf9fc 9824+ di_write_unlock(dentry);
9825+ if (file)
9826+ fi_write_unlock(file);
9827+
9828+ si_read_unlock(sb);
1e00d052 9829+ mutex_unlock(mtx);
1facf9fc 9830+ return err;
9831+}
9832+
9833+/* ---------------------------------------------------------------------- */
9834+
392086de 9835+static int aufs_iterate(struct file *file, struct dir_context *ctx)
1facf9fc 9836+{
9837+ int err;
9838+ struct dentry *dentry;
9dbd164d 9839+ struct inode *inode, *h_inode;
1facf9fc 9840+ struct super_block *sb;
9841+
523b37e3 9842+ AuDbg("%pD, ctx{%pf, %llu}\n", file, ctx->actor, ctx->pos);
392086de 9843+
2000de60 9844+ dentry = file->f_path.dentry;
5527c038 9845+ inode = d_inode(dentry);
1facf9fc 9846+ IMustLock(inode);
9847+
9848+ sb = dentry->d_sb;
9849+ si_read_lock(sb, AuLock_FLUSH);
9850+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
9851+ if (unlikely(err))
9852+ goto out;
027c5e7a
AM
9853+ err = au_alive_dir(dentry);
9854+ if (!err)
9855+ err = au_vdir_init(file);
1facf9fc 9856+ di_downgrade_lock(dentry, AuLock_IR);
9857+ if (unlikely(err))
9858+ goto out_unlock;
9859+
9dbd164d 9860+ h_inode = au_h_iptr(inode, au_ibstart(inode));
b752ccd1 9861+ if (!au_test_nfsd()) {
392086de 9862+ err = au_vdir_fill_de(file, ctx);
9dbd164d 9863+ fsstack_copy_attr_atime(inode, h_inode);
1facf9fc 9864+ } else {
9865+ /*
9866+ * nfsd filldir may call lookup_one_len(), vfs_getattr(),
9867+ * encode_fh() and others.
9868+ */
9dbd164d 9869+ atomic_inc(&h_inode->i_count);
1facf9fc 9870+ di_read_unlock(dentry, AuLock_IR);
9871+ si_read_unlock(sb);
392086de 9872+ err = au_vdir_fill_de(file, ctx);
1facf9fc 9873+ fsstack_copy_attr_atime(inode, h_inode);
9874+ fi_write_unlock(file);
9dbd164d 9875+ iput(h_inode);
1facf9fc 9876+
9877+ AuTraceErr(err);
9878+ return err;
9879+ }
9880+
4f0767ce 9881+out_unlock:
1facf9fc 9882+ di_read_unlock(dentry, AuLock_IR);
9883+ fi_write_unlock(file);
4f0767ce 9884+out:
1facf9fc 9885+ si_read_unlock(sb);
9886+ return err;
9887+}
9888+
9889+/* ---------------------------------------------------------------------- */
9890+
/*
 * Flag bits kept in test_empty_arg.flags while a directory is tested for
 * emptiness.
 */
#define AuTestEmpty_WHONLY	1		/* tested via au_ftest_testempty(, WHONLY) */
#define AuTestEmpty_CALLED	(1 << 1)	/* set by the callback on every call */
#define AuTestEmpty_SHWH	(1 << 2)	/* handed to au_nhash_append_wh() */

/* test, set and clear one of the bits above by its short name */
#define au_ftest_testempty(flags, name)	((flags) & AuTestEmpty_##name)
#define au_fset_testempty(flags, name) \
	do { (flags) |= AuTestEmpty_##name; } while (0)
#define au_fclr_testempty(flags, name) \
	do { (flags) &= ~AuTestEmpty_##name; } while (0)

/* without CONFIG_AUFS_SHWH the SHWH bit is zero, so it never tests true */
#ifndef CONFIG_AUFS_SHWH
#undef AuTestEmpty_SHWH
#define AuTestEmpty_SHWH	0
#endif
9904+
1facf9fc 9905+struct test_empty_arg {
392086de 9906+ struct dir_context ctx;
1308ab2a 9907+ struct au_nhash *whlist;
1facf9fc 9908+ unsigned int flags;
9909+ int err;
9910+ aufs_bindex_t bindex;
9911+};
9912+
392086de
AM
9913+static int test_empty_cb(struct dir_context *ctx, const char *__name,
9914+ int namelen, loff_t offset __maybe_unused, u64 ino,
dece6358 9915+ unsigned int d_type)
1facf9fc 9916+{
392086de
AM
9917+ struct test_empty_arg *arg = container_of(ctx, struct test_empty_arg,
9918+ ctx);
1facf9fc 9919+ char *name = (void *)__name;
9920+
9921+ arg->err = 0;
9922+ au_fset_testempty(arg->flags, CALLED);
9923+ /* smp_mb(); */
9924+ if (name[0] == '.'
9925+ && (namelen == 1 || (name[1] == '.' && namelen == 2)))
9926+ goto out; /* success */
9927+
9928+ if (namelen <= AUFS_WH_PFX_LEN
9929+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
9930+ if (au_ftest_testempty(arg->flags, WHONLY)
1308ab2a 9931+ && !au_nhash_test_known_wh(arg->whlist, name, namelen))
1facf9fc 9932+ arg->err = -ENOTEMPTY;
9933+ goto out;
9934+ }
9935+
9936+ name += AUFS_WH_PFX_LEN;
9937+ namelen -= AUFS_WH_PFX_LEN;
1308ab2a 9938+ if (!au_nhash_test_known_wh(arg->whlist, name, namelen))
1facf9fc 9939+ arg->err = au_nhash_append_wh
1308ab2a 9940+ (arg->whlist, name, namelen, ino, d_type, arg->bindex,
dece6358 9941+ au_ftest_testempty(arg->flags, SHWH));
1facf9fc 9942+
4f0767ce 9943+out:
1facf9fc 9944+ /* smp_mb(); */
9945+ AuTraceErr(arg->err);
9946+ return arg->err;
9947+}
9948+
9949+static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
9950+{
9951+ int err;
9952+ struct file *h_file;
9953+
9954+ h_file = au_h_open(dentry, arg->bindex,
9955+ O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE,
392086de 9956+ /*file*/NULL, /*force_wr*/0);
1facf9fc 9957+ err = PTR_ERR(h_file);
9958+ if (IS_ERR(h_file))
9959+ goto out;
9960+
9961+ err = 0;
9962+ if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
c06a8ce3 9963+ && !file_inode(h_file)->i_nlink)
1facf9fc 9964+ goto out_put;
9965+
9966+ do {
9967+ arg->err = 0;
9968+ au_fclr_testempty(arg->flags, CALLED);
9969+ /* smp_mb(); */
392086de 9970+ err = vfsub_iterate_dir(h_file, &arg->ctx);
1facf9fc 9971+ if (err >= 0)
9972+ err = arg->err;
9973+ } while (!err && au_ftest_testempty(arg->flags, CALLED));
9974+
4f0767ce 9975+out_put:
1facf9fc 9976+ fput(h_file);
9977+ au_sbr_put(dentry->d_sb, arg->bindex);
4f0767ce 9978+out:
1facf9fc 9979+ return err;
9980+}
9981+
/* argument bundle that carries a do_test_empty() call through au_wkq_wait() */
struct do_test_empty_args {
	int *errp;			/* receives do_test_empty()'s result */
	struct dentry *dentry;
	struct test_empty_arg *arg;
};

/* workqueue entry point: unpack @args and run do_test_empty() */
static void call_do_test_empty(void *args)
{
	struct do_test_empty_args *packed = args;
	int result;

	result = do_test_empty(packed->dentry, packed->arg);
	*packed->errp = result;
}
9993+
9994+static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
9995+{
9996+ int err, wkq_err;
9997+ struct dentry *h_dentry;
9998+ struct inode *h_inode;
9999+
10000+ h_dentry = au_h_dptr(dentry, arg->bindex);
5527c038 10001+ h_inode = d_inode(h_dentry);
53392da6 10002+ /* todo: i_mode changes anytime? */
1facf9fc 10003+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
10004+ err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ);
10005+ mutex_unlock(&h_inode->i_mutex);
10006+ if (!err)
10007+ err = do_test_empty(dentry, arg);
10008+ else {
10009+ struct do_test_empty_args args = {
10010+ .errp = &err,
10011+ .dentry = dentry,
10012+ .arg = arg
10013+ };
10014+ unsigned int flags = arg->flags;
10015+
10016+ wkq_err = au_wkq_wait(call_do_test_empty, &args);
10017+ if (unlikely(wkq_err))
10018+ err = wkq_err;
10019+ arg->flags = flags;
10020+ }
10021+
10022+ return err;
10023+}
10024+
10025+int au_test_empty_lower(struct dentry *dentry)
10026+{
10027+ int err;
1308ab2a 10028+ unsigned int rdhash;
1facf9fc 10029+ aufs_bindex_t bindex, bstart, btail;
1308ab2a 10030+ struct au_nhash whlist;
392086de
AM
10031+ struct test_empty_arg arg = {
10032+ .ctx = {
2000de60 10033+ .actor = test_empty_cb
392086de
AM
10034+ }
10035+ };
076b876e 10036+ int (*test_empty)(struct dentry *dentry, struct test_empty_arg *arg);
1facf9fc 10037+
dece6358
AM
10038+ SiMustAnyLock(dentry->d_sb);
10039+
1308ab2a 10040+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
10041+ if (!rdhash)
10042+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry));
10043+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
dece6358 10044+ if (unlikely(err))
1facf9fc 10045+ goto out;
10046+
1facf9fc 10047+ arg.flags = 0;
1308ab2a 10048+ arg.whlist = &whlist;
10049+ bstart = au_dbstart(dentry);
dece6358
AM
10050+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
10051+ au_fset_testempty(arg.flags, SHWH);
076b876e
AM
10052+ test_empty = do_test_empty;
10053+ if (au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1))
10054+ test_empty = sio_test_empty;
1facf9fc 10055+ arg.bindex = bstart;
076b876e 10056+ err = test_empty(dentry, &arg);
1facf9fc 10057+ if (unlikely(err))
10058+ goto out_whlist;
10059+
10060+ au_fset_testempty(arg.flags, WHONLY);
10061+ btail = au_dbtaildir(dentry);
10062+ for (bindex = bstart + 1; !err && bindex <= btail; bindex++) {
10063+ struct dentry *h_dentry;
10064+
10065+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 10066+ if (h_dentry && d_is_positive(h_dentry)) {
1facf9fc 10067+ arg.bindex = bindex;
076b876e 10068+ err = test_empty(dentry, &arg);
1facf9fc 10069+ }
10070+ }
10071+
4f0767ce 10072+out_whlist:
1308ab2a 10073+ au_nhash_wh_free(&whlist);
4f0767ce 10074+out:
1facf9fc 10075+ return err;
10076+}
10077+
10078+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist)
10079+{
10080+ int err;
392086de
AM
10081+ struct test_empty_arg arg = {
10082+ .ctx = {
2000de60 10083+ .actor = test_empty_cb
392086de
AM
10084+ }
10085+ };
1facf9fc 10086+ aufs_bindex_t bindex, btail;
10087+
10088+ err = 0;
1308ab2a 10089+ arg.whlist = whlist;
1facf9fc 10090+ arg.flags = AuTestEmpty_WHONLY;
dece6358
AM
10091+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
10092+ au_fset_testempty(arg.flags, SHWH);
1facf9fc 10093+ btail = au_dbtaildir(dentry);
10094+ for (bindex = au_dbstart(dentry); !err && bindex <= btail; bindex++) {
10095+ struct dentry *h_dentry;
10096+
10097+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 10098+ if (h_dentry && d_is_positive(h_dentry)) {
1facf9fc 10099+ arg.bindex = bindex;
10100+ err = sio_test_empty(dentry, &arg);
10101+ }
10102+ }
10103+
10104+ return err;
10105+}
10106+
10107+/* ---------------------------------------------------------------------- */
10108+
10109+const struct file_operations aufs_dir_fop = {
4a4d8108 10110+ .owner = THIS_MODULE,
027c5e7a 10111+ .llseek = default_llseek,
1facf9fc 10112+ .read = generic_read_dir,
392086de 10113+ .iterate = aufs_iterate,
1facf9fc 10114+ .unlocked_ioctl = aufs_ioctl_dir,
b752ccd1
AM
10115+#ifdef CONFIG_COMPAT
10116+ .compat_ioctl = aufs_compat_ioctl_dir,
10117+#endif
1facf9fc 10118+ .open = aufs_open_dir,
10119+ .release = aufs_release_dir,
4a4d8108 10120+ .flush = aufs_flush_dir,
1facf9fc 10121+ .fsync = aufs_fsync_dir
10122+};
7f207e10
AM
10123diff -urN /usr/share/empty/fs/aufs/dir.h linux/fs/aufs/dir.h
10124--- /usr/share/empty/fs/aufs/dir.h 1970-01-01 01:00:00.000000000 +0100
ab036dbd 10125+++ linux/fs/aufs/dir.h 2015-09-24 10:47:58.251386326 +0200
b912730e 10126@@ -0,0 +1,131 @@
1facf9fc 10127+/*
2000de60 10128+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 10129+ *
10130+ * This program, aufs is free software; you can redistribute it and/or modify
10131+ * it under the terms of the GNU General Public License as published by
10132+ * the Free Software Foundation; either version 2 of the License, or
10133+ * (at your option) any later version.
dece6358
AM
10134+ *
10135+ * This program is distributed in the hope that it will be useful,
10136+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10137+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10138+ * GNU General Public License for more details.
10139+ *
10140+ * You should have received a copy of the GNU General Public License
523b37e3 10141+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 10142+ */
10143+
10144+/*
10145+ * directory operations
10146+ */
10147+
10148+#ifndef __AUFS_DIR_H__
10149+#define __AUFS_DIR_H__
10150+
10151+#ifdef __KERNEL__
10152+
10153+#include <linux/fs.h>
1facf9fc 10154+
10155+/* ---------------------------------------------------------------------- */
10156+
10157+/* need to be faster and smaller */
10158+
10159+struct au_nhash {
dece6358
AM
10160+ unsigned int nh_num;
10161+ struct hlist_head *nh_head;
1facf9fc 10162+};
10163+
10164+struct au_vdir_destr {
10165+ unsigned char len;
10166+ unsigned char name[0];
10167+} __packed;
10168+
10169+struct au_vdir_dehstr {
10170+ struct hlist_node hash;
10171+ struct au_vdir_destr *str;
4a4d8108 10172+} ____cacheline_aligned_in_smp;
1facf9fc 10173+
10174+struct au_vdir_de {
10175+ ino_t de_ino;
10176+ unsigned char de_type;
10177+ /* caution: packed */
10178+ struct au_vdir_destr de_str;
10179+} __packed;
10180+
10181+struct au_vdir_wh {
10182+ struct hlist_node wh_hash;
dece6358
AM
10183+#ifdef CONFIG_AUFS_SHWH
10184+ ino_t wh_ino;
1facf9fc 10185+ aufs_bindex_t wh_bindex;
dece6358
AM
10186+ unsigned char wh_type;
10187+#else
10188+ aufs_bindex_t wh_bindex;
10189+#endif
10190+ /* caution: packed */
1facf9fc 10191+ struct au_vdir_destr wh_str;
10192+} __packed;
10193+
10194+union au_vdir_deblk_p {
10195+ unsigned char *deblk;
10196+ struct au_vdir_de *de;
10197+};
10198+
10199+struct au_vdir {
10200+ unsigned char **vd_deblk;
10201+ unsigned long vd_nblk;
1facf9fc 10202+ struct {
10203+ unsigned long ul;
10204+ union au_vdir_deblk_p p;
10205+ } vd_last;
10206+
10207+ unsigned long vd_version;
dece6358 10208+ unsigned int vd_deblk_sz;
1facf9fc 10209+ unsigned long vd_jiffy;
4a4d8108 10210+} ____cacheline_aligned_in_smp;
1facf9fc 10211+
10212+/* ---------------------------------------------------------------------- */
10213+
10214+/* dir.c */
10215+extern const struct file_operations aufs_dir_fop;
10216+void au_add_nlink(struct inode *dir, struct inode *h_dir);
10217+void au_sub_nlink(struct inode *dir, struct inode *h_dir);
1308ab2a 10218+loff_t au_dir_size(struct file *file, struct dentry *dentry);
b912730e 10219+void au_dir_ts(struct inode *dir, aufs_bindex_t bsrc);
1facf9fc 10220+int au_test_empty_lower(struct dentry *dentry);
10221+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist);
10222+
10223+/* vdir.c */
1308ab2a 10224+unsigned int au_rdhash_est(loff_t sz);
dece6358
AM
10225+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp);
10226+void au_nhash_wh_free(struct au_nhash *whlist);
1facf9fc 10227+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
10228+ int limit);
dece6358
AM
10229+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen);
10230+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
10231+ unsigned int d_type, aufs_bindex_t bindex,
10232+ unsigned char shwh);
1facf9fc 10233+void au_vdir_free(struct au_vdir *vdir);
10234+int au_vdir_init(struct file *file);
392086de 10235+int au_vdir_fill_de(struct file *file, struct dir_context *ctx);
1facf9fc 10236+
10237+/* ioctl.c */
10238+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg);
10239+
1308ab2a 10240+#ifdef CONFIG_AUFS_RDU
10241+/* rdu.c */
10242+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
b752ccd1
AM
10243+#ifdef CONFIG_COMPAT
10244+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd,
10245+ unsigned long arg);
10246+#endif
1308ab2a 10247+#else
c1595e42
JR
10248+AuStub(long, au_rdu_ioctl, return -EINVAL, struct file *file,
10249+ unsigned int cmd, unsigned long arg)
b752ccd1 10250+#ifdef CONFIG_COMPAT
c1595e42
JR
10251+AuStub(long, au_rdu_compat_ioctl, return -EINVAL, struct file *file,
10252+ unsigned int cmd, unsigned long arg)
b752ccd1 10253+#endif
1308ab2a 10254+#endif
10255+
1facf9fc 10256+#endif /* __KERNEL__ */
10257+#endif /* __AUFS_DIR_H__ */
7f207e10
AM
10258diff -urN /usr/share/empty/fs/aufs/dynop.c linux/fs/aufs/dynop.c
10259--- /usr/share/empty/fs/aufs/dynop.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 10260+++ linux/fs/aufs/dynop.c 2015-09-24 10:47:58.251386326 +0200
7e9cd9fe 10261@@ -0,0 +1,369 @@
1facf9fc 10262+/*
2000de60 10263+ * Copyright (C) 2010-2015 Junjiro R. Okajima
1facf9fc 10264+ *
10265+ * This program, aufs is free software; you can redistribute it and/or modify
10266+ * it under the terms of the GNU General Public License as published by
10267+ * the Free Software Foundation; either version 2 of the License, or
10268+ * (at your option) any later version.
dece6358
AM
10269+ *
10270+ * This program is distributed in the hope that it will be useful,
10271+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10272+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10273+ * GNU General Public License for more details.
10274+ *
10275+ * You should have received a copy of the GNU General Public License
523b37e3 10276+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 10277+ */
10278+
10279+/*
4a4d8108 10280+ * dynamically customizable operations for regular files
1facf9fc 10281+ */
10282+
1facf9fc 10283+#include "aufs.h"
10284+
4a4d8108 10285+#define DyPrSym(key) AuDbgSym(key->dk_op.dy_hop)
1facf9fc 10286+
4a4d8108
AM
10287+/*
10288+ * How large will these lists be?
10289+ * Usually just a few elements, 20-30 at most for each, I guess.
10290+ */
10291+static struct au_splhead dynop[AuDyLast];
10292+
10293+static struct au_dykey *dy_gfind_get(struct au_splhead *spl, const void *h_op)
1facf9fc 10294+{
4a4d8108
AM
10295+ struct au_dykey *key, *tmp;
10296+ struct list_head *head;
1facf9fc 10297+
4a4d8108
AM
10298+ key = NULL;
10299+ head = &spl->head;
10300+ rcu_read_lock();
10301+ list_for_each_entry_rcu(tmp, head, dk_list)
10302+ if (tmp->dk_op.dy_hop == h_op) {
10303+ key = tmp;
10304+ kref_get(&key->dk_kref);
10305+ break;
10306+ }
10307+ rcu_read_unlock();
10308+
10309+ return key;
1facf9fc 10310+}
10311+
4a4d8108 10312+static struct au_dykey *dy_bradd(struct au_branch *br, struct au_dykey *key)
1facf9fc 10313+{
4a4d8108
AM
10314+ struct au_dykey **k, *found;
10315+ const void *h_op = key->dk_op.dy_hop;
10316+ int i;
1facf9fc 10317+
4a4d8108
AM
10318+ found = NULL;
10319+ k = br->br_dykey;
10320+ for (i = 0; i < AuBrDynOp; i++)
10321+ if (k[i]) {
10322+ if (k[i]->dk_op.dy_hop == h_op) {
10323+ found = k[i];
10324+ break;
10325+ }
10326+ } else
10327+ break;
10328+ if (!found) {
10329+ spin_lock(&br->br_dykey_lock);
10330+ for (; i < AuBrDynOp; i++)
10331+ if (k[i]) {
10332+ if (k[i]->dk_op.dy_hop == h_op) {
10333+ found = k[i];
10334+ break;
10335+ }
10336+ } else {
10337+ k[i] = key;
10338+ break;
10339+ }
10340+ spin_unlock(&br->br_dykey_lock);
10341+ BUG_ON(i == AuBrDynOp); /* expand the array */
10342+ }
10343+
10344+ return found;
1facf9fc 10345+}
10346+
4a4d8108
AM
10347+/* kref_get() if @key is already added */
10348+static struct au_dykey *dy_gadd(struct au_splhead *spl, struct au_dykey *key)
10349+{
10350+ struct au_dykey *tmp, *found;
10351+ struct list_head *head;
10352+ const void *h_op = key->dk_op.dy_hop;
1facf9fc 10353+
4a4d8108
AM
10354+ found = NULL;
10355+ head = &spl->head;
10356+ spin_lock(&spl->spin);
10357+ list_for_each_entry(tmp, head, dk_list)
10358+ if (tmp->dk_op.dy_hop == h_op) {
10359+ kref_get(&tmp->dk_kref);
10360+ found = tmp;
10361+ break;
10362+ }
10363+ if (!found)
10364+ list_add_rcu(&key->dk_list, head);
10365+ spin_unlock(&spl->spin);
1facf9fc 10366+
4a4d8108
AM
10367+ if (!found)
10368+ DyPrSym(key);
10369+ return found;
10370+}
10371+
10372+static void dy_free_rcu(struct rcu_head *rcu)
1facf9fc 10373+{
4a4d8108
AM
10374+ struct au_dykey *key;
10375+
10376+ key = container_of(rcu, struct au_dykey, dk_rcu);
10377+ DyPrSym(key);
10378+ kfree(key);
1facf9fc 10379+}
10380+
4a4d8108
AM
10381+static void dy_free(struct kref *kref)
10382+{
10383+ struct au_dykey *key;
10384+ struct au_splhead *spl;
1facf9fc 10385+
4a4d8108
AM
10386+ key = container_of(kref, struct au_dykey, dk_kref);
10387+ spl = dynop + key->dk_op.dy_type;
10388+ au_spl_del_rcu(&key->dk_list, spl);
10389+ call_rcu(&key->dk_rcu, dy_free_rcu);
10390+}
10391+
10392+void au_dy_put(struct au_dykey *key)
1facf9fc 10393+{
4a4d8108
AM
10394+ kref_put(&key->dk_kref, dy_free);
10395+}
1facf9fc 10396+
4a4d8108
AM
10397+/* ---------------------------------------------------------------------- */
10398+
/*
 * Helpers used to fill a dynamic operation table.
 *
 * Every DySet*() use bumps the debug counter "cnt", which must be declared
 * in the calling function with DyDbgDeclare(cnt); DyDbgSize() then checks
 * that the number of members handled equals the number of pointer-sized
 * slots in the lower operation struct.  Macro arguments are parenthesized
 * so that expressions may be passed safely.
 */
#define DyDbgSize(cnt, op)	AuDebugOn((cnt) != sizeof(op) / sizeof(void *))

#ifdef CONFIG_AUFS_DEBUG
#define DyDbgDeclare(cnt)	unsigned int cnt = 0
#define DyDbgInc(cnt)		do { (cnt)++; } while (0)
#else
#define DyDbgDeclare(cnt)	do {} while (0)
#define DyDbgInc(cnt)		do {} while (0)
#endif

/*
 * Copy member @func from @src to @dst only when the lower table @h_op
 * implements it; when @src has no implementation, just log the fact.
 */
#define DySet(func, dst, src, h_op, h_sb) do {				\
	DyDbgInc(cnt);							\
	if ((h_op)->func) {						\
		if ((src).func)						\
			(dst).func = (src).func;			\
		else							\
			AuDbg("%s %s\n", au_sbtype(h_sb), #func);	\
	}								\
} while (0)

/* copy member @func unconditionally; @src must implement it */
#define DySetForce(func, dst, src) do {					\
	AuDebugOn(!(src).func);						\
	DyDbgInc(cnt);							\
	(dst).func = (src).func;					\
} while (0)

/* address_space_operations flavours; they rely on dyaop, h_aop and h_sb */
#define DySetAop(func) \
	DySet(func, dyaop->da_op, aufs_aop, h_aop, h_sb)
#define DySetAopForce(func) \
	DySetForce(func, dyaop->da_op, aufs_aop)
10429+
10430+static void dy_aop(struct au_dykey *key, const void *h_op,
10431+ struct super_block *h_sb __maybe_unused)
10432+{
10433+ struct au_dyaop *dyaop = (void *)key;
10434+ const struct address_space_operations *h_aop = h_op;
10435+ DyDbgDeclare(cnt);
10436+
10437+ AuDbg("%s\n", au_sbtype(h_sb));
10438+
10439+ DySetAop(writepage);
10440+ DySetAopForce(readpage); /* force */
4a4d8108
AM
10441+ DySetAop(writepages);
10442+ DySetAop(set_page_dirty);
10443+ DySetAop(readpages);
10444+ DySetAop(write_begin);
10445+ DySetAop(write_end);
10446+ DySetAop(bmap);
10447+ DySetAop(invalidatepage);
10448+ DySetAop(releasepage);
027c5e7a 10449+ DySetAop(freepage);
7e9cd9fe 10450+ /* this one will be changed according to an aufs mount option */
4a4d8108 10451+ DySetAop(direct_IO);
4a4d8108
AM
10452+ DySetAop(migratepage);
10453+ DySetAop(launder_page);
10454+ DySetAop(is_partially_uptodate);
392086de 10455+ DySetAop(is_dirty_writeback);
4a4d8108 10456+ DySetAop(error_remove_page);
b4510431
AM
10457+ DySetAop(swap_activate);
10458+ DySetAop(swap_deactivate);
4a4d8108
AM
10459+
10460+ DyDbgSize(cnt, *h_aop);
4a4d8108
AM
10461+}
10462+
4a4d8108
AM
10463+/* ---------------------------------------------------------------------- */
10464+
/*
 * kref release function for puts that must never be the last one;
 * reaching it is a bug.
 */
static void dy_bug(struct kref *kref)
{
	BUG();
}
10469+
10470+static struct au_dykey *dy_get(struct au_dynop *op, struct au_branch *br)
10471+{
10472+ struct au_dykey *key, *old;
10473+ struct au_splhead *spl;
b752ccd1 10474+ struct op {
4a4d8108 10475+ unsigned int sz;
b752ccd1
AM
10476+ void (*set)(struct au_dykey *key, const void *h_op,
10477+ struct super_block *h_sb __maybe_unused);
10478+ };
10479+ static const struct op a[] = {
4a4d8108
AM
10480+ [AuDy_AOP] = {
10481+ .sz = sizeof(struct au_dyaop),
b752ccd1 10482+ .set = dy_aop
4a4d8108 10483+ }
b752ccd1
AM
10484+ };
10485+ const struct op *p;
4a4d8108
AM
10486+
10487+ spl = dynop + op->dy_type;
10488+ key = dy_gfind_get(spl, op->dy_hop);
10489+ if (key)
10490+ goto out_add; /* success */
10491+
10492+ p = a + op->dy_type;
10493+ key = kzalloc(p->sz, GFP_NOFS);
10494+ if (unlikely(!key)) {
10495+ key = ERR_PTR(-ENOMEM);
10496+ goto out;
10497+ }
10498+
10499+ key->dk_op.dy_hop = op->dy_hop;
10500+ kref_init(&key->dk_kref);
86dc4139 10501+ p->set(key, op->dy_hop, au_br_sb(br));
4a4d8108
AM
10502+ old = dy_gadd(spl, key);
10503+ if (old) {
10504+ kfree(key);
10505+ key = old;
10506+ }
10507+
10508+out_add:
10509+ old = dy_bradd(br, key);
10510+ if (old)
10511+ /* its ref-count should never be zero here */
10512+ kref_put(&key->dk_kref, dy_bug);
10513+out:
10514+ return key;
10515+}
10516+
10517+/* ---------------------------------------------------------------------- */
10518+/*
10519+ * Aufs prohibits O_DIRECT by defaut even if the branch supports it.
c1595e42 10520+ * This behaviour is necessary to return an error from open(O_DIRECT) instead
4a4d8108
AM
10521+ * of the succeeding I/O. The dio mount option enables O_DIRECT and makes
10522+ * open(O_DIRECT) always succeed, but the succeeding I/O may return an error.
10523+ * See the aufs manual in detail.
4a4d8108
AM
10524+ */
10525+static void dy_adx(struct au_dyaop *dyaop, int do_dx)
10526+{
7e9cd9fe 10527+ if (!do_dx)
4a4d8108 10528+ dyaop->da_op.direct_IO = NULL;
7e9cd9fe 10529+ else
4a4d8108 10530+ dyaop->da_op.direct_IO = aufs_aop.direct_IO;
4a4d8108
AM
10531+}
10532+
10533+static struct au_dyaop *dy_aget(struct au_branch *br,
10534+ const struct address_space_operations *h_aop,
10535+ int do_dx)
10536+{
10537+ struct au_dyaop *dyaop;
10538+ struct au_dynop op;
10539+
10540+ op.dy_type = AuDy_AOP;
10541+ op.dy_haop = h_aop;
10542+ dyaop = (void *)dy_get(&op, br);
10543+ if (IS_ERR(dyaop))
10544+ goto out;
10545+ dy_adx(dyaop, do_dx);
10546+
10547+out:
10548+ return dyaop;
10549+}
10550+
10551+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
10552+ struct inode *h_inode)
10553+{
10554+ int err, do_dx;
10555+ struct super_block *sb;
10556+ struct au_branch *br;
10557+ struct au_dyaop *dyaop;
10558+
10559+ AuDebugOn(!S_ISREG(h_inode->i_mode));
10560+ IiMustWriteLock(inode);
10561+
10562+ sb = inode->i_sb;
10563+ br = au_sbr(sb, bindex);
10564+ do_dx = !!au_opt_test(au_mntflags(sb), DIO);
10565+ dyaop = dy_aget(br, h_inode->i_mapping->a_ops, do_dx);
10566+ err = PTR_ERR(dyaop);
10567+ if (IS_ERR(dyaop))
10568+ /* unnecessary to call dy_fput() */
10569+ goto out;
10570+
10571+ err = 0;
10572+ inode->i_mapping->a_ops = &dyaop->da_op;
10573+
10574+out:
10575+ return err;
10576+}
10577+
b752ccd1
AM
10578+/*
10579+ * Is it safe to replace a_ops during the inode/file is in operation?
10580+ * Yes, I hope so.
10581+ */
10582+int au_dy_irefresh(struct inode *inode)
10583+{
10584+ int err;
10585+ aufs_bindex_t bstart;
10586+ struct inode *h_inode;
10587+
10588+ err = 0;
10589+ if (S_ISREG(inode->i_mode)) {
10590+ bstart = au_ibstart(inode);
10591+ h_inode = au_h_iptr(inode, bstart);
10592+ err = au_dy_iaop(inode, bstart, h_inode);
10593+ }
10594+ return err;
10595+}
10596+
4a4d8108
AM
10597+void au_dy_arefresh(int do_dx)
10598+{
10599+ struct au_splhead *spl;
10600+ struct list_head *head;
10601+ struct au_dykey *key;
10602+
10603+ spl = dynop + AuDy_AOP;
10604+ head = &spl->head;
10605+ spin_lock(&spl->spin);
10606+ list_for_each_entry(key, head, dk_list)
10607+ dy_adx((void *)key, do_dx);
10608+ spin_unlock(&spl->spin);
10609+}
10610+
4a4d8108
AM
10611+/* ---------------------------------------------------------------------- */
10612+
10613+void __init au_dy_init(void)
10614+{
10615+ int i;
10616+
10617+ /* make sure that 'struct au_dykey *' can be any type */
10618+ BUILD_BUG_ON(offsetof(struct au_dyaop, da_key));
4a4d8108
AM
10619+
10620+ for (i = 0; i < AuDyLast; i++)
10621+ au_spl_init(dynop + i);
10622+}
10623+
10624+void au_dy_fin(void)
10625+{
10626+ int i;
10627+
10628+ for (i = 0; i < AuDyLast; i++)
10629+ WARN_ON(!list_empty(&dynop[i].head));
10630+}
7f207e10
AM
10631diff -urN /usr/share/empty/fs/aufs/dynop.h linux/fs/aufs/dynop.h
10632--- /usr/share/empty/fs/aufs/dynop.h 1970-01-01 01:00:00.000000000 +0100
ab036dbd 10633+++ linux/fs/aufs/dynop.h 2015-09-24 10:47:58.251386326 +0200
7e9cd9fe 10634@@ -0,0 +1,74 @@
4a4d8108 10635+/*
2000de60 10636+ * Copyright (C) 2010-2015 Junjiro R. Okajima
4a4d8108
AM
10637+ *
10638+ * This program, aufs is free software; you can redistribute it and/or modify
10639+ * it under the terms of the GNU General Public License as published by
10640+ * the Free Software Foundation; either version 2 of the License, or
10641+ * (at your option) any later version.
10642+ *
10643+ * This program is distributed in the hope that it will be useful,
10644+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10645+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10646+ * GNU General Public License for more details.
10647+ *
10648+ * You should have received a copy of the GNU General Public License
523b37e3 10649+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
10650+ */
10651+
10652+/*
10653+ * dynamically customizable operations (for regular files only)
10654+ */
10655+
10656+#ifndef __AUFS_DYNOP_H__
10657+#define __AUFS_DYNOP_H__
10658+
10659+#ifdef __KERNEL__
10660+
7e9cd9fe
AM
10661+#include <linux/fs.h>
10662+#include <linux/kref.h>
4a4d8108 10663+
/* operation types; AuDyLast is the count and sizes the per-type arrays */
enum {
	AuDy_AOP,
	AuDyLast
};

/* identifies one lower operation table: its type and its address */
struct au_dynop {
	int						dy_type;
	union {
		const void				*dy_hop;	/* type-agnostic view */
		const struct address_space_operations	*dy_haop;	/* for AuDy_AOP */
	};
};
10673+
10674+struct au_dykey {
10675+ union {
10676+ struct list_head dk_list;
10677+ struct rcu_head dk_rcu;
10678+ };
10679+ struct au_dynop dk_op;
10680+
10681+ /*
10682+ * during I am in the branch local array, kref is gotten. when the
10683+ * branch is removed, kref is put.
10684+ */
10685+ struct kref dk_kref;
10686+};
10687+
10688+/* stop unioning since their sizes are very different from each other */
10689+struct au_dyaop {
10690+ struct au_dykey da_key;
10691+ struct address_space_operations da_op; /* not const */
4a4d8108
AM
10692+};
10693+
4a4d8108
AM
10694+/* ---------------------------------------------------------------------- */
10695+
10696+/* dynop.c */
10697+struct au_branch;
10698+void au_dy_put(struct au_dykey *key);
10699+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
10700+ struct inode *h_inode);
b752ccd1 10701+int au_dy_irefresh(struct inode *inode);
4a4d8108 10702+void au_dy_arefresh(int do_dio);
4a4d8108
AM
10703+
10704+void __init au_dy_init(void);
10705+void au_dy_fin(void);
10706+
4a4d8108
AM
10707+#endif /* __KERNEL__ */
10708+#endif /* __AUFS_DYNOP_H__ */
7f207e10
AM
10709diff -urN /usr/share/empty/fs/aufs/export.c linux/fs/aufs/export.c
10710--- /usr/share/empty/fs/aufs/export.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 10711+++ linux/fs/aufs/export.c 2015-09-24 10:47:58.251386326 +0200
5527c038 10712@@ -0,0 +1,832 @@
4a4d8108 10713+/*
2000de60 10714+ * Copyright (C) 2005-2015 Junjiro R. Okajima
4a4d8108
AM
10715+ *
10716+ * This program, aufs is free software; you can redistribute it and/or modify
10717+ * it under the terms of the GNU General Public License as published by
10718+ * the Free Software Foundation; either version 2 of the License, or
10719+ * (at your option) any later version.
10720+ *
10721+ * This program is distributed in the hope that it will be useful,
10722+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10723+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10724+ * GNU General Public License for more details.
10725+ *
10726+ * You should have received a copy of the GNU General Public License
523b37e3 10727+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
10728+ */
10729+
10730+/*
10731+ * export via nfs
10732+ */
10733+
10734+#include <linux/exportfs.h>
7eafdf33 10735+#include <linux/fs_struct.h>
4a4d8108
AM
10736+#include <linux/namei.h>
10737+#include <linux/nsproxy.h>
10738+#include <linux/random.h>
10739+#include <linux/writeback.h>
7eafdf33 10740+#include "../fs/mount.h"
4a4d8108
AM
10741+#include "aufs.h"
10742+
10743+union conv {
10744+#ifdef CONFIG_AUFS_INO_T_64
10745+ __u32 a[2];
10746+#else
10747+ __u32 a[1];
10748+#endif
10749+ ino_t ino;
10750+};
10751+
10752+static ino_t decode_ino(__u32 *a)
10753+{
10754+ union conv u;
10755+
10756+ BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a));
10757+ u.a[0] = a[0];
10758+#ifdef CONFIG_AUFS_INO_T_64
10759+ u.a[1] = a[1];
10760+#endif
10761+ return u.ino;
10762+}
10763+
10764+static void encode_ino(__u32 *a, ino_t ino)
10765+{
10766+ union conv u;
10767+
10768+ u.ino = ino;
10769+ a[0] = u.a[0];
10770+#ifdef CONFIG_AUFS_INO_T_64
10771+ a[1] = u.a[1];
10772+#endif
10773+}
10774+
/*
 * NFS file handle
 * Indices of the 32-bit words of an aufs file handle.  Inode numbers take
 * two words each with CONFIG_AUFS_INO_T_64 and one otherwise; Fh_ino and
 * Fh_dir_ino always name the first word of each.
 */
enum {
	Fh_br_id,
	Fh_sigen,
#ifdef CONFIG_AUFS_INO_T_64
	/* support 64bit inode number */
	Fh_ino1,
	Fh_ino2,
	Fh_dir_ino1,
	Fh_dir_ino2,
#else
	Fh_ino1,
	Fh_dir_ino1,
#endif
	Fh_igen,
	Fh_h_type,
	Fh_tail,	/* first index after the fixed aufs words */

	Fh_ino = Fh_ino1,
	Fh_dir_ino = Fh_dir_ino1
};
10796+
10797+static int au_test_anon(struct dentry *dentry)
10798+{
027c5e7a 10799+ /* note: read d_flags without d_lock */
4a4d8108
AM
10800+ return !!(dentry->d_flags & DCACHE_DISCONNECTED);
10801+}
10802+
a2a7ad62
AM
10803+int au_test_nfsd(void)
10804+{
10805+ int ret;
10806+ struct task_struct *tsk = current;
10807+ char comm[sizeof(tsk->comm)];
10808+
10809+ ret = 0;
10810+ if (tsk->flags & PF_KTHREAD) {
10811+ get_task_comm(comm, tsk);
10812+ ret = !strcmp(comm, "nfsd");
10813+ }
10814+
10815+ return ret;
10816+}
10817+
4a4d8108
AM
10818+/* ---------------------------------------------------------------------- */
10819+/* inode generation external table */
10820+
b752ccd1 10821+void au_xigen_inc(struct inode *inode)
4a4d8108 10822+{
4a4d8108
AM
10823+ loff_t pos;
10824+ ssize_t sz;
10825+ __u32 igen;
10826+ struct super_block *sb;
10827+ struct au_sbinfo *sbinfo;
10828+
4a4d8108 10829+ sb = inode->i_sb;
b752ccd1 10830+ AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
1facf9fc 10831+
b752ccd1 10832+ sbinfo = au_sbi(sb);
1facf9fc 10833+ pos = inode->i_ino;
10834+ pos *= sizeof(igen);
10835+ igen = inode->i_generation + 1;
1facf9fc 10836+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen,
10837+ sizeof(igen), &pos);
10838+ if (sz == sizeof(igen))
b752ccd1 10839+ return; /* success */
1facf9fc 10840+
b752ccd1 10841+ if (unlikely(sz >= 0))
1facf9fc 10842+ AuIOErr("xigen error (%zd)\n", sz);
1facf9fc 10843+}
10844+
10845+int au_xigen_new(struct inode *inode)
10846+{
10847+ int err;
10848+ loff_t pos;
10849+ ssize_t sz;
10850+ struct super_block *sb;
10851+ struct au_sbinfo *sbinfo;
10852+ struct file *file;
10853+
10854+ err = 0;
10855+ /* todo: dirty, at mount time */
10856+ if (inode->i_ino == AUFS_ROOT_INO)
10857+ goto out;
10858+ sb = inode->i_sb;
dece6358 10859+ SiMustAnyLock(sb);
1facf9fc 10860+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
10861+ goto out;
10862+
10863+ err = -EFBIG;
10864+ pos = inode->i_ino;
10865+ if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) {
10866+ AuIOErr1("too large i%lld\n", pos);
10867+ goto out;
10868+ }
10869+ pos *= sizeof(inode->i_generation);
10870+
10871+ err = 0;
10872+ sbinfo = au_sbi(sb);
10873+ file = sbinfo->si_xigen;
10874+ BUG_ON(!file);
10875+
c06a8ce3 10876+ if (vfsub_f_size_read(file)
1facf9fc 10877+ < pos + sizeof(inode->i_generation)) {
10878+ inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next);
10879+ sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation,
10880+ sizeof(inode->i_generation), &pos);
10881+ } else
10882+ sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation,
10883+ sizeof(inode->i_generation), &pos);
10884+ if (sz == sizeof(inode->i_generation))
10885+ goto out; /* success */
10886+
10887+ err = sz;
10888+ if (unlikely(sz >= 0)) {
10889+ err = -EIO;
10890+ AuIOErr("xigen error (%zd)\n", sz);
10891+ }
10892+
4f0767ce 10893+out:
1facf9fc 10894+ return err;
10895+}
10896+
10897+int au_xigen_set(struct super_block *sb, struct file *base)
10898+{
10899+ int err;
10900+ struct au_sbinfo *sbinfo;
10901+ struct file *file;
10902+
dece6358
AM
10903+ SiMustWriteLock(sb);
10904+
1facf9fc 10905+ sbinfo = au_sbi(sb);
10906+ file = au_xino_create2(base, sbinfo->si_xigen);
10907+ err = PTR_ERR(file);
10908+ if (IS_ERR(file))
10909+ goto out;
10910+ err = 0;
10911+ if (sbinfo->si_xigen)
10912+ fput(sbinfo->si_xigen);
10913+ sbinfo->si_xigen = file;
10914+
4f0767ce 10915+out:
1facf9fc 10916+ return err;
10917+}
10918+
10919+void au_xigen_clr(struct super_block *sb)
10920+{
10921+ struct au_sbinfo *sbinfo;
10922+
dece6358
AM
10923+ SiMustWriteLock(sb);
10924+
1facf9fc 10925+ sbinfo = au_sbi(sb);
10926+ if (sbinfo->si_xigen) {
10927+ fput(sbinfo->si_xigen);
10928+ sbinfo->si_xigen = NULL;
10929+ }
10930+}
10931+
10932+/* ---------------------------------------------------------------------- */
10933+
10934+static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino,
10935+ ino_t dir_ino)
10936+{
10937+ struct dentry *dentry, *d;
10938+ struct inode *inode;
10939+ unsigned int sigen;
10940+
10941+ dentry = NULL;
10942+ inode = ilookup(sb, ino);
10943+ if (!inode)
10944+ goto out;
10945+
10946+ dentry = ERR_PTR(-ESTALE);
10947+ sigen = au_sigen(sb);
10948+ if (unlikely(is_bad_inode(inode)
10949+ || IS_DEADDIR(inode)
537831f9 10950+ || sigen != au_iigen(inode, NULL)))
1facf9fc 10951+ goto out_iput;
10952+
10953+ dentry = NULL;
10954+ if (!dir_ino || S_ISDIR(inode->i_mode))
10955+ dentry = d_find_alias(inode);
10956+ else {
027c5e7a 10957+ spin_lock(&inode->i_lock);
c1595e42 10958+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
027c5e7a 10959+ spin_lock(&d->d_lock);
1facf9fc 10960+ if (!au_test_anon(d)
5527c038 10961+ && d_inode(d->d_parent)->i_ino == dir_ino) {
027c5e7a
AM
10962+ dentry = dget_dlock(d);
10963+ spin_unlock(&d->d_lock);
1facf9fc 10964+ break;
10965+ }
027c5e7a
AM
10966+ spin_unlock(&d->d_lock);
10967+ }
10968+ spin_unlock(&inode->i_lock);
1facf9fc 10969+ }
027c5e7a 10970+ if (unlikely(dentry && au_digen_test(dentry, sigen))) {
2cbb1c4b 10971+ /* need to refresh */
1facf9fc 10972+ dput(dentry);
2cbb1c4b 10973+ dentry = NULL;
1facf9fc 10974+ }
10975+
4f0767ce 10976+out_iput:
1facf9fc 10977+ iput(inode);
4f0767ce 10978+out:
2cbb1c4b 10979+ AuTraceErrPtr(dentry);
1facf9fc 10980+ return dentry;
10981+}
10982+
10983+/* ---------------------------------------------------------------------- */
10984+
10985+/* todo: dirty? */
10986+/* if exportfs_decode_fh() passed vfsmount*, we could be happy */
4a4d8108
AM
10987+
10988+struct au_compare_mnt_args {
10989+ /* input */
10990+ struct super_block *sb;
10991+
10992+ /* output */
10993+ struct vfsmount *mnt;
10994+};
10995+
10996+static int au_compare_mnt(struct vfsmount *mnt, void *arg)
10997+{
10998+ struct au_compare_mnt_args *a = arg;
10999+
11000+ if (mnt->mnt_sb != a->sb)
11001+ return 0;
11002+ a->mnt = mntget(mnt);
11003+ return 1;
11004+}
11005+
1facf9fc 11006+static struct vfsmount *au_mnt_get(struct super_block *sb)
11007+{
4a4d8108 11008+ int err;
7eafdf33 11009+ struct path root;
4a4d8108
AM
11010+ struct au_compare_mnt_args args = {
11011+ .sb = sb
11012+ };
1facf9fc 11013+
7eafdf33 11014+ get_fs_root(current->fs, &root);
523b37e3 11015+ rcu_read_lock();
7eafdf33 11016+ err = iterate_mounts(au_compare_mnt, &args, root.mnt);
523b37e3 11017+ rcu_read_unlock();
7eafdf33 11018+ path_put(&root);
4a4d8108
AM
11019+ AuDebugOn(!err);
11020+ AuDebugOn(!args.mnt);
11021+ return args.mnt;
1facf9fc 11022+}
11023+
11024+struct au_nfsd_si_lock {
4a4d8108 11025+ unsigned int sigen;
027c5e7a 11026+ aufs_bindex_t bindex, br_id;
1facf9fc 11027+ unsigned char force_lock;
11028+};
11029+
027c5e7a
AM
11030+static int si_nfsd_read_lock(struct super_block *sb,
11031+ struct au_nfsd_si_lock *nsi_lock)
1facf9fc 11032+{
027c5e7a 11033+ int err;
1facf9fc 11034+ aufs_bindex_t bindex;
11035+
11036+ si_read_lock(sb, AuLock_FLUSH);
11037+
11038+ /* branch id may be wrapped around */
027c5e7a 11039+ err = 0;
1facf9fc 11040+ bindex = au_br_index(sb, nsi_lock->br_id);
11041+ if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb))
11042+ goto out; /* success */
11043+
027c5e7a
AM
11044+ err = -ESTALE;
11045+ bindex = -1;
1facf9fc 11046+ if (!nsi_lock->force_lock)
11047+ si_read_unlock(sb);
1facf9fc 11048+
4f0767ce 11049+out:
027c5e7a
AM
11050+ nsi_lock->bindex = bindex;
11051+ return err;
1facf9fc 11052+}
11053+
11054+struct find_name_by_ino {
392086de 11055+ struct dir_context ctx;
1facf9fc 11056+ int called, found;
11057+ ino_t ino;
11058+ char *name;
11059+ int namelen;
11060+};
11061+
11062+static int
392086de
AM
11063+find_name_by_ino(struct dir_context *ctx, const char *name, int namelen,
11064+ loff_t offset, u64 ino, unsigned int d_type)
1facf9fc 11065+{
392086de
AM
11066+ struct find_name_by_ino *a = container_of(ctx, struct find_name_by_ino,
11067+ ctx);
1facf9fc 11068+
11069+ a->called++;
11070+ if (a->ino != ino)
11071+ return 0;
11072+
11073+ memcpy(a->name, name, namelen);
11074+ a->namelen = namelen;
11075+ a->found = 1;
11076+ return 1;
11077+}
11078+
11079+static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino,
11080+ struct au_nfsd_si_lock *nsi_lock)
11081+{
11082+ struct dentry *dentry, *parent;
11083+ struct file *file;
11084+ struct inode *dir;
392086de
AM
11085+ struct find_name_by_ino arg = {
11086+ .ctx = {
2000de60 11087+ .actor = find_name_by_ino
392086de
AM
11088+ }
11089+ };
1facf9fc 11090+ int err;
11091+
11092+ parent = path->dentry;
11093+ if (nsi_lock)
11094+ si_read_unlock(parent->d_sb);
4a4d8108 11095+ file = vfsub_dentry_open(path, au_dir_roflags);
1facf9fc 11096+ dentry = (void *)file;
11097+ if (IS_ERR(file))
11098+ goto out;
11099+
11100+ dentry = ERR_PTR(-ENOMEM);
537831f9 11101+ arg.name = (void *)__get_free_page(GFP_NOFS);
1facf9fc 11102+ if (unlikely(!arg.name))
11103+ goto out_file;
11104+ arg.ino = ino;
11105+ arg.found = 0;
11106+ do {
11107+ arg.called = 0;
11108+ /* smp_mb(); */
392086de 11109+ err = vfsub_iterate_dir(file, &arg.ctx);
1facf9fc 11110+ } while (!err && !arg.found && arg.called);
11111+ dentry = ERR_PTR(err);
11112+ if (unlikely(err))
11113+ goto out_name;
1716fcea
AM
11114+ /* instead of ENOENT */
11115+ dentry = ERR_PTR(-ESTALE);
1facf9fc 11116+ if (!arg.found)
11117+ goto out_name;
11118+
b4510431 11119+ /* do not call vfsub_lkup_one() */
5527c038 11120+ dir = d_inode(parent);
1facf9fc 11121+ mutex_lock(&dir->i_mutex);
11122+ dentry = vfsub_lookup_one_len(arg.name, parent, arg.namelen);
11123+ mutex_unlock(&dir->i_mutex);
11124+ AuTraceErrPtr(dentry);
11125+ if (IS_ERR(dentry))
11126+ goto out_name;
11127+ AuDebugOn(au_test_anon(dentry));
5527c038 11128+ if (unlikely(d_really_is_negative(dentry))) {
1facf9fc 11129+ dput(dentry);
11130+ dentry = ERR_PTR(-ENOENT);
11131+ }
11132+
4f0767ce 11133+out_name:
537831f9 11134+ free_page((unsigned long)arg.name);
4f0767ce 11135+out_file:
1facf9fc 11136+ fput(file);
4f0767ce 11137+out:
1facf9fc 11138+ if (unlikely(nsi_lock
11139+ && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0))
11140+ if (!IS_ERR(dentry)) {
11141+ dput(dentry);
11142+ dentry = ERR_PTR(-ESTALE);
11143+ }
11144+ AuTraceErrPtr(dentry);
11145+ return dentry;
11146+}
11147+
11148+static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino,
11149+ ino_t dir_ino,
11150+ struct au_nfsd_si_lock *nsi_lock)
11151+{
11152+ struct dentry *dentry;
11153+ struct path path;
11154+
11155+ if (dir_ino != AUFS_ROOT_INO) {
11156+ path.dentry = decode_by_ino(sb, dir_ino, 0);
11157+ dentry = path.dentry;
11158+ if (!path.dentry || IS_ERR(path.dentry))
11159+ goto out;
11160+ AuDebugOn(au_test_anon(path.dentry));
11161+ } else
11162+ path.dentry = dget(sb->s_root);
11163+
11164+ path.mnt = au_mnt_get(sb);
11165+ dentry = au_lkup_by_ino(&path, ino, nsi_lock);
11166+ path_put(&path);
11167+
4f0767ce 11168+out:
1facf9fc 11169+ AuTraceErrPtr(dentry);
11170+ return dentry;
11171+}
11172+
11173+/* ---------------------------------------------------------------------- */
11174+
11175+static int h_acceptable(void *expv, struct dentry *dentry)
11176+{
11177+ return 1;
11178+}
11179+
11180+static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath,
11181+ char *buf, int len, struct super_block *sb)
11182+{
11183+ char *p;
11184+ int n;
11185+ struct path path;
11186+
11187+ p = d_path(h_rootpath, buf, len);
11188+ if (IS_ERR(p))
11189+ goto out;
11190+ n = strlen(p);
11191+
11192+ path.mnt = h_rootpath->mnt;
11193+ path.dentry = h_parent;
11194+ p = d_path(&path, buf, len);
11195+ if (IS_ERR(p))
11196+ goto out;
11197+ if (n != 1)
11198+ p += n;
11199+
11200+ path.mnt = au_mnt_get(sb);
11201+ path.dentry = sb->s_root;
11202+ p = d_path(&path, buf, len - strlen(p));
11203+ mntput(path.mnt);
11204+ if (IS_ERR(p))
11205+ goto out;
11206+ if (n != 1)
11207+ p[strlen(p)] = '/';
11208+
4f0767ce 11209+out:
1facf9fc 11210+ AuTraceErrPtr(p);
11211+ return p;
11212+}
11213+
11214+static
027c5e7a
AM
11215+struct dentry *decode_by_path(struct super_block *sb, ino_t ino, __u32 *fh,
11216+ int fh_len, struct au_nfsd_si_lock *nsi_lock)
1facf9fc 11217+{
11218+ struct dentry *dentry, *h_parent, *root;
11219+ struct super_block *h_sb;
11220+ char *pathname, *p;
11221+ struct vfsmount *h_mnt;
11222+ struct au_branch *br;
11223+ int err;
11224+ struct path path;
11225+
027c5e7a 11226+ br = au_sbr(sb, nsi_lock->bindex);
86dc4139 11227+ h_mnt = au_br_mnt(br);
1facf9fc 11228+ h_sb = h_mnt->mnt_sb;
11229+ /* todo: call lower fh_to_dentry()? fh_to_parent()? */
11230+ h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail),
11231+ fh_len - Fh_tail, fh[Fh_h_type],
11232+ h_acceptable, /*context*/NULL);
11233+ dentry = h_parent;
11234+ if (unlikely(!h_parent || IS_ERR(h_parent))) {
11235+ AuWarn1("%s decode_fh failed, %ld\n",
11236+ au_sbtype(h_sb), PTR_ERR(h_parent));
11237+ goto out;
11238+ }
11239+ dentry = NULL;
11240+ if (unlikely(au_test_anon(h_parent))) {
11241+ AuWarn1("%s decode_fh returned a disconnected dentry\n",
11242+ au_sbtype(h_sb));
11243+ goto out_h_parent;
11244+ }
11245+
11246+ dentry = ERR_PTR(-ENOMEM);
11247+ pathname = (void *)__get_free_page(GFP_NOFS);
11248+ if (unlikely(!pathname))
11249+ goto out_h_parent;
11250+
11251+ root = sb->s_root;
11252+ path.mnt = h_mnt;
11253+ di_read_lock_parent(root, !AuLock_IR);
027c5e7a 11254+ path.dentry = au_h_dptr(root, nsi_lock->bindex);
1facf9fc 11255+ di_read_unlock(root, !AuLock_IR);
11256+ p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb);
11257+ dentry = (void *)p;
11258+ if (IS_ERR(p))
11259+ goto out_pathname;
11260+
11261+ si_read_unlock(sb);
11262+ err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
11263+ dentry = ERR_PTR(err);
11264+ if (unlikely(err))
11265+ goto out_relock;
11266+
11267+ dentry = ERR_PTR(-ENOENT);
11268+ AuDebugOn(au_test_anon(path.dentry));
5527c038 11269+ if (unlikely(d_really_is_negative(path.dentry)))
1facf9fc 11270+ goto out_path;
11271+
5527c038 11272+ if (ino != d_inode(path.dentry)->i_ino)
1facf9fc 11273+ dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL);
11274+ else
11275+ dentry = dget(path.dentry);
11276+
4f0767ce 11277+out_path:
1facf9fc 11278+ path_put(&path);
4f0767ce 11279+out_relock:
1facf9fc 11280+ if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0))
11281+ if (!IS_ERR(dentry)) {
11282+ dput(dentry);
11283+ dentry = ERR_PTR(-ESTALE);
11284+ }
4f0767ce 11285+out_pathname:
1facf9fc 11286+ free_page((unsigned long)pathname);
4f0767ce 11287+out_h_parent:
1facf9fc 11288+ dput(h_parent);
4f0767ce 11289+out:
1facf9fc 11290+ AuTraceErrPtr(dentry);
11291+ return dentry;
11292+}
11293+
11294+/* ---------------------------------------------------------------------- */
11295+
11296+static struct dentry *
11297+aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
11298+ int fh_type)
11299+{
11300+ struct dentry *dentry;
11301+ __u32 *fh = fid->raw;
027c5e7a 11302+ struct au_branch *br;
1facf9fc 11303+ ino_t ino, dir_ino;
1facf9fc 11304+ struct au_nfsd_si_lock nsi_lock = {
1facf9fc 11305+ .force_lock = 0
11306+ };
11307+
1facf9fc 11308+ dentry = ERR_PTR(-ESTALE);
4a4d8108
AM
11309+ /* it should never happen, but the file handle is unreliable */
11310+ if (unlikely(fh_len < Fh_tail))
11311+ goto out;
11312+ nsi_lock.sigen = fh[Fh_sigen];
11313+ nsi_lock.br_id = fh[Fh_br_id];
11314+
1facf9fc 11315+ /* branch id may be wrapped around */
027c5e7a
AM
11316+ br = NULL;
11317+ if (unlikely(si_nfsd_read_lock(sb, &nsi_lock)))
1facf9fc 11318+ goto out;
11319+ nsi_lock.force_lock = 1;
11320+
11321+ /* is this inode still cached? */
11322+ ino = decode_ino(fh + Fh_ino);
4a4d8108
AM
11323+ /* it should never happen */
11324+ if (unlikely(ino == AUFS_ROOT_INO))
11325+ goto out;
11326+
1facf9fc 11327+ dir_ino = decode_ino(fh + Fh_dir_ino);
11328+ dentry = decode_by_ino(sb, ino, dir_ino);
11329+ if (IS_ERR(dentry))
11330+ goto out_unlock;
11331+ if (dentry)
11332+ goto accept;
11333+
11334+ /* is the parent dir cached? */
027c5e7a
AM
11335+ br = au_sbr(sb, nsi_lock.bindex);
11336+ atomic_inc(&br->br_count);
1facf9fc 11337+ dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock);
11338+ if (IS_ERR(dentry))
11339+ goto out_unlock;
11340+ if (dentry)
11341+ goto accept;
11342+
11343+ /* lookup path */
027c5e7a 11344+ dentry = decode_by_path(sb, ino, fh, fh_len, &nsi_lock);
1facf9fc 11345+ if (IS_ERR(dentry))
11346+ goto out_unlock;
11347+ if (unlikely(!dentry))
11348+ /* todo?: make it ESTALE */
11349+ goto out_unlock;
11350+
4f0767ce 11351+accept:
027c5e7a 11352+ if (!au_digen_test(dentry, au_sigen(sb))
5527c038 11353+ && d_inode(dentry)->i_generation == fh[Fh_igen])
1facf9fc 11354+ goto out_unlock; /* success */
11355+
11356+ dput(dentry);
11357+ dentry = ERR_PTR(-ESTALE);
4f0767ce 11358+out_unlock:
027c5e7a
AM
11359+ if (br)
11360+ atomic_dec(&br->br_count);
1facf9fc 11361+ si_read_unlock(sb);
4f0767ce 11362+out:
1facf9fc 11363+ AuTraceErrPtr(dentry);
11364+ return dentry;
11365+}
11366+
11367+#if 0 /* reserved for future use */
11368+/* support subtreecheck option */
11369+static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid,
11370+ int fh_len, int fh_type)
11371+{
11372+ struct dentry *parent;
11373+ __u32 *fh = fid->raw;
11374+ ino_t dir_ino;
11375+
11376+ dir_ino = decode_ino(fh + Fh_dir_ino);
11377+ parent = decode_by_ino(sb, dir_ino, 0);
11378+ if (IS_ERR(parent))
11379+ goto out;
11380+ if (!parent)
11381+ parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]),
11382+ dir_ino, fh, fh_len);
11383+
4f0767ce 11384+out:
1facf9fc 11385+ AuTraceErrPtr(parent);
11386+ return parent;
11387+}
11388+#endif
11389+
11390+/* ---------------------------------------------------------------------- */
11391+
0c3ec466
AM
11392+static int aufs_encode_fh(struct inode *inode, __u32 *fh, int *max_len,
11393+ struct inode *dir)
1facf9fc 11394+{
11395+ int err;
0c3ec466 11396+ aufs_bindex_t bindex;
1facf9fc 11397+ struct super_block *sb, *h_sb;
0c3ec466
AM
11398+ struct dentry *dentry, *parent, *h_parent;
11399+ struct inode *h_dir;
1facf9fc 11400+ struct au_branch *br;
11401+
1facf9fc 11402+ err = -ENOSPC;
11403+ if (unlikely(*max_len <= Fh_tail)) {
11404+ AuWarn1("NFSv2 client (max_len %d)?\n", *max_len);
11405+ goto out;
11406+ }
11407+
11408+ err = FILEID_ROOT;
0c3ec466
AM
11409+ if (inode->i_ino == AUFS_ROOT_INO) {
11410+ AuDebugOn(inode->i_ino != AUFS_ROOT_INO);
1facf9fc 11411+ goto out;
11412+ }
11413+
1facf9fc 11414+ h_parent = NULL;
0c3ec466
AM
11415+ sb = inode->i_sb;
11416+ err = si_read_lock(sb, AuLock_FLUSH);
027c5e7a
AM
11417+ if (unlikely(err))
11418+ goto out;
11419+
1facf9fc 11420+#ifdef CONFIG_AUFS_DEBUG
11421+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
11422+ AuWarn1("NFS-exporting requires xino\n");
11423+#endif
027c5e7a 11424+ err = -EIO;
0c3ec466
AM
11425+ parent = NULL;
11426+ ii_read_lock_child(inode);
11427+ bindex = au_ibstart(inode);
11428+ if (!dir) {
c1595e42 11429+ dentry = d_find_any_alias(inode);
0c3ec466
AM
11430+ if (unlikely(!dentry))
11431+ goto out_unlock;
11432+ AuDebugOn(au_test_anon(dentry));
11433+ parent = dget_parent(dentry);
11434+ dput(dentry);
11435+ if (unlikely(!parent))
11436+ goto out_unlock;
5527c038
JR
11437+ if (d_really_is_positive(parent))
11438+ dir = d_inode(parent);
1facf9fc 11439+ }
0c3ec466
AM
11440+
11441+ ii_read_lock_parent(dir);
11442+ h_dir = au_h_iptr(dir, bindex);
11443+ ii_read_unlock(dir);
11444+ if (unlikely(!h_dir))
11445+ goto out_parent;
c1595e42 11446+ h_parent = d_find_any_alias(h_dir);
1facf9fc 11447+ if (unlikely(!h_parent))
0c3ec466 11448+ goto out_hparent;
1facf9fc 11449+
11450+ err = -EPERM;
11451+ br = au_sbr(sb, bindex);
86dc4139 11452+ h_sb = au_br_sb(br);
1facf9fc 11453+ if (unlikely(!h_sb->s_export_op)) {
11454+ AuErr1("%s branch is not exportable\n", au_sbtype(h_sb));
0c3ec466 11455+ goto out_hparent;
1facf9fc 11456+ }
11457+
11458+ fh[Fh_br_id] = br->br_id;
11459+ fh[Fh_sigen] = au_sigen(sb);
11460+ encode_ino(fh + Fh_ino, inode->i_ino);
0c3ec466 11461+ encode_ino(fh + Fh_dir_ino, dir->i_ino);
1facf9fc 11462+ fh[Fh_igen] = inode->i_generation;
11463+
11464+ *max_len -= Fh_tail;
11465+ fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail),
11466+ max_len,
11467+ /*connectable or subtreecheck*/0);
11468+ err = fh[Fh_h_type];
11469+ *max_len += Fh_tail;
11470+ /* todo: macros? */
1716fcea 11471+ if (err != FILEID_INVALID)
1facf9fc 11472+ err = 99;
11473+ else
11474+ AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb));
11475+
0c3ec466 11476+out_hparent:
1facf9fc 11477+ dput(h_parent);
0c3ec466 11478+out_parent:
1facf9fc 11479+ dput(parent);
0c3ec466
AM
11480+out_unlock:
11481+ ii_read_unlock(inode);
11482+ si_read_unlock(sb);
4f0767ce 11483+out:
1facf9fc 11484+ if (unlikely(err < 0))
1716fcea 11485+ err = FILEID_INVALID;
1facf9fc 11486+ return err;
11487+}
11488+
11489+/* ---------------------------------------------------------------------- */
11490+
4a4d8108
AM
11491+static int aufs_commit_metadata(struct inode *inode)
11492+{
11493+ int err;
11494+ aufs_bindex_t bindex;
11495+ struct super_block *sb;
11496+ struct inode *h_inode;
11497+ int (*f)(struct inode *inode);
11498+
11499+ sb = inode->i_sb;
e49829fe 11500+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108
AM
11501+ ii_write_lock_child(inode);
11502+ bindex = au_ibstart(inode);
11503+ AuDebugOn(bindex < 0);
11504+ h_inode = au_h_iptr(inode, bindex);
11505+
11506+ f = h_inode->i_sb->s_export_op->commit_metadata;
11507+ if (f)
11508+ err = f(h_inode);
11509+ else {
11510+ struct writeback_control wbc = {
11511+ .sync_mode = WB_SYNC_ALL,
11512+ .nr_to_write = 0 /* metadata only */
11513+ };
11514+
11515+ err = sync_inode(h_inode, &wbc);
11516+ }
11517+
11518+ au_cpup_attr_timesizes(inode);
11519+ ii_write_unlock(inode);
11520+ si_read_unlock(sb);
11521+ return err;
11522+}
11523+
11524+/* ---------------------------------------------------------------------- */
11525+
1facf9fc 11526+static struct export_operations aufs_export_op = {
4a4d8108 11527+ .fh_to_dentry = aufs_fh_to_dentry,
1facf9fc 11528+ /* .fh_to_parent = aufs_fh_to_parent, */
4a4d8108
AM
11529+ .encode_fh = aufs_encode_fh,
11530+ .commit_metadata = aufs_commit_metadata
1facf9fc 11531+};
11532+
11533+void au_export_init(struct super_block *sb)
11534+{
11535+ struct au_sbinfo *sbinfo;
11536+ __u32 u;
11537+
11538+ sb->s_export_op = &aufs_export_op;
11539+ sbinfo = au_sbi(sb);
11540+ sbinfo->si_xigen = NULL;
11541+ get_random_bytes(&u, sizeof(u));
11542+ BUILD_BUG_ON(sizeof(u) != sizeof(int));
11543+ atomic_set(&sbinfo->si_xigen_next, u);
11544+}
076b876e
AM
11545diff -urN /usr/share/empty/fs/aufs/fhsm.c linux/fs/aufs/fhsm.c
11546--- /usr/share/empty/fs/aufs/fhsm.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 11547+++ linux/fs/aufs/fhsm.c 2015-09-24 10:47:58.251386326 +0200
c1595e42 11548@@ -0,0 +1,426 @@
076b876e 11549+/*
2000de60 11550+ * Copyright (C) 2011-2015 Junjiro R. Okajima
076b876e
AM
11551+ *
11552+ * This program, aufs is free software; you can redistribute it and/or modify
11553+ * it under the terms of the GNU General Public License as published by
11554+ * the Free Software Foundation; either version 2 of the License, or
11555+ * (at your option) any later version.
11556+ *
11557+ * This program is distributed in the hope that it will be useful,
11558+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11559+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11560+ * GNU General Public License for more details.
11561+ *
11562+ * You should have received a copy of the GNU General Public License
11563+ * along with this program; if not, write to the Free Software
11564+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
11565+ */
11566+
11567+/*
11568+ * File-based Hierarchy Storage Management
11569+ */
11570+
11571+#include <linux/anon_inodes.h>
11572+#include <linux/poll.h>
11573+#include <linux/seq_file.h>
11574+#include <linux/statfs.h>
11575+#include "aufs.h"
11576+
c1595e42
JR
11577+static aufs_bindex_t au_fhsm_bottom(struct super_block *sb)
11578+{
11579+ struct au_sbinfo *sbinfo;
11580+ struct au_fhsm *fhsm;
11581+
11582+ SiMustAnyLock(sb);
11583+
11584+ sbinfo = au_sbi(sb);
11585+ fhsm = &sbinfo->si_fhsm;
11586+ AuDebugOn(!fhsm);
11587+ return fhsm->fhsm_bottom;
11588+}
11589+
11590+void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex)
11591+{
11592+ struct au_sbinfo *sbinfo;
11593+ struct au_fhsm *fhsm;
11594+
11595+ SiMustWriteLock(sb);
11596+
11597+ sbinfo = au_sbi(sb);
11598+ fhsm = &sbinfo->si_fhsm;
11599+ AuDebugOn(!fhsm);
11600+ fhsm->fhsm_bottom = bindex;
11601+}
11602+
11603+/* ---------------------------------------------------------------------- */
11604+
076b876e
AM
11605+static int au_fhsm_test_jiffy(struct au_sbinfo *sbinfo, struct au_branch *br)
11606+{
11607+ struct au_br_fhsm *bf;
11608+
11609+ bf = br->br_fhsm;
11610+ MtxMustLock(&bf->bf_lock);
11611+
11612+ return !bf->bf_readable
11613+ || time_after(jiffies,
11614+ bf->bf_jiffy + sbinfo->si_fhsm.fhsm_expire);
11615+}
11616+
11617+/* ---------------------------------------------------------------------- */
11618+
11619+static void au_fhsm_notify(struct super_block *sb, int val)
11620+{
11621+ struct au_sbinfo *sbinfo;
11622+ struct au_fhsm *fhsm;
11623+
11624+ SiMustAnyLock(sb);
11625+
11626+ sbinfo = au_sbi(sb);
11627+ fhsm = &sbinfo->si_fhsm;
11628+ if (au_fhsm_pid(fhsm)
11629+ && atomic_read(&fhsm->fhsm_readable) != -1) {
11630+ atomic_set(&fhsm->fhsm_readable, val);
11631+ if (val)
11632+ wake_up(&fhsm->fhsm_wqh);
11633+ }
11634+}
11635+
11636+static int au_fhsm_stfs(struct super_block *sb, aufs_bindex_t bindex,
11637+ struct aufs_stfs *rstfs, int do_lock, int do_notify)
11638+{
11639+ int err;
11640+ struct au_branch *br;
11641+ struct au_br_fhsm *bf;
11642+
11643+ br = au_sbr(sb, bindex);
11644+ AuDebugOn(au_br_rdonly(br));
11645+ bf = br->br_fhsm;
11646+ AuDebugOn(!bf);
11647+
11648+ if (do_lock)
11649+ mutex_lock(&bf->bf_lock);
11650+ else
11651+ MtxMustLock(&bf->bf_lock);
11652+
11653+ /* sb->s_root for NFS is unreliable */
11654+ err = au_br_stfs(br, &bf->bf_stfs);
11655+ if (unlikely(err)) {
11656+ AuErr1("FHSM failed (%d), b%d, ignored.\n", bindex, err);
11657+ goto out;
11658+ }
11659+
11660+ bf->bf_jiffy = jiffies;
11661+ bf->bf_readable = 1;
11662+ if (do_notify)
11663+ au_fhsm_notify(sb, /*val*/1);
11664+ if (rstfs)
11665+ *rstfs = bf->bf_stfs;
11666+
11667+out:
11668+ if (do_lock)
11669+ mutex_unlock(&bf->bf_lock);
11670+ au_fhsm_notify(sb, /*val*/1);
11671+
11672+ return err;
11673+}
11674+
11675+void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force)
11676+{
11677+ int err;
076b876e
AM
11678+ struct au_sbinfo *sbinfo;
11679+ struct au_fhsm *fhsm;
11680+ struct au_branch *br;
11681+ struct au_br_fhsm *bf;
11682+
11683+ AuDbg("b%d, force %d\n", bindex, force);
11684+ SiMustAnyLock(sb);
11685+
11686+ sbinfo = au_sbi(sb);
11687+ fhsm = &sbinfo->si_fhsm;
c1595e42
JR
11688+ if (!au_ftest_si(sbinfo, FHSM)
11689+ || fhsm->fhsm_bottom == bindex)
076b876e
AM
11690+ return;
11691+
11692+ br = au_sbr(sb, bindex);
11693+ bf = br->br_fhsm;
11694+ AuDebugOn(!bf);
11695+ mutex_lock(&bf->bf_lock);
11696+ if (force
11697+ || au_fhsm_pid(fhsm)
11698+ || au_fhsm_test_jiffy(sbinfo, br))
11699+ err = au_fhsm_stfs(sb, bindex, /*rstfs*/NULL, /*do_lock*/0,
11700+ /*do_notify*/1);
11701+ mutex_unlock(&bf->bf_lock);
11702+}
11703+
11704+void au_fhsm_wrote_all(struct super_block *sb, int force)
11705+{
11706+ aufs_bindex_t bindex, bend;
11707+ struct au_branch *br;
11708+
11709+ /* exclude the bottom */
c1595e42 11710+ bend = au_fhsm_bottom(sb);
076b876e
AM
11711+ for (bindex = 0; bindex < bend; bindex++) {
11712+ br = au_sbr(sb, bindex);
11713+ if (au_br_fhsm(br->br_perm))
11714+ au_fhsm_wrote(sb, bindex, force);
11715+ }
11716+}
11717+
11718+/* ---------------------------------------------------------------------- */
11719+
11720+static unsigned int au_fhsm_poll(struct file *file,
11721+ struct poll_table_struct *wait)
11722+{
11723+ unsigned int mask;
11724+ struct au_sbinfo *sbinfo;
11725+ struct au_fhsm *fhsm;
11726+
11727+ mask = 0;
11728+ sbinfo = file->private_data;
11729+ fhsm = &sbinfo->si_fhsm;
11730+ poll_wait(file, &fhsm->fhsm_wqh, wait);
11731+ if (atomic_read(&fhsm->fhsm_readable))
11732+ mask = POLLIN /* | POLLRDNORM */;
11733+
11734+ AuTraceErr((int)mask);
11735+ return mask;
11736+}
11737+
11738+static int au_fhsm_do_read_one(struct aufs_stbr __user *stbr,
11739+ struct aufs_stfs *stfs, __s16 brid)
11740+{
11741+ int err;
11742+
11743+ err = copy_to_user(&stbr->stfs, stfs, sizeof(*stfs));
11744+ if (!err)
11745+ err = __put_user(brid, &stbr->brid);
11746+ if (unlikely(err))
11747+ err = -EFAULT;
11748+
11749+ return err;
11750+}
11751+
11752+static ssize_t au_fhsm_do_read(struct super_block *sb,
11753+ struct aufs_stbr __user *stbr, size_t count)
11754+{
11755+ ssize_t err;
11756+ int nstbr;
11757+ aufs_bindex_t bindex, bend;
11758+ struct au_branch *br;
11759+ struct au_br_fhsm *bf;
11760+
11761+ /* except the bottom branch */
11762+ err = 0;
11763+ nstbr = 0;
c1595e42 11764+ bend = au_fhsm_bottom(sb);
076b876e
AM
11765+ for (bindex = 0; !err && bindex < bend; bindex++) {
11766+ br = au_sbr(sb, bindex);
11767+ if (!au_br_fhsm(br->br_perm))
11768+ continue;
11769+
11770+ bf = br->br_fhsm;
11771+ mutex_lock(&bf->bf_lock);
11772+ if (bf->bf_readable) {
11773+ err = -EFAULT;
11774+ if (count >= sizeof(*stbr))
11775+ err = au_fhsm_do_read_one(stbr++, &bf->bf_stfs,
11776+ br->br_id);
11777+ if (!err) {
11778+ bf->bf_readable = 0;
11779+ count -= sizeof(*stbr);
11780+ nstbr++;
11781+ }
11782+ }
11783+ mutex_unlock(&bf->bf_lock);
11784+ }
11785+ if (!err)
11786+ err = sizeof(*stbr) * nstbr;
11787+
11788+ return err;
11789+}
11790+
11791+static ssize_t au_fhsm_read(struct file *file, char __user *buf, size_t count,
11792+ loff_t *pos)
11793+{
11794+ ssize_t err;
11795+ int readable;
11796+ aufs_bindex_t nfhsm, bindex, bend;
11797+ struct au_sbinfo *sbinfo;
11798+ struct au_fhsm *fhsm;
11799+ struct au_branch *br;
11800+ struct super_block *sb;
11801+
11802+ err = 0;
11803+ sbinfo = file->private_data;
11804+ fhsm = &sbinfo->si_fhsm;
11805+need_data:
11806+ spin_lock_irq(&fhsm->fhsm_wqh.lock);
11807+ if (!atomic_read(&fhsm->fhsm_readable)) {
11808+ if (vfsub_file_flags(file) & O_NONBLOCK)
11809+ err = -EAGAIN;
11810+ else
11811+ err = wait_event_interruptible_locked_irq
11812+ (fhsm->fhsm_wqh,
11813+ atomic_read(&fhsm->fhsm_readable));
11814+ }
11815+ spin_unlock_irq(&fhsm->fhsm_wqh.lock);
11816+ if (unlikely(err))
11817+ goto out;
11818+
11819+ /* sb may already be dead */
11820+ au_rw_read_lock(&sbinfo->si_rwsem);
11821+ readable = atomic_read(&fhsm->fhsm_readable);
11822+ if (readable > 0) {
11823+ sb = sbinfo->si_sb;
11824+ AuDebugOn(!sb);
11825+ /* exclude the bottom branch */
11826+ nfhsm = 0;
c1595e42 11827+ bend = au_fhsm_bottom(sb);
076b876e
AM
11828+ for (bindex = 0; bindex < bend; bindex++) {
11829+ br = au_sbr(sb, bindex);
11830+ if (au_br_fhsm(br->br_perm))
11831+ nfhsm++;
11832+ }
11833+ err = -EMSGSIZE;
11834+ if (nfhsm * sizeof(struct aufs_stbr) <= count) {
11835+ atomic_set(&fhsm->fhsm_readable, 0);
11836+ err = au_fhsm_do_read(sbinfo->si_sb, (void __user *)buf,
11837+ count);
11838+ }
11839+ }
11840+ au_rw_read_unlock(&sbinfo->si_rwsem);
11841+ if (!readable)
11842+ goto need_data;
11843+
11844+out:
11845+ return err;
11846+}
11847+
11848+static int au_fhsm_release(struct inode *inode, struct file *file)
11849+{
11850+ struct au_sbinfo *sbinfo;
11851+ struct au_fhsm *fhsm;
11852+
11853+ /* sb may already be dead */
11854+ sbinfo = file->private_data;
11855+ fhsm = &sbinfo->si_fhsm;
11856+ spin_lock(&fhsm->fhsm_spin);
11857+ fhsm->fhsm_pid = 0;
11858+ spin_unlock(&fhsm->fhsm_spin);
11859+ kobject_put(&sbinfo->si_kobj);
11860+
11861+ return 0;
11862+}
11863+
11864+static const struct file_operations au_fhsm_fops = {
11865+ .owner = THIS_MODULE,
11866+ .llseek = noop_llseek,
11867+ .read = au_fhsm_read,
11868+ .poll = au_fhsm_poll,
11869+ .release = au_fhsm_release
11870+};
11871+
11872+int au_fhsm_fd(struct super_block *sb, int oflags)
11873+{
11874+ int err, fd;
11875+ struct au_sbinfo *sbinfo;
11876+ struct au_fhsm *fhsm;
11877+
11878+ err = -EPERM;
11879+ if (unlikely(!capable(CAP_SYS_ADMIN)))
11880+ goto out;
11881+
11882+ err = -EINVAL;
11883+ if (unlikely(oflags & ~(O_CLOEXEC | O_NONBLOCK)))
11884+ goto out;
11885+
11886+ err = 0;
11887+ sbinfo = au_sbi(sb);
11888+ fhsm = &sbinfo->si_fhsm;
11889+ spin_lock(&fhsm->fhsm_spin);
11890+ if (!fhsm->fhsm_pid)
11891+ fhsm->fhsm_pid = current->pid;
11892+ else
11893+ err = -EBUSY;
11894+ spin_unlock(&fhsm->fhsm_spin);
11895+ if (unlikely(err))
11896+ goto out;
11897+
11898+ oflags |= O_RDONLY;
11899+ /* oflags |= FMODE_NONOTIFY; */
11900+ fd = anon_inode_getfd("[aufs_fhsm]", &au_fhsm_fops, sbinfo, oflags);
11901+ err = fd;
11902+ if (unlikely(fd < 0))
11903+ goto out_pid;
11904+
11905+ /* succeed regardless of 'fhsm' status */
11906+ kobject_get(&sbinfo->si_kobj);
11907+ si_noflush_read_lock(sb);
11908+ if (au_ftest_si(sbinfo, FHSM))
11909+ au_fhsm_wrote_all(sb, /*force*/0);
11910+ si_read_unlock(sb);
11911+ goto out; /* success */
11912+
11913+out_pid:
11914+ spin_lock(&fhsm->fhsm_spin);
11915+ fhsm->fhsm_pid = 0;
11916+ spin_unlock(&fhsm->fhsm_spin);
11917+out:
11918+ AuTraceErr(err);
11919+ return err;
11920+}
11921+
11922+/* ---------------------------------------------------------------------- */
11923+
11924+int au_fhsm_br_alloc(struct au_branch *br)
11925+{
11926+ int err;
11927+
11928+ err = 0;
11929+ br->br_fhsm = kmalloc(sizeof(*br->br_fhsm), GFP_NOFS);
11930+ if (br->br_fhsm)
11931+ au_br_fhsm_init(br->br_fhsm);
11932+ else
11933+ err = -ENOMEM;
11934+
11935+ return err;
11936+}
11937+
11938+/* ---------------------------------------------------------------------- */
11939+
11940+void au_fhsm_fin(struct super_block *sb)
11941+{
11942+ au_fhsm_notify(sb, /*val*/-1);
11943+}
11944+
11945+void au_fhsm_init(struct au_sbinfo *sbinfo)
11946+{
11947+ struct au_fhsm *fhsm;
11948+
11949+ fhsm = &sbinfo->si_fhsm;
11950+ spin_lock_init(&fhsm->fhsm_spin);
11951+ init_waitqueue_head(&fhsm->fhsm_wqh);
11952+ atomic_set(&fhsm->fhsm_readable, 0);
11953+ fhsm->fhsm_expire
11954+ = msecs_to_jiffies(AUFS_FHSM_CACHE_DEF_SEC * MSEC_PER_SEC);
c1595e42 11955+ fhsm->fhsm_bottom = -1;
076b876e
AM
11956+}
11957+
11958+void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec)
11959+{
11960+ sbinfo->si_fhsm.fhsm_expire
11961+ = msecs_to_jiffies(sec * MSEC_PER_SEC);
11962+}
11963+
11964+void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo)
11965+{
11966+ unsigned int u;
11967+
11968+ if (!au_ftest_si(sbinfo, FHSM))
11969+ return;
11970+
11971+ u = jiffies_to_msecs(sbinfo->si_fhsm.fhsm_expire) / MSEC_PER_SEC;
11972+ if (u != AUFS_FHSM_CACHE_DEF_SEC)
11973+ seq_printf(seq, ",fhsm_sec=%u", u);
11974+}
7f207e10
AM
11975diff -urN /usr/share/empty/fs/aufs/file.c linux/fs/aufs/file.c
11976--- /usr/share/empty/fs/aufs/file.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd
AM
11977+++ linux/fs/aufs/file.c 2015-11-11 17:21:46.918863802 +0100
11978@@ -0,0 +1,844 @@
1facf9fc 11979+/*
2000de60 11980+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 11981+ *
11982+ * This program, aufs is free software; you can redistribute it and/or modify
11983+ * it under the terms of the GNU General Public License as published by
11984+ * the Free Software Foundation; either version 2 of the License, or
11985+ * (at your option) any later version.
dece6358
AM
11986+ *
11987+ * This program is distributed in the hope that it will be useful,
11988+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11989+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11990+ * GNU General Public License for more details.
11991+ *
11992+ * You should have received a copy of the GNU General Public License
523b37e3 11993+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 11994+ */
11995+
11996+/*
4a4d8108 11997+ * handling file/dir, and address_space operation
1facf9fc 11998+ */
11999+
7eafdf33
AM
12000+#ifdef CONFIG_AUFS_DEBUG
12001+#include <linux/migrate.h>
12002+#endif
4a4d8108 12003+#include <linux/pagemap.h>
1facf9fc 12004+#include "aufs.h"
12005+
4a4d8108
AM
12006+/* drop flags for writing */
12007+unsigned int au_file_roflags(unsigned int flags)
12008+{
12009+ flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC);
12010+ flags |= O_RDONLY | O_NOATIME;
12011+ return flags;
12012+}
12013+
12014+/* common functions to regular file and dir */
12015+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
392086de 12016+ struct file *file, int force_wr)
1facf9fc 12017+{
1308ab2a 12018+ struct file *h_file;
4a4d8108
AM
12019+ struct dentry *h_dentry;
12020+ struct inode *h_inode;
12021+ struct super_block *sb;
12022+ struct au_branch *br;
12023+ struct path h_path;
b912730e 12024+ int err;
1facf9fc 12025+
4a4d8108
AM
12026+ /* a race condition can happen between open and unlink/rmdir */
12027+ h_file = ERR_PTR(-ENOENT);
12028+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 12029+ if (au_test_nfsd() && (!h_dentry || d_is_negative(h_dentry)))
4a4d8108 12030+ goto out;
5527c038 12031+ h_inode = d_inode(h_dentry);
027c5e7a
AM
12032+ spin_lock(&h_dentry->d_lock);
12033+ err = (!d_unhashed(dentry) && d_unlinked(h_dentry))
5527c038 12034+ /* || !d_inode(dentry)->i_nlink */
027c5e7a
AM
12035+ ;
12036+ spin_unlock(&h_dentry->d_lock);
12037+ if (unlikely(err))
4a4d8108 12038+ goto out;
1facf9fc 12039+
4a4d8108
AM
12040+ sb = dentry->d_sb;
12041+ br = au_sbr(sb, bindex);
b912730e
AM
12042+ err = au_br_test_oflag(flags, br);
12043+ h_file = ERR_PTR(err);
12044+ if (unlikely(err))
027c5e7a 12045+ goto out;
1facf9fc 12046+
4a4d8108 12047+ /* drop flags for writing */
5527c038 12048+ if (au_test_ro(sb, bindex, d_inode(dentry))) {
392086de
AM
12049+ if (force_wr && !(flags & O_WRONLY))
12050+ force_wr = 0;
4a4d8108 12051+ flags = au_file_roflags(flags);
392086de
AM
12052+ if (force_wr) {
12053+ h_file = ERR_PTR(-EROFS);
12054+ flags = au_file_roflags(flags);
12055+ if (unlikely(vfsub_native_ro(h_inode)
12056+ || IS_APPEND(h_inode)))
12057+ goto out;
12058+ flags &= ~O_ACCMODE;
12059+ flags |= O_WRONLY;
12060+ }
12061+ }
4a4d8108
AM
12062+ flags &= ~O_CREAT;
12063+ atomic_inc(&br->br_count);
12064+ h_path.dentry = h_dentry;
86dc4139 12065+ h_path.mnt = au_br_mnt(br);
38d290e6 12066+ h_file = vfsub_dentry_open(&h_path, flags);
4a4d8108
AM
12067+ if (IS_ERR(h_file))
12068+ goto out_br;
dece6358 12069+
b912730e 12070+ if (flags & __FMODE_EXEC) {
4a4d8108
AM
12071+ err = deny_write_access(h_file);
12072+ if (unlikely(err)) {
12073+ fput(h_file);
12074+ h_file = ERR_PTR(err);
12075+ goto out_br;
12076+ }
12077+ }
953406b4 12078+ fsnotify_open(h_file);
4a4d8108 12079+ goto out; /* success */
1facf9fc 12080+
4f0767ce 12081+out_br:
4a4d8108 12082+ atomic_dec(&br->br_count);
4f0767ce 12083+out:
4a4d8108
AM
12084+ return h_file;
12085+}
1308ab2a 12086+
076b876e
AM
12087+static int au_cmoo(struct dentry *dentry)
12088+{
12089+ int err, cmoo;
12090+ unsigned int udba;
12091+ struct path h_path;
12092+ struct au_pin pin;
12093+ struct au_cp_generic cpg = {
12094+ .dentry = dentry,
12095+ .bdst = -1,
12096+ .bsrc = -1,
12097+ .len = -1,
12098+ .pin = &pin,
12099+ .flags = AuCpup_DTIME | AuCpup_HOPEN
12100+ };
7e9cd9fe 12101+ struct inode *delegated;
076b876e
AM
12102+ struct super_block *sb;
12103+ struct au_sbinfo *sbinfo;
12104+ struct au_fhsm *fhsm;
12105+ pid_t pid;
12106+ struct au_branch *br;
12107+ struct dentry *parent;
12108+ struct au_hinode *hdir;
12109+
12110+ DiMustWriteLock(dentry);
5527c038 12111+ IiMustWriteLock(d_inode(dentry));
076b876e
AM
12112+
12113+ err = 0;
12114+ if (IS_ROOT(dentry))
12115+ goto out;
12116+ cpg.bsrc = au_dbstart(dentry);
12117+ if (!cpg.bsrc)
12118+ goto out;
12119+
12120+ sb = dentry->d_sb;
12121+ sbinfo = au_sbi(sb);
12122+ fhsm = &sbinfo->si_fhsm;
12123+ pid = au_fhsm_pid(fhsm);
12124+ if (pid
12125+ && (current->pid == pid
12126+ || current->real_parent->pid == pid))
12127+ goto out;
12128+
12129+ br = au_sbr(sb, cpg.bsrc);
12130+ cmoo = au_br_cmoo(br->br_perm);
12131+ if (!cmoo)
12132+ goto out;
7e9cd9fe 12133+ if (!d_is_reg(dentry))
076b876e
AM
12134+ cmoo &= AuBrAttr_COO_ALL;
12135+ if (!cmoo)
12136+ goto out;
12137+
12138+ parent = dget_parent(dentry);
12139+ di_write_lock_parent(parent);
12140+ err = au_wbr_do_copyup_bu(dentry, cpg.bsrc - 1);
12141+ cpg.bdst = err;
12142+ if (unlikely(err < 0)) {
12143+ err = 0; /* there is no upper writable branch */
12144+ goto out_dgrade;
12145+ }
12146+ AuDbg("bsrc %d, bdst %d\n", cpg.bsrc, cpg.bdst);
12147+
12148+ /* do not respect the coo attrib for the target branch */
12149+ err = au_cpup_dirs(dentry, cpg.bdst);
12150+ if (unlikely(err))
12151+ goto out_dgrade;
12152+
12153+ di_downgrade_lock(parent, AuLock_IR);
12154+ udba = au_opt_udba(sb);
12155+ err = au_pin(&pin, dentry, cpg.bdst, udba,
12156+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12157+ if (unlikely(err))
12158+ goto out_parent;
12159+
12160+ err = au_sio_cpup_simple(&cpg);
12161+ au_unpin(&pin);
12162+ if (unlikely(err))
12163+ goto out_parent;
12164+ if (!(cmoo & AuBrWAttr_MOO))
12165+ goto out_parent; /* success */
12166+
12167+ err = au_pin(&pin, dentry, cpg.bsrc, udba,
12168+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12169+ if (unlikely(err))
12170+ goto out_parent;
12171+
12172+ h_path.mnt = au_br_mnt(br);
12173+ h_path.dentry = au_h_dptr(dentry, cpg.bsrc);
5527c038 12174+ hdir = au_hi(d_inode(parent), cpg.bsrc);
076b876e
AM
12175+ delegated = NULL;
12176+ err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated, /*force*/1);
12177+ au_unpin(&pin);
12178+ /* todo: keep h_dentry or not? */
12179+ if (unlikely(err == -EWOULDBLOCK)) {
12180+ pr_warn("cannot retry for NFSv4 delegation"
12181+ " for an internal unlink\n");
12182+ iput(delegated);
12183+ }
12184+ if (unlikely(err)) {
12185+ pr_err("unlink %pd after coo failed (%d), ignored\n",
12186+ dentry, err);
12187+ err = 0;
12188+ }
12189+ goto out_parent; /* success */
12190+
12191+out_dgrade:
12192+ di_downgrade_lock(parent, AuLock_IR);
12193+out_parent:
12194+ di_read_unlock(parent, AuLock_IR);
12195+ dput(parent);
12196+out:
12197+ AuTraceErr(err);
12198+ return err;
12199+}
12200+
b912730e 12201+int au_do_open(struct file *file, struct au_do_open_args *args)
1facf9fc 12202+{
b912730e 12203+ int err, no_lock = args->no_lock;
1facf9fc 12204+ struct dentry *dentry;
076b876e 12205+ struct au_finfo *finfo;
1308ab2a 12206+
b912730e
AM
12207+ if (!no_lock)
12208+ err = au_finfo_init(file, args->fidir);
12209+ else {
12210+ lockdep_off();
12211+ err = au_finfo_init(file, args->fidir);
12212+ lockdep_on();
12213+ }
4a4d8108
AM
12214+ if (unlikely(err))
12215+ goto out;
1facf9fc 12216+
2000de60 12217+ dentry = file->f_path.dentry;
b912730e
AM
12218+ AuDebugOn(IS_ERR_OR_NULL(dentry));
12219+ if (!no_lock) {
12220+ di_write_lock_child(dentry);
12221+ err = au_cmoo(dentry);
12222+ di_downgrade_lock(dentry, AuLock_IR);
12223+ if (!err)
12224+ err = args->open(file, vfsub_file_flags(file), NULL);
12225+ di_read_unlock(dentry, AuLock_IR);
12226+ } else {
12227+ err = au_cmoo(dentry);
12228+ if (!err)
12229+ err = args->open(file, vfsub_file_flags(file),
12230+ args->h_file);
12231+ if (!err && au_fbstart(file) != au_dbstart(dentry))
12232+ /*
12233+ * cmoo happens after h_file was opened.
12234+ * need to refresh file later.
12235+ */
12236+ atomic_dec(&au_fi(file)->fi_generation);
12237+ }
1facf9fc 12238+
076b876e
AM
12239+ finfo = au_fi(file);
12240+ if (!err) {
12241+ finfo->fi_file = file;
12242+ au_sphl_add(&finfo->fi_hlist,
2000de60 12243+ &au_sbi(file->f_path.dentry->d_sb)->si_files);
076b876e 12244+ }
b912730e
AM
12245+ if (!no_lock)
12246+ fi_write_unlock(file);
12247+ else {
12248+ lockdep_off();
12249+ fi_write_unlock(file);
12250+ lockdep_on();
12251+ }
4a4d8108 12252+ if (unlikely(err)) {
076b876e 12253+ finfo->fi_hdir = NULL;
4a4d8108 12254+ au_finfo_fin(file);
1308ab2a 12255+ }
4a4d8108 12256+
4f0767ce 12257+out:
1308ab2a 12258+ return err;
12259+}
dece6358 12260+
4a4d8108 12261+int au_reopen_nondir(struct file *file)
1308ab2a 12262+{
4a4d8108
AM
12263+ int err;
12264+ aufs_bindex_t bstart;
12265+ struct dentry *dentry;
12266+ struct file *h_file, *h_file_tmp;
1308ab2a 12267+
2000de60 12268+ dentry = file->f_path.dentry;
4a4d8108
AM
12269+ bstart = au_dbstart(dentry);
12270+ h_file_tmp = NULL;
12271+ if (au_fbstart(file) == bstart) {
12272+ h_file = au_hf_top(file);
12273+ if (file->f_mode == h_file->f_mode)
12274+ return 0; /* success */
12275+ h_file_tmp = h_file;
12276+ get_file(h_file_tmp);
12277+ au_set_h_fptr(file, bstart, NULL);
12278+ }
12279+ AuDebugOn(au_fi(file)->fi_hdir);
86dc4139
AM
12280+ /*
12281+ * it can happen
12282+ * file exists on both of rw and ro
12283+ * open --> dbstart and fbstart are both 0
12284+ * prepend a branch as rw, "rw" become ro
12285+ * remove rw/file
12286+ * delete the top branch, "rw" becomes rw again
12287+ * --> dbstart is 1, fbstart is still 0
12288+ * write --> fbstart is 0 but dbstart is 1
12289+ */
12290+ /* AuDebugOn(au_fbstart(file) < bstart); */
1308ab2a 12291+
4a4d8108 12292+ h_file = au_h_open(dentry, bstart, vfsub_file_flags(file) & ~O_TRUNC,
392086de 12293+ file, /*force_wr*/0);
4a4d8108 12294+ err = PTR_ERR(h_file);
86dc4139
AM
12295+ if (IS_ERR(h_file)) {
12296+ if (h_file_tmp) {
12297+ atomic_inc(&au_sbr(dentry->d_sb, bstart)->br_count);
12298+ au_set_h_fptr(file, bstart, h_file_tmp);
12299+ h_file_tmp = NULL;
12300+ }
4a4d8108 12301+ goto out; /* todo: close all? */
86dc4139 12302+ }
4a4d8108
AM
12303+
12304+ err = 0;
12305+ au_set_fbstart(file, bstart);
12306+ au_set_h_fptr(file, bstart, h_file);
12307+ au_update_figen(file);
12308+ /* todo: necessary? */
12309+ /* file->f_ra = h_file->f_ra; */
12310+
4f0767ce 12311+out:
4a4d8108
AM
12312+ if (h_file_tmp)
12313+ fput(h_file_tmp);
12314+ return err;
1facf9fc 12315+}
12316+
1308ab2a 12317+/* ---------------------------------------------------------------------- */
12318+
4a4d8108
AM
12319+static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
12320+ struct dentry *hi_wh)
1facf9fc 12321+{
4a4d8108
AM
12322+ int err;
12323+ aufs_bindex_t bstart;
12324+ struct au_dinfo *dinfo;
12325+ struct dentry *h_dentry;
12326+ struct au_hdentry *hdp;
1facf9fc 12327+
2000de60 12328+ dinfo = au_di(file->f_path.dentry);
4a4d8108 12329+ AuRwMustWriteLock(&dinfo->di_rwsem);
dece6358 12330+
4a4d8108
AM
12331+ bstart = dinfo->di_bstart;
12332+ dinfo->di_bstart = btgt;
12333+ hdp = dinfo->di_hdentry;
12334+ h_dentry = hdp[0 + btgt].hd_dentry;
12335+ hdp[0 + btgt].hd_dentry = hi_wh;
12336+ err = au_reopen_nondir(file);
12337+ hdp[0 + btgt].hd_dentry = h_dentry;
12338+ dinfo->di_bstart = bstart;
1facf9fc 12339+
1facf9fc 12340+ return err;
12341+}
12342+
4a4d8108 12343+static int au_ready_to_write_wh(struct file *file, loff_t len,
86dc4139 12344+ aufs_bindex_t bcpup, struct au_pin *pin)
1facf9fc 12345+{
4a4d8108 12346+ int err;
027c5e7a 12347+ struct inode *inode, *h_inode;
c2b27bf2
AM
12348+ struct dentry *h_dentry, *hi_wh;
12349+ struct au_cp_generic cpg = {
2000de60 12350+ .dentry = file->f_path.dentry,
c2b27bf2
AM
12351+ .bdst = bcpup,
12352+ .bsrc = -1,
12353+ .len = len,
12354+ .pin = pin
12355+ };
1facf9fc 12356+
c2b27bf2 12357+ au_update_dbstart(cpg.dentry);
5527c038 12358+ inode = d_inode(cpg.dentry);
027c5e7a 12359+ h_inode = NULL;
c2b27bf2
AM
12360+ if (au_dbstart(cpg.dentry) <= bcpup
12361+ && au_dbend(cpg.dentry) >= bcpup) {
12362+ h_dentry = au_h_dptr(cpg.dentry, bcpup);
5527c038
JR
12363+ if (h_dentry && d_is_positive(h_dentry))
12364+ h_inode = d_inode(h_dentry);
027c5e7a 12365+ }
4a4d8108 12366+ hi_wh = au_hi_wh(inode, bcpup);
027c5e7a 12367+ if (!hi_wh && !h_inode)
c2b27bf2 12368+ err = au_sio_cpup_wh(&cpg, file);
4a4d8108
AM
12369+ else
12370+ /* already copied-up after unlink */
12371+ err = au_reopen_wh(file, bcpup, hi_wh);
1facf9fc 12372+
4a4d8108 12373+ if (!err
38d290e6
JR
12374+ && (inode->i_nlink > 1
12375+ || (inode->i_state & I_LINKABLE))
c2b27bf2
AM
12376+ && au_opt_test(au_mntflags(cpg.dentry->d_sb), PLINK))
12377+ au_plink_append(inode, bcpup, au_h_dptr(cpg.dentry, bcpup));
1308ab2a 12378+
dece6358 12379+ return err;
1facf9fc 12380+}
12381+
4a4d8108
AM
12382+/*
12383+ * prepare the @file for writing.
12384+ */
12385+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
1facf9fc 12386+{
4a4d8108 12387+ int err;
c2b27bf2 12388+ aufs_bindex_t dbstart;
c1595e42 12389+ struct dentry *parent;
86dc4139 12390+ struct inode *inode;
1facf9fc 12391+ struct super_block *sb;
4a4d8108 12392+ struct file *h_file;
c2b27bf2 12393+ struct au_cp_generic cpg = {
2000de60 12394+ .dentry = file->f_path.dentry,
c2b27bf2
AM
12395+ .bdst = -1,
12396+ .bsrc = -1,
12397+ .len = len,
12398+ .pin = pin,
12399+ .flags = AuCpup_DTIME
12400+ };
1facf9fc 12401+
c2b27bf2 12402+ sb = cpg.dentry->d_sb;
5527c038 12403+ inode = d_inode(cpg.dentry);
c2b27bf2
AM
12404+ cpg.bsrc = au_fbstart(file);
12405+ err = au_test_ro(sb, cpg.bsrc, inode);
4a4d8108 12406+ if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) {
c2b27bf2
AM
12407+ err = au_pin(pin, cpg.dentry, cpg.bsrc, AuOpt_UDBA_NONE,
12408+ /*flags*/0);
1facf9fc 12409+ goto out;
4a4d8108 12410+ }
1facf9fc 12411+
027c5e7a 12412+ /* need to cpup or reopen */
c2b27bf2 12413+ parent = dget_parent(cpg.dentry);
4a4d8108 12414+ di_write_lock_parent(parent);
c2b27bf2
AM
12415+ err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
12416+ cpg.bdst = err;
4a4d8108
AM
12417+ if (unlikely(err < 0))
12418+ goto out_dgrade;
12419+ err = 0;
12420+
c2b27bf2
AM
12421+ if (!d_unhashed(cpg.dentry) && !au_h_dptr(parent, cpg.bdst)) {
12422+ err = au_cpup_dirs(cpg.dentry, cpg.bdst);
1facf9fc 12423+ if (unlikely(err))
4a4d8108
AM
12424+ goto out_dgrade;
12425+ }
12426+
c2b27bf2 12427+ err = au_pin(pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
4a4d8108
AM
12428+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12429+ if (unlikely(err))
12430+ goto out_dgrade;
12431+
c2b27bf2 12432+ dbstart = au_dbstart(cpg.dentry);
c1595e42 12433+ if (dbstart <= cpg.bdst)
c2b27bf2 12434+ cpg.bsrc = cpg.bdst;
027c5e7a 12435+
c2b27bf2
AM
12436+ if (dbstart <= cpg.bdst /* just reopen */
12437+ || !d_unhashed(cpg.dentry) /* copyup and reopen */
027c5e7a 12438+ ) {
392086de 12439+ h_file = au_h_open_pre(cpg.dentry, cpg.bsrc, /*force_wr*/0);
86dc4139 12440+ if (IS_ERR(h_file))
027c5e7a 12441+ err = PTR_ERR(h_file);
86dc4139 12442+ else {
027c5e7a 12443+ di_downgrade_lock(parent, AuLock_IR);
c2b27bf2
AM
12444+ if (dbstart > cpg.bdst)
12445+ err = au_sio_cpup_simple(&cpg);
027c5e7a
AM
12446+ if (!err)
12447+ err = au_reopen_nondir(file);
c2b27bf2 12448+ au_h_open_post(cpg.dentry, cpg.bsrc, h_file);
027c5e7a 12449+ }
027c5e7a
AM
12450+ } else { /* copyup as wh and reopen */
12451+ /*
12452+ * since writable hfsplus branch is not supported,
12453+ * h_open_pre/post() are unnecessary.
12454+ */
c2b27bf2 12455+ err = au_ready_to_write_wh(file, len, cpg.bdst, pin);
4a4d8108 12456+ di_downgrade_lock(parent, AuLock_IR);
4a4d8108 12457+ }
4a4d8108
AM
12458+
12459+ if (!err) {
12460+ au_pin_set_parent_lflag(pin, /*lflag*/0);
12461+ goto out_dput; /* success */
12462+ }
12463+ au_unpin(pin);
12464+ goto out_unlock;
1facf9fc 12465+
4f0767ce 12466+out_dgrade:
4a4d8108 12467+ di_downgrade_lock(parent, AuLock_IR);
4f0767ce 12468+out_unlock:
4a4d8108 12469+ di_read_unlock(parent, AuLock_IR);
4f0767ce 12470+out_dput:
4a4d8108 12471+ dput(parent);
4f0767ce 12472+out:
1facf9fc 12473+ return err;
12474+}
12475+
4a4d8108
AM
12476+/* ---------------------------------------------------------------------- */
12477+
12478+int au_do_flush(struct file *file, fl_owner_t id,
12479+ int (*flush)(struct file *file, fl_owner_t id))
1facf9fc 12480+{
4a4d8108 12481+ int err;
1facf9fc 12482+ struct super_block *sb;
4a4d8108 12483+ struct inode *inode;
1facf9fc 12484+
c06a8ce3
AM
12485+ inode = file_inode(file);
12486+ sb = inode->i_sb;
4a4d8108
AM
12487+ si_noflush_read_lock(sb);
12488+ fi_read_lock(file);
b752ccd1 12489+ ii_read_lock_child(inode);
1facf9fc 12490+
4a4d8108
AM
12491+ err = flush(file, id);
12492+ au_cpup_attr_timesizes(inode);
1facf9fc 12493+
b752ccd1 12494+ ii_read_unlock(inode);
4a4d8108 12495+ fi_read_unlock(file);
1308ab2a 12496+ si_read_unlock(sb);
dece6358 12497+ return err;
1facf9fc 12498+}
12499+
4a4d8108
AM
12500+/* ---------------------------------------------------------------------- */
12501+
12502+static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
1facf9fc 12503+{
4a4d8108 12504+ int err;
4a4d8108
AM
12505+ struct au_pin pin;
12506+ struct au_finfo *finfo;
c2b27bf2 12507+ struct dentry *parent, *hi_wh;
4a4d8108 12508+ struct inode *inode;
1facf9fc 12509+ struct super_block *sb;
c2b27bf2 12510+ struct au_cp_generic cpg = {
2000de60 12511+ .dentry = file->f_path.dentry,
c2b27bf2
AM
12512+ .bdst = -1,
12513+ .bsrc = -1,
12514+ .len = -1,
12515+ .pin = &pin,
12516+ .flags = AuCpup_DTIME
12517+ };
1facf9fc 12518+
4a4d8108
AM
12519+ FiMustWriteLock(file);
12520+
12521+ err = 0;
12522+ finfo = au_fi(file);
c2b27bf2 12523+ sb = cpg.dentry->d_sb;
5527c038 12524+ inode = d_inode(cpg.dentry);
c2b27bf2
AM
12525+ cpg.bdst = au_ibstart(inode);
12526+ if (cpg.bdst == finfo->fi_btop || IS_ROOT(cpg.dentry))
1308ab2a 12527+ goto out;
dece6358 12528+
c2b27bf2
AM
12529+ parent = dget_parent(cpg.dentry);
12530+ if (au_test_ro(sb, cpg.bdst, inode)) {
4a4d8108 12531+ di_read_lock_parent(parent, !AuLock_IR);
c2b27bf2
AM
12532+ err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
12533+ cpg.bdst = err;
4a4d8108
AM
12534+ di_read_unlock(parent, !AuLock_IR);
12535+ if (unlikely(err < 0))
12536+ goto out_parent;
12537+ err = 0;
1facf9fc 12538+ }
1facf9fc 12539+
4a4d8108 12540+ di_read_lock_parent(parent, AuLock_IR);
c2b27bf2 12541+ hi_wh = au_hi_wh(inode, cpg.bdst);
7f207e10
AM
12542+ if (!S_ISDIR(inode->i_mode)
12543+ && au_opt_test(au_mntflags(sb), PLINK)
4a4d8108 12544+ && au_plink_test(inode)
c2b27bf2
AM
12545+ && !d_unhashed(cpg.dentry)
12546+ && cpg.bdst < au_dbstart(cpg.dentry)) {
12547+ err = au_test_and_cpup_dirs(cpg.dentry, cpg.bdst);
4a4d8108
AM
12548+ if (unlikely(err))
12549+ goto out_unlock;
12550+
12551+ /* always superio. */
c2b27bf2 12552+ err = au_pin(&pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
4a4d8108 12553+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
367653fa 12554+ if (!err) {
c2b27bf2 12555+ err = au_sio_cpup_simple(&cpg);
367653fa
AM
12556+ au_unpin(&pin);
12557+ }
4a4d8108
AM
12558+ } else if (hi_wh) {
12559+ /* already copied-up after unlink */
c2b27bf2 12560+ err = au_reopen_wh(file, cpg.bdst, hi_wh);
4a4d8108
AM
12561+ *need_reopen = 0;
12562+ }
1facf9fc 12563+
4f0767ce 12564+out_unlock:
4a4d8108 12565+ di_read_unlock(parent, AuLock_IR);
4f0767ce 12566+out_parent:
4a4d8108 12567+ dput(parent);
4f0767ce 12568+out:
1308ab2a 12569+ return err;
dece6358 12570+}
1facf9fc 12571+
4a4d8108 12572+static void au_do_refresh_dir(struct file *file)
dece6358 12573+{
4a4d8108
AM
12574+ aufs_bindex_t bindex, bend, new_bindex, brid;
12575+ struct au_hfile *p, tmp, *q;
12576+ struct au_finfo *finfo;
1308ab2a 12577+ struct super_block *sb;
4a4d8108 12578+ struct au_fidir *fidir;
1facf9fc 12579+
4a4d8108 12580+ FiMustWriteLock(file);
1facf9fc 12581+
2000de60 12582+ sb = file->f_path.dentry->d_sb;
4a4d8108
AM
12583+ finfo = au_fi(file);
12584+ fidir = finfo->fi_hdir;
12585+ AuDebugOn(!fidir);
12586+ p = fidir->fd_hfile + finfo->fi_btop;
12587+ brid = p->hf_br->br_id;
12588+ bend = fidir->fd_bbot;
12589+ for (bindex = finfo->fi_btop; bindex <= bend; bindex++, p++) {
12590+ if (!p->hf_file)
12591+ continue;
1308ab2a 12592+
4a4d8108
AM
12593+ new_bindex = au_br_index(sb, p->hf_br->br_id);
12594+ if (new_bindex == bindex)
12595+ continue;
12596+ if (new_bindex < 0) {
12597+ au_set_h_fptr(file, bindex, NULL);
12598+ continue;
12599+ }
1308ab2a 12600+
4a4d8108
AM
12601+ /* swap the two lower au_hfile entries, and loop again */
12602+ q = fidir->fd_hfile + new_bindex;
12603+ tmp = *q;
12604+ *q = *p;
12605+ *p = tmp;
12606+ if (tmp.hf_file) {
12607+ bindex--;
12608+ p--;
12609+ }
12610+ }
1308ab2a 12611+
4a4d8108 12612+ p = fidir->fd_hfile;
2000de60 12613+ if (!au_test_mmapped(file) && !d_unlinked(file->f_path.dentry)) {
4a4d8108
AM
12614+ bend = au_sbend(sb);
12615+ for (finfo->fi_btop = 0; finfo->fi_btop <= bend;
12616+ finfo->fi_btop++, p++)
12617+ if (p->hf_file) {
c06a8ce3 12618+ if (file_inode(p->hf_file))
4a4d8108 12619+ break;
c1595e42 12620+ au_hfput(p, file);
4a4d8108
AM
12621+ }
12622+ } else {
12623+ bend = au_br_index(sb, brid);
12624+ for (finfo->fi_btop = 0; finfo->fi_btop < bend;
12625+ finfo->fi_btop++, p++)
12626+ if (p->hf_file)
12627+ au_hfput(p, file);
12628+ bend = au_sbend(sb);
12629+ }
1308ab2a 12630+
4a4d8108
AM
12631+ p = fidir->fd_hfile + bend;
12632+ for (fidir->fd_bbot = bend; fidir->fd_bbot >= finfo->fi_btop;
12633+ fidir->fd_bbot--, p--)
12634+ if (p->hf_file) {
c06a8ce3 12635+ if (file_inode(p->hf_file))
4a4d8108 12636+ break;
c1595e42 12637+ au_hfput(p, file);
4a4d8108
AM
12638+ }
12639+ AuDebugOn(fidir->fd_bbot < finfo->fi_btop);
1308ab2a 12640+}
12641+
4a4d8108
AM
12642+/*
12643+ * after branch manipulation, refresh the file.
12644+ */
12645+static int refresh_file(struct file *file, int (*reopen)(struct file *file))
1facf9fc 12646+{
4a4d8108
AM
12647+ int err, need_reopen;
12648+ aufs_bindex_t bend, bindex;
12649+ struct dentry *dentry;
1308ab2a 12650+ struct au_finfo *finfo;
4a4d8108 12651+ struct au_hfile *hfile;
1facf9fc 12652+
2000de60 12653+ dentry = file->f_path.dentry;
1308ab2a 12654+ finfo = au_fi(file);
4a4d8108
AM
12655+ if (!finfo->fi_hdir) {
12656+ hfile = &finfo->fi_htop;
12657+ AuDebugOn(!hfile->hf_file);
12658+ bindex = au_br_index(dentry->d_sb, hfile->hf_br->br_id);
12659+ AuDebugOn(bindex < 0);
12660+ if (bindex != finfo->fi_btop)
12661+ au_set_fbstart(file, bindex);
12662+ } else {
12663+ err = au_fidir_realloc(finfo, au_sbend(dentry->d_sb) + 1);
12664+ if (unlikely(err))
12665+ goto out;
12666+ au_do_refresh_dir(file);
12667+ }
1facf9fc 12668+
4a4d8108
AM
12669+ err = 0;
12670+ need_reopen = 1;
12671+ if (!au_test_mmapped(file))
12672+ err = au_file_refresh_by_inode(file, &need_reopen);
027c5e7a 12673+ if (!err && need_reopen && !d_unlinked(dentry))
4a4d8108
AM
12674+ err = reopen(file);
12675+ if (!err) {
12676+ au_update_figen(file);
12677+ goto out; /* success */
12678+ }
12679+
12680+ /* error, close all lower files */
12681+ if (finfo->fi_hdir) {
12682+ bend = au_fbend_dir(file);
12683+ for (bindex = au_fbstart(file); bindex <= bend; bindex++)
12684+ au_set_h_fptr(file, bindex, NULL);
12685+ }
1facf9fc 12686+
4f0767ce 12687+out:
1facf9fc 12688+ return err;
12689+}
12690+
4a4d8108
AM
12691+/* common function to regular file and dir */
12692+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
12693+ int wlock)
dece6358 12694+{
1308ab2a 12695+ int err;
4a4d8108
AM
12696+ unsigned int sigen, figen;
12697+ aufs_bindex_t bstart;
12698+ unsigned char pseudo_link;
12699+ struct dentry *dentry;
12700+ struct inode *inode;
1facf9fc 12701+
4a4d8108 12702+ err = 0;
2000de60 12703+ dentry = file->f_path.dentry;
5527c038 12704+ inode = d_inode(dentry);
4a4d8108
AM
12705+ sigen = au_sigen(dentry->d_sb);
12706+ fi_write_lock(file);
12707+ figen = au_figen(file);
12708+ di_write_lock_child(dentry);
12709+ bstart = au_dbstart(dentry);
12710+ pseudo_link = (bstart != au_ibstart(inode));
12711+ if (sigen == figen && !pseudo_link && au_fbstart(file) == bstart) {
12712+ if (!wlock) {
12713+ di_downgrade_lock(dentry, AuLock_IR);
12714+ fi_downgrade_lock(file);
12715+ }
12716+ goto out; /* success */
12717+ }
dece6358 12718+
4a4d8108 12719+ AuDbg("sigen %d, figen %d\n", sigen, figen);
027c5e7a 12720+ if (au_digen_test(dentry, sigen)) {
4a4d8108 12721+ err = au_reval_dpath(dentry, sigen);
027c5e7a 12722+ AuDebugOn(!err && au_digen_test(dentry, sigen));
4a4d8108 12723+ }
dece6358 12724+
027c5e7a
AM
12725+ if (!err)
12726+ err = refresh_file(file, reopen);
4a4d8108
AM
12727+ if (!err) {
12728+ if (!wlock) {
12729+ di_downgrade_lock(dentry, AuLock_IR);
12730+ fi_downgrade_lock(file);
12731+ }
12732+ } else {
12733+ di_write_unlock(dentry);
12734+ fi_write_unlock(file);
12735+ }
1facf9fc 12736+
4f0767ce 12737+out:
1308ab2a 12738+ return err;
12739+}
1facf9fc 12740+
4a4d8108
AM
12741+/* ---------------------------------------------------------------------- */
12742+
12743+/* cf. aufs_nopage() */
12744+/* for madvise(2) */
12745+static int aufs_readpage(struct file *file __maybe_unused, struct page *page)
1308ab2a 12746+{
4a4d8108
AM
12747+ unlock_page(page);
12748+ return 0;
12749+}
1facf9fc 12750+
4a4d8108 12751+/* it will never be called, but necessary to support O_DIRECT */
5527c038
JR
12752+static ssize_t aufs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
12753+ loff_t offset)
4a4d8108 12754+{ BUG(); return 0; }
1facf9fc 12755+
4a4d8108
AM
12756+/* they will never be called. */
12757+#ifdef CONFIG_AUFS_DEBUG
12758+static int aufs_write_begin(struct file *file, struct address_space *mapping,
12759+ loff_t pos, unsigned len, unsigned flags,
12760+ struct page **pagep, void **fsdata)
12761+{ AuUnsupport(); return 0; }
12762+static int aufs_write_end(struct file *file, struct address_space *mapping,
12763+ loff_t pos, unsigned len, unsigned copied,
12764+ struct page *page, void *fsdata)
12765+{ AuUnsupport(); return 0; }
12766+static int aufs_writepage(struct page *page, struct writeback_control *wbc)
12767+{ AuUnsupport(); return 0; }
1308ab2a 12768+
4a4d8108
AM
12769+static int aufs_set_page_dirty(struct page *page)
12770+{ AuUnsupport(); return 0; }
392086de
AM
12771+static void aufs_invalidatepage(struct page *page, unsigned int offset,
12772+ unsigned int length)
4a4d8108
AM
12773+{ AuUnsupport(); }
12774+static int aufs_releasepage(struct page *page, gfp_t gfp)
12775+{ AuUnsupport(); return 0; }
ab036dbd 12776+#if 0 /* called by memory compaction regardless file */
4a4d8108 12777+static int aufs_migratepage(struct address_space *mapping, struct page *newpage,
7eafdf33 12778+ struct page *page, enum migrate_mode mode)
4a4d8108 12779+{ AuUnsupport(); return 0; }
ab036dbd 12780+#endif
4a4d8108
AM
12781+static int aufs_launder_page(struct page *page)
12782+{ AuUnsupport(); return 0; }
12783+static int aufs_is_partially_uptodate(struct page *page,
38d290e6
JR
12784+ unsigned long from,
12785+ unsigned long count)
4a4d8108 12786+{ AuUnsupport(); return 0; }
392086de
AM
12787+static void aufs_is_dirty_writeback(struct page *page, bool *dirty,
12788+ bool *writeback)
12789+{ AuUnsupport(); }
4a4d8108
AM
12790+static int aufs_error_remove_page(struct address_space *mapping,
12791+ struct page *page)
12792+{ AuUnsupport(); return 0; }
b4510431
AM
12793+static int aufs_swap_activate(struct swap_info_struct *sis, struct file *file,
12794+ sector_t *span)
12795+{ AuUnsupport(); return 0; }
12796+static void aufs_swap_deactivate(struct file *file)
12797+{ AuUnsupport(); }
4a4d8108
AM
12798+#endif /* CONFIG_AUFS_DEBUG */
12799+
12800+const struct address_space_operations aufs_aop = {
12801+ .readpage = aufs_readpage,
12802+ .direct_IO = aufs_direct_IO,
4a4d8108
AM
12803+#ifdef CONFIG_AUFS_DEBUG
12804+ .writepage = aufs_writepage,
4a4d8108
AM
12805+ /* no writepages, because of writepage */
12806+ .set_page_dirty = aufs_set_page_dirty,
12807+ /* no readpages, because of readpage */
12808+ .write_begin = aufs_write_begin,
12809+ .write_end = aufs_write_end,
12810+ /* no bmap, no block device */
12811+ .invalidatepage = aufs_invalidatepage,
12812+ .releasepage = aufs_releasepage,
ab036dbd
AM
12813+ /* is fallback_migrate_page ok? */
12814+ /* .migratepage = aufs_migratepage, */
4a4d8108
AM
12815+ .launder_page = aufs_launder_page,
12816+ .is_partially_uptodate = aufs_is_partially_uptodate,
392086de 12817+ .is_dirty_writeback = aufs_is_dirty_writeback,
b4510431
AM
12818+ .error_remove_page = aufs_error_remove_page,
12819+ .swap_activate = aufs_swap_activate,
12820+ .swap_deactivate = aufs_swap_deactivate
4a4d8108 12821+#endif /* CONFIG_AUFS_DEBUG */
dece6358 12822+};
7f207e10
AM
12823diff -urN /usr/share/empty/fs/aufs/file.h linux/fs/aufs/file.h
12824--- /usr/share/empty/fs/aufs/file.h 1970-01-01 01:00:00.000000000 +0100
ab036dbd 12825+++ linux/fs/aufs/file.h 2015-09-24 10:47:58.251386326 +0200
b912730e 12826@@ -0,0 +1,291 @@
4a4d8108 12827+/*
2000de60 12828+ * Copyright (C) 2005-2015 Junjiro R. Okajima
4a4d8108
AM
12829+ *
12830+ * This program, aufs is free software; you can redistribute it and/or modify
12831+ * it under the terms of the GNU General Public License as published by
12832+ * the Free Software Foundation; either version 2 of the License, or
12833+ * (at your option) any later version.
12834+ *
12835+ * This program is distributed in the hope that it will be useful,
12836+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12837+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12838+ * GNU General Public License for more details.
12839+ *
12840+ * You should have received a copy of the GNU General Public License
523b37e3 12841+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 12842+ */
1facf9fc 12843+
4a4d8108
AM
12844+/*
12845+ * file operations
12846+ */
1facf9fc 12847+
4a4d8108
AM
12848+#ifndef __AUFS_FILE_H__
12849+#define __AUFS_FILE_H__
1facf9fc 12850+
4a4d8108 12851+#ifdef __KERNEL__
1facf9fc 12852+
2cbb1c4b 12853+#include <linux/file.h>
4a4d8108
AM
12854+#include <linux/fs.h>
12855+#include <linux/poll.h>
4a4d8108 12856+#include "rwsem.h"
1facf9fc 12857+
4a4d8108
AM
12858+struct au_branch;
12859+struct au_hfile {
12860+ struct file *hf_file;
12861+ struct au_branch *hf_br;
12862+};
1facf9fc 12863+
4a4d8108
AM
12864+struct au_vdir;
12865+struct au_fidir {
12866+ aufs_bindex_t fd_bbot;
12867+ aufs_bindex_t fd_nent;
12868+ struct au_vdir *fd_vdir_cache;
12869+ struct au_hfile fd_hfile[];
12870+};
1facf9fc 12871+
4a4d8108 12872+static inline int au_fidir_sz(int nent)
dece6358 12873+{
4f0767ce
JR
12874+ AuDebugOn(nent < 0);
12875+ return sizeof(struct au_fidir) + sizeof(struct au_hfile) * nent;
4a4d8108 12876+}
1facf9fc 12877+
4a4d8108
AM
12878+struct au_finfo {
12879+ atomic_t fi_generation;
dece6358 12880+
4a4d8108
AM
12881+ struct au_rwsem fi_rwsem;
12882+ aufs_bindex_t fi_btop;
12883+
12884+ /* do not union them */
12885+ struct { /* for non-dir */
12886+ struct au_hfile fi_htop;
2cbb1c4b 12887+ atomic_t fi_mmapped;
4a4d8108
AM
12888+ };
12889+ struct au_fidir *fi_hdir; /* for dir only */
523b37e3
AM
12890+
12891+ struct hlist_node fi_hlist;
12892+ struct file *fi_file; /* very ugly */
4a4d8108 12893+} ____cacheline_aligned_in_smp;
1facf9fc 12894+
4a4d8108 12895+/* ---------------------------------------------------------------------- */
1facf9fc 12896+
4a4d8108
AM
12897+/* file.c */
12898+extern const struct address_space_operations aufs_aop;
12899+unsigned int au_file_roflags(unsigned int flags);
12900+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
392086de 12901+ struct file *file, int force_wr);
b912730e
AM
12902+struct au_do_open_args {
12903+ int no_lock;
12904+ int (*open)(struct file *file, int flags,
12905+ struct file *h_file);
12906+ struct au_fidir *fidir;
12907+ struct file *h_file;
12908+};
12909+int au_do_open(struct file *file, struct au_do_open_args *args);
4a4d8108
AM
12910+int au_reopen_nondir(struct file *file);
12911+struct au_pin;
12912+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin);
12913+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
12914+ int wlock);
12915+int au_do_flush(struct file *file, fl_owner_t id,
12916+ int (*flush)(struct file *file, fl_owner_t id));
1facf9fc 12917+
4a4d8108
AM
12918+/* poll.c */
12919+#ifdef CONFIG_AUFS_POLL
12920+unsigned int aufs_poll(struct file *file, poll_table *wait);
12921+#endif
1facf9fc 12922+
4a4d8108
AM
12923+#ifdef CONFIG_AUFS_BR_HFSPLUS
12924+/* hfsplus.c */
392086de
AM
12925+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
12926+ int force_wr);
4a4d8108
AM
12927+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
12928+ struct file *h_file);
12929+#else
c1595e42
JR
12930+AuStub(struct file *, au_h_open_pre, return NULL, struct dentry *dentry,
12931+ aufs_bindex_t bindex, int force_wr)
4a4d8108
AM
12932+AuStubVoid(au_h_open_post, struct dentry *dentry, aufs_bindex_t bindex,
12933+ struct file *h_file);
12934+#endif
1facf9fc 12935+
4a4d8108
AM
12936+/* f_op.c */
12937+extern const struct file_operations aufs_file_fop;
b912730e 12938+int au_do_open_nondir(struct file *file, int flags, struct file *h_file);
4a4d8108 12939+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file);
b912730e 12940+struct file *au_read_pre(struct file *file, int keep_fi);
4a4d8108 12941+
4a4d8108
AM
12942+/* finfo.c */
12943+void au_hfput(struct au_hfile *hf, struct file *file);
12944+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex,
12945+ struct file *h_file);
1facf9fc 12946+
4a4d8108 12947+void au_update_figen(struct file *file);
4a4d8108
AM
12948+struct au_fidir *au_fidir_alloc(struct super_block *sb);
12949+int au_fidir_realloc(struct au_finfo *finfo, int nbr);
1facf9fc 12950+
4a4d8108
AM
12951+void au_fi_init_once(void *_fi);
12952+void au_finfo_fin(struct file *file);
12953+int au_finfo_init(struct file *file, struct au_fidir *fidir);
1facf9fc 12954+
4a4d8108
AM
12955+/* ioctl.c */
12956+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg);
b752ccd1
AM
12957+#ifdef CONFIG_COMPAT
12958+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
12959+ unsigned long arg);
c2b27bf2
AM
12960+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
12961+ unsigned long arg);
b752ccd1 12962+#endif
1facf9fc 12963+
4a4d8108 12964+/* ---------------------------------------------------------------------- */
1facf9fc 12965+
4a4d8108
AM
12966+static inline struct au_finfo *au_fi(struct file *file)
12967+{
38d290e6 12968+ return file->private_data;
4a4d8108 12969+}
1facf9fc 12970+
4a4d8108 12971+/* ---------------------------------------------------------------------- */
1facf9fc 12972+
4a4d8108
AM
12973+/*
12974+ * fi_read_lock, fi_write_lock,
12975+ * fi_read_unlock, fi_write_unlock, fi_downgrade_lock
12976+ */
12977+AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem);
1308ab2a 12978+
4a4d8108
AM
12979+#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem)
12980+#define FiMustAnyLock(f) AuRwMustAnyLock(&au_fi(f)->fi_rwsem)
12981+#define FiMustWriteLock(f) AuRwMustWriteLock(&au_fi(f)->fi_rwsem)
1facf9fc 12982+
1308ab2a 12983+/* ---------------------------------------------------------------------- */
12984+
4a4d8108
AM
12985+/* todo: hard/soft set? */
12986+static inline aufs_bindex_t au_fbstart(struct file *file)
dece6358 12987+{
4a4d8108
AM
12988+ FiMustAnyLock(file);
12989+ return au_fi(file)->fi_btop;
12990+}
dece6358 12991+
4a4d8108
AM
12992+static inline aufs_bindex_t au_fbend_dir(struct file *file)
12993+{
12994+ FiMustAnyLock(file);
12995+ AuDebugOn(!au_fi(file)->fi_hdir);
12996+ return au_fi(file)->fi_hdir->fd_bbot;
12997+}
1facf9fc 12998+
4a4d8108
AM
12999+static inline struct au_vdir *au_fvdir_cache(struct file *file)
13000+{
13001+ FiMustAnyLock(file);
13002+ AuDebugOn(!au_fi(file)->fi_hdir);
13003+ return au_fi(file)->fi_hdir->fd_vdir_cache;
13004+}
1facf9fc 13005+
4a4d8108
AM
13006+static inline void au_set_fbstart(struct file *file, aufs_bindex_t bindex)
13007+{
13008+ FiMustWriteLock(file);
13009+ au_fi(file)->fi_btop = bindex;
13010+}
1facf9fc 13011+
4a4d8108
AM
13012+static inline void au_set_fbend_dir(struct file *file, aufs_bindex_t bindex)
13013+{
13014+ FiMustWriteLock(file);
13015+ AuDebugOn(!au_fi(file)->fi_hdir);
13016+ au_fi(file)->fi_hdir->fd_bbot = bindex;
13017+}
1308ab2a 13018+
4a4d8108
AM
13019+static inline void au_set_fvdir_cache(struct file *file,
13020+ struct au_vdir *vdir_cache)
13021+{
13022+ FiMustWriteLock(file);
13023+ AuDebugOn(!au_fi(file)->fi_hdir);
13024+ au_fi(file)->fi_hdir->fd_vdir_cache = vdir_cache;
13025+}
dece6358 13026+
4a4d8108
AM
13027+static inline struct file *au_hf_top(struct file *file)
13028+{
13029+ FiMustAnyLock(file);
13030+ AuDebugOn(au_fi(file)->fi_hdir);
13031+ return au_fi(file)->fi_htop.hf_file;
13032+}
1facf9fc 13033+
4a4d8108
AM
13034+static inline struct file *au_hf_dir(struct file *file, aufs_bindex_t bindex)
13035+{
13036+ FiMustAnyLock(file);
13037+ AuDebugOn(!au_fi(file)->fi_hdir);
13038+ return au_fi(file)->fi_hdir->fd_hfile[0 + bindex].hf_file;
dece6358
AM
13039+}
13040+
4a4d8108
AM
13041+/* todo: memory barrier? */
13042+static inline unsigned int au_figen(struct file *f)
dece6358 13043+{
4a4d8108
AM
13044+ return atomic_read(&au_fi(f)->fi_generation);
13045+}
dece6358 13046+
2cbb1c4b
JR
13047+static inline void au_set_mmapped(struct file *f)
13048+{
13049+ if (atomic_inc_return(&au_fi(f)->fi_mmapped))
13050+ return;
0c3ec466 13051+ pr_warn("fi_mmapped wrapped around\n");
2cbb1c4b
JR
13052+ while (!atomic_inc_return(&au_fi(f)->fi_mmapped))
13053+ ;
13054+}
13055+
13056+static inline void au_unset_mmapped(struct file *f)
13057+{
13058+ atomic_dec(&au_fi(f)->fi_mmapped);
13059+}
13060+
4a4d8108
AM
13061+static inline int au_test_mmapped(struct file *f)
13062+{
2cbb1c4b
JR
13063+ return atomic_read(&au_fi(f)->fi_mmapped);
13064+}
13065+
13066+/* customize vma->vm_file */
13067+
13068+static inline void au_do_vm_file_reset(struct vm_area_struct *vma,
13069+ struct file *file)
13070+{
53392da6
AM
13071+ struct file *f;
13072+
13073+ f = vma->vm_file;
2cbb1c4b
JR
13074+ get_file(file);
13075+ vma->vm_file = file;
53392da6 13076+ fput(f);
2cbb1c4b
JR
13077+}
13078+
13079+#ifdef CONFIG_MMU
13080+#define AuDbgVmRegion(file, vma) do {} while (0)
13081+
13082+static inline void au_vm_file_reset(struct vm_area_struct *vma,
13083+ struct file *file)
13084+{
13085+ au_do_vm_file_reset(vma, file);
13086+}
13087+#else
13088+#define AuDbgVmRegion(file, vma) \
13089+ AuDebugOn((vma)->vm_region && (vma)->vm_region->vm_file != (file))
13090+
13091+static inline void au_vm_file_reset(struct vm_area_struct *vma,
13092+ struct file *file)
13093+{
53392da6
AM
13094+ struct file *f;
13095+
2cbb1c4b 13096+ au_do_vm_file_reset(vma, file);
53392da6 13097+ f = vma->vm_region->vm_file;
2cbb1c4b
JR
13098+ get_file(file);
13099+ vma->vm_region->vm_file = file;
53392da6 13100+ fput(f);
2cbb1c4b
JR
13101+}
13102+#endif /* CONFIG_MMU */
13103+
13104+/* handle vma->vm_prfile */
fb47a38f 13105+static inline void au_vm_prfile_set(struct vm_area_struct *vma,
2cbb1c4b
JR
13106+ struct file *file)
13107+{
2cbb1c4b
JR
13108+ get_file(file);
13109+ vma->vm_prfile = file;
13110+#ifndef CONFIG_MMU
13111+ get_file(file);
13112+ vma->vm_region->vm_prfile = file;
13113+#endif
fb47a38f 13114+}
1308ab2a 13115+
4a4d8108
AM
13116+#endif /* __KERNEL__ */
13117+#endif /* __AUFS_FILE_H__ */
7f207e10
AM
13118diff -urN /usr/share/empty/fs/aufs/finfo.c linux/fs/aufs/finfo.c
13119--- /usr/share/empty/fs/aufs/finfo.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 13120+++ linux/fs/aufs/finfo.c 2015-09-24 10:47:58.251386326 +0200
b912730e 13121@@ -0,0 +1,157 @@
4a4d8108 13122+/*
2000de60 13123+ * Copyright (C) 2005-2015 Junjiro R. Okajima
4a4d8108
AM
13124+ *
13125+ * This program, aufs is free software; you can redistribute it and/or modify
13126+ * it under the terms of the GNU General Public License as published by
13127+ * the Free Software Foundation; either version 2 of the License, or
13128+ * (at your option) any later version.
13129+ *
13130+ * This program is distributed in the hope that it will be useful,
13131+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13132+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13133+ * GNU General Public License for more details.
13134+ *
13135+ * You should have received a copy of the GNU General Public License
523b37e3 13136+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 13137+ */
1308ab2a 13138+
4a4d8108
AM
13139+/*
13140+ * file private data
13141+ */
1facf9fc 13142+
4a4d8108 13143+#include "aufs.h"
1facf9fc 13144+
4a4d8108
AM
13145+void au_hfput(struct au_hfile *hf, struct file *file)
13146+{
13147+ /* todo: direct access f_flags */
2cbb1c4b 13148+ if (vfsub_file_flags(file) & __FMODE_EXEC)
4a4d8108
AM
13149+ allow_write_access(hf->hf_file);
13150+ fput(hf->hf_file);
13151+ hf->hf_file = NULL;
e49829fe 13152+ atomic_dec(&hf->hf_br->br_count);
4a4d8108
AM
13153+ hf->hf_br = NULL;
13154+}
1facf9fc 13155+
4a4d8108
AM
13156+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val)
13157+{
13158+ struct au_finfo *finfo = au_fi(file);
13159+ struct au_hfile *hf;
13160+ struct au_fidir *fidir;
13161+
13162+ fidir = finfo->fi_hdir;
13163+ if (!fidir) {
13164+ AuDebugOn(finfo->fi_btop != bindex);
13165+ hf = &finfo->fi_htop;
13166+ } else
13167+ hf = fidir->fd_hfile + bindex;
13168+
13169+ if (hf && hf->hf_file)
13170+ au_hfput(hf, file);
13171+ if (val) {
13172+ FiMustWriteLock(file);
b912730e 13173+ AuDebugOn(IS_ERR_OR_NULL(file->f_path.dentry));
4a4d8108 13174+ hf->hf_file = val;
2000de60 13175+ hf->hf_br = au_sbr(file->f_path.dentry->d_sb, bindex);
1308ab2a 13176+ }
4a4d8108 13177+}
1facf9fc 13178+
4a4d8108
AM
13179+void au_update_figen(struct file *file)
13180+{
2000de60 13181+ atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_path.dentry));
4a4d8108 13182+ /* smp_mb(); */ /* atomic_set */
1facf9fc 13183+}
13184+
4a4d8108
AM
13185+/* ---------------------------------------------------------------------- */
13186+
4a4d8108
AM
13187+struct au_fidir *au_fidir_alloc(struct super_block *sb)
13188+{
13189+ struct au_fidir *fidir;
13190+ int nbr;
13191+
13192+ nbr = au_sbend(sb) + 1;
13193+ if (nbr < 2)
13194+ nbr = 2; /* initial allocate for 2 branches */
13195+ fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS);
13196+ if (fidir) {
13197+ fidir->fd_bbot = -1;
13198+ fidir->fd_nent = nbr;
13199+ fidir->fd_vdir_cache = NULL;
13200+ }
13201+
13202+ return fidir;
13203+}
13204+
13205+int au_fidir_realloc(struct au_finfo *finfo, int nbr)
13206+{
13207+ int err;
13208+ struct au_fidir *fidir, *p;
13209+
13210+ AuRwMustWriteLock(&finfo->fi_rwsem);
13211+ fidir = finfo->fi_hdir;
13212+ AuDebugOn(!fidir);
13213+
13214+ err = -ENOMEM;
13215+ p = au_kzrealloc(fidir, au_fidir_sz(fidir->fd_nent), au_fidir_sz(nbr),
13216+ GFP_NOFS);
13217+ if (p) {
13218+ p->fd_nent = nbr;
13219+ finfo->fi_hdir = p;
13220+ err = 0;
13221+ }
1facf9fc 13222+
dece6358 13223+ return err;
1facf9fc 13224+}
1308ab2a 13225+
13226+/* ---------------------------------------------------------------------- */
13227+
4a4d8108 13228+void au_finfo_fin(struct file *file)
1308ab2a 13229+{
4a4d8108
AM
13230+ struct au_finfo *finfo;
13231+
2000de60 13232+ au_nfiles_dec(file->f_path.dentry->d_sb);
7f207e10 13233+
4a4d8108
AM
13234+ finfo = au_fi(file);
13235+ AuDebugOn(finfo->fi_hdir);
13236+ AuRwDestroy(&finfo->fi_rwsem);
13237+ au_cache_free_finfo(finfo);
1308ab2a 13238+}
1308ab2a 13239+
e49829fe 13240+void au_fi_init_once(void *_finfo)
4a4d8108 13241+{
e49829fe 13242+ struct au_finfo *finfo = _finfo;
2cbb1c4b 13243+ static struct lock_class_key aufs_fi;
1308ab2a 13244+
e49829fe
JR
13245+ au_rw_init(&finfo->fi_rwsem);
13246+ au_rw_class(&finfo->fi_rwsem, &aufs_fi);
4a4d8108 13247+}
1308ab2a 13248+
4a4d8108
AM
13249+int au_finfo_init(struct file *file, struct au_fidir *fidir)
13250+{
1716fcea 13251+ int err;
4a4d8108
AM
13252+ struct au_finfo *finfo;
13253+ struct dentry *dentry;
13254+
13255+ err = -ENOMEM;
2000de60 13256+ dentry = file->f_path.dentry;
4a4d8108
AM
13257+ finfo = au_cache_alloc_finfo();
13258+ if (unlikely(!finfo))
13259+ goto out;
13260+
13261+ err = 0;
7f207e10 13262+ au_nfiles_inc(dentry->d_sb);
1716fcea
AM
13263+ /* verbose coding for lock class name */
13264+ if (!fidir)
13265+ au_rw_class(&finfo->fi_rwsem, au_lc_key + AuLcNonDir_FIINFO);
13266+ else
13267+ au_rw_class(&finfo->fi_rwsem, au_lc_key + AuLcDir_FIINFO);
4a4d8108
AM
13268+ au_rw_write_lock(&finfo->fi_rwsem);
13269+ finfo->fi_btop = -1;
13270+ finfo->fi_hdir = fidir;
13271+ atomic_set(&finfo->fi_generation, au_digen(dentry));
13272+ /* smp_mb(); */ /* atomic_set */
13273+
13274+ file->private_data = finfo;
13275+
13276+out:
13277+ return err;
13278+}
7f207e10
AM
13279diff -urN /usr/share/empty/fs/aufs/f_op.c linux/fs/aufs/f_op.c
13280--- /usr/share/empty/fs/aufs/f_op.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 13281+++ linux/fs/aufs/f_op.c 2015-09-24 10:47:58.251386326 +0200
5527c038 13282@@ -0,0 +1,738 @@
dece6358 13283+/*
2000de60 13284+ * Copyright (C) 2005-2015 Junjiro R. Okajima
dece6358
AM
13285+ *
13286+ * This program, aufs is free software; you can redistribute it and/or modify
13287+ * it under the terms of the GNU General Public License as published by
13288+ * the Free Software Foundation; either version 2 of the License, or
13289+ * (at your option) any later version.
13290+ *
13291+ * This program is distributed in the hope that it will be useful,
13292+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13293+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13294+ * GNU General Public License for more details.
13295+ *
13296+ * You should have received a copy of the GNU General Public License
523b37e3 13297+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
dece6358 13298+ */
1facf9fc 13299+
13300+/*
4a4d8108 13301+ * file and vm operations
1facf9fc 13302+ */
dece6358 13303+
86dc4139 13304+#include <linux/aio.h>
4a4d8108
AM
13305+#include <linux/fs_stack.h>
13306+#include <linux/mman.h>
4a4d8108 13307+#include <linux/security.h>
dece6358
AM
13308+#include "aufs.h"
13309+
b912730e 13310+int au_do_open_nondir(struct file *file, int flags, struct file *h_file)
1facf9fc 13311+{
4a4d8108
AM
13312+ int err;
13313+ aufs_bindex_t bindex;
4a4d8108
AM
13314+ struct dentry *dentry;
13315+ struct au_finfo *finfo;
38d290e6 13316+ struct inode *h_inode;
4a4d8108
AM
13317+
13318+ FiMustWriteLock(file);
13319+
523b37e3 13320+ err = 0;
2000de60 13321+ dentry = file->f_path.dentry;
b912730e 13322+ AuDebugOn(IS_ERR_OR_NULL(dentry));
4a4d8108
AM
13323+ finfo = au_fi(file);
13324+ memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop));
2cbb1c4b 13325+ atomic_set(&finfo->fi_mmapped, 0);
4a4d8108 13326+ bindex = au_dbstart(dentry);
b912730e
AM
13327+ if (!h_file)
13328+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
13329+ else
13330+ get_file(h_file);
4a4d8108
AM
13331+ if (IS_ERR(h_file))
13332+ err = PTR_ERR(h_file);
13333+ else {
38d290e6
JR
13334+ if ((flags & __O_TMPFILE)
13335+ && !(flags & O_EXCL)) {
13336+ h_inode = file_inode(h_file);
13337+ spin_lock(&h_inode->i_lock);
13338+ h_inode->i_state |= I_LINKABLE;
13339+ spin_unlock(&h_inode->i_lock);
13340+ }
4a4d8108
AM
13341+ au_set_fbstart(file, bindex);
13342+ au_set_h_fptr(file, bindex, h_file);
13343+ au_update_figen(file);
13344+ /* todo: necessary? */
13345+ /* file->f_ra = h_file->f_ra; */
13346+ }
027c5e7a 13347+
4a4d8108 13348+ return err;
1facf9fc 13349+}
13350+
4a4d8108
AM
13351+static int aufs_open_nondir(struct inode *inode __maybe_unused,
13352+ struct file *file)
1facf9fc 13353+{
4a4d8108 13354+ int err;
1308ab2a 13355+ struct super_block *sb;
b912730e
AM
13356+ struct au_do_open_args args = {
13357+ .open = au_do_open_nondir
13358+ };
1facf9fc 13359+
523b37e3
AM
13360+ AuDbg("%pD, f_flags 0x%x, f_mode 0x%x\n",
13361+ file, vfsub_file_flags(file), file->f_mode);
1facf9fc 13362+
2000de60 13363+ sb = file->f_path.dentry->d_sb;
4a4d8108 13364+ si_read_lock(sb, AuLock_FLUSH);
b912730e 13365+ err = au_do_open(file, &args);
4a4d8108
AM
13366+ si_read_unlock(sb);
13367+ return err;
13368+}
1facf9fc 13369+
4a4d8108
AM
13370+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file)
13371+{
13372+ struct au_finfo *finfo;
13373+ aufs_bindex_t bindex;
1facf9fc 13374+
4a4d8108 13375+ finfo = au_fi(file);
2000de60
JR
13376+ au_sphl_del(&finfo->fi_hlist,
13377+ &au_sbi(file->f_path.dentry->d_sb)->si_files);
4a4d8108 13378+ bindex = finfo->fi_btop;
b4510431 13379+ if (bindex >= 0)
4a4d8108 13380+ au_set_h_fptr(file, bindex, NULL);
7f207e10 13381+
4a4d8108
AM
13382+ au_finfo_fin(file);
13383+ return 0;
1facf9fc 13384+}
13385+
4a4d8108
AM
13386+/* ---------------------------------------------------------------------- */
13387+
13388+static int au_do_flush_nondir(struct file *file, fl_owner_t id)
dece6358 13389+{
1308ab2a 13390+ int err;
4a4d8108
AM
13391+ struct file *h_file;
13392+
13393+ err = 0;
13394+ h_file = au_hf_top(file);
13395+ if (h_file)
13396+ err = vfsub_flush(h_file, id);
13397+ return err;
13398+}
13399+
13400+static int aufs_flush_nondir(struct file *file, fl_owner_t id)
13401+{
13402+ return au_do_flush(file, id, au_do_flush_nondir);
13403+}
13404+
13405+/* ---------------------------------------------------------------------- */
9dbd164d
AM
13406+/*
13407+ * read and write functions acquire [fdi]_rwsem once, but release before
13408+ * mmap_sem. This is to prevent a race condition with mmap(2).
13409+ * Releasing these aufs-rwsem should be safe, no branch-management (by keeping
13410+ * si_rwsem), no harmful copy-up should happen. Actually copy-up may happen in
13411+ * read functions after [fdi]_rwsem are released, but it should be harmless.
13412+ */
4a4d8108 13413+
b912730e
AM
13414+/* Callers should call au_read_post() or fput() in the end */
13415+struct file *au_read_pre(struct file *file, int keep_fi)
4a4d8108 13416+{
4a4d8108 13417+ struct file *h_file;
b912730e 13418+ int err;
1facf9fc 13419+
4a4d8108 13420+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
b912730e
AM
13421+ if (!err) {
13422+ di_read_unlock(file->f_path.dentry, AuLock_IR);
13423+ h_file = au_hf_top(file);
13424+ get_file(h_file);
13425+ if (!keep_fi)
13426+ fi_read_unlock(file);
13427+ } else
13428+ h_file = ERR_PTR(err);
13429+
13430+ return h_file;
13431+}
13432+
13433+static void au_read_post(struct inode *inode, struct file *h_file)
13434+{
13435+ /* update without lock, I don't think it is a problem */
13436+ fsstack_copy_attr_atime(inode, file_inode(h_file));
13437+ fput(h_file);
13438+}
13439+
13440+struct au_write_pre {
13441+ blkcnt_t blks;
13442+ aufs_bindex_t bstart;
13443+};
13444+
13445+/*
13446+ * return with iinfo is write-locked
13447+ * callers should call au_write_post() or iinfo_write_unlock() + fput() in the
13448+ * end
13449+ */
13450+static struct file *au_write_pre(struct file *file, int do_ready,
13451+ struct au_write_pre *wpre)
13452+{
13453+ struct file *h_file;
13454+ struct dentry *dentry;
13455+ int err;
13456+ struct au_pin pin;
13457+
13458+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
13459+ h_file = ERR_PTR(err);
dece6358
AM
13460+ if (unlikely(err))
13461+ goto out;
1facf9fc 13462+
b912730e
AM
13463+ dentry = file->f_path.dentry;
13464+ if (do_ready) {
13465+ err = au_ready_to_write(file, -1, &pin);
13466+ if (unlikely(err)) {
13467+ h_file = ERR_PTR(err);
13468+ di_write_unlock(dentry);
13469+ goto out_fi;
13470+ }
13471+ }
13472+
13473+ di_downgrade_lock(dentry, /*flags*/0);
13474+ if (wpre)
13475+ wpre->bstart = au_fbstart(file);
4a4d8108 13476+ h_file = au_hf_top(file);
9dbd164d 13477+ get_file(h_file);
b912730e
AM
13478+ if (wpre)
13479+ wpre->blks = file_inode(h_file)->i_blocks;
13480+ if (do_ready)
13481+ au_unpin(&pin);
13482+ di_read_unlock(dentry, /*flags*/0);
13483+
13484+out_fi:
13485+ fi_write_unlock(file);
13486+out:
13487+ return h_file;
13488+}
13489+
13490+static void au_write_post(struct inode *inode, struct file *h_file,
13491+ struct au_write_pre *wpre, ssize_t written)
13492+{
13493+ struct inode *h_inode;
13494+
13495+ au_cpup_attr_timesizes(inode);
13496+ AuDebugOn(au_ibstart(inode) != wpre->bstart);
13497+ h_inode = file_inode(h_file);
13498+ inode->i_mode = h_inode->i_mode;
13499+ ii_write_unlock(inode);
13500+ fput(h_file);
13501+
13502+ /* AuDbg("blks %llu, %llu\n", (u64)blks, (u64)h_inode->i_blocks); */
13503+ if (written > 0)
13504+ au_fhsm_wrote(inode->i_sb, wpre->bstart,
13505+ /*force*/h_inode->i_blocks > wpre->blks);
13506+}
13507+
13508+static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
13509+ loff_t *ppos)
13510+{
13511+ ssize_t err;
13512+ struct inode *inode;
13513+ struct file *h_file;
13514+ struct super_block *sb;
13515+
13516+ inode = file_inode(file);
13517+ sb = inode->i_sb;
13518+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
13519+
13520+ h_file = au_read_pre(file, /*keep_fi*/0);
13521+ err = PTR_ERR(h_file);
13522+ if (IS_ERR(h_file))
13523+ goto out;
9dbd164d
AM
13524+
13525+ /* filedata may be obsoleted by concurrent copyup, but no problem */
4a4d8108
AM
13526+ err = vfsub_read_u(h_file, buf, count, ppos);
13527+ /* todo: necessary? */
13528+ /* file->f_ra = h_file->f_ra; */
b912730e 13529+ au_read_post(inode, h_file);
1308ab2a 13530+
4f0767ce 13531+out:
dece6358
AM
13532+ si_read_unlock(sb);
13533+ return err;
13534+}
1facf9fc 13535+
e49829fe
JR
13536+/*
13537+ * todo: very ugly
13538+ * it safely locks both i_mutex and si_rwsem (for read).
13539+ * if the plink maintenance mode continues forever (that is the problem),
13540+ * may loop forever.
13541+ */
13542+static void au_mtx_and_read_lock(struct inode *inode)
13543+{
13544+ int err;
13545+ struct super_block *sb = inode->i_sb;
13546+
13547+ while (1) {
13548+ mutex_lock(&inode->i_mutex);
13549+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
13550+ if (!err)
13551+ break;
13552+ mutex_unlock(&inode->i_mutex);
13553+ si_read_lock(sb, AuLock_NOPLMW);
13554+ si_read_unlock(sb);
13555+ }
13556+}
13557+
4a4d8108
AM
13558+static ssize_t aufs_write(struct file *file, const char __user *ubuf,
13559+ size_t count, loff_t *ppos)
dece6358 13560+{
4a4d8108 13561+ ssize_t err;
b912730e
AM
13562+ struct au_write_pre wpre;
13563+ struct inode *inode;
4a4d8108
AM
13564+ struct file *h_file;
13565+ char __user *buf = (char __user *)ubuf;
1facf9fc 13566+
b912730e 13567+ inode = file_inode(file);
e49829fe 13568+ au_mtx_and_read_lock(inode);
1facf9fc 13569+
b912730e
AM
13570+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13571+ err = PTR_ERR(h_file);
13572+ if (IS_ERR(h_file))
9dbd164d 13573+ goto out;
9dbd164d 13574+
4a4d8108 13575+ err = vfsub_write_u(h_file, buf, count, ppos);
b912730e 13576+ au_write_post(inode, h_file, &wpre, err);
1facf9fc 13577+
4f0767ce 13578+out:
b912730e 13579+ si_read_unlock(inode->i_sb);
4a4d8108 13580+ mutex_unlock(&inode->i_mutex);
dece6358
AM
13581+ return err;
13582+}
1facf9fc 13583+
076b876e
AM
13584+static ssize_t au_do_iter(struct file *h_file, int rw, struct kiocb *kio,
13585+ struct iov_iter *iov_iter)
dece6358 13586+{
4a4d8108
AM
13587+ ssize_t err;
13588+ struct file *file;
076b876e 13589+ ssize_t (*iter)(struct kiocb *, struct iov_iter *);
1facf9fc 13590+
4a4d8108
AM
13591+ err = security_file_permission(h_file, rw);
13592+ if (unlikely(err))
13593+ goto out;
1facf9fc 13594+
4a4d8108 13595+ err = -ENOSYS;
076b876e 13596+ iter = NULL;
5527c038 13597+ if (rw == MAY_READ)
076b876e 13598+ iter = h_file->f_op->read_iter;
5527c038 13599+ else if (rw == MAY_WRITE)
076b876e 13600+ iter = h_file->f_op->write_iter;
076b876e
AM
13601+
13602+ file = kio->ki_filp;
13603+ kio->ki_filp = h_file;
13604+ if (iter) {
2cbb1c4b 13605+ lockdep_off();
076b876e
AM
13606+ err = iter(kio, iov_iter);
13607+ lockdep_on();
4a4d8108
AM
13608+ } else
13609+ /* currently there is no such fs */
13610+ WARN_ON_ONCE(1);
076b876e 13611+ kio->ki_filp = file;
1facf9fc 13612+
4f0767ce 13613+out:
dece6358
AM
13614+ return err;
13615+}
1facf9fc 13616+
076b876e 13617+static ssize_t aufs_read_iter(struct kiocb *kio, struct iov_iter *iov_iter)
1facf9fc 13618+{
4a4d8108
AM
13619+ ssize_t err;
13620+ struct file *file, *h_file;
b912730e 13621+ struct inode *inode;
dece6358 13622+ struct super_block *sb;
1facf9fc 13623+
4a4d8108 13624+ file = kio->ki_filp;
b912730e
AM
13625+ inode = file_inode(file);
13626+ sb = inode->i_sb;
e49829fe 13627+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108 13628+
b912730e
AM
13629+ h_file = au_read_pre(file, /*keep_fi*/0);
13630+ err = PTR_ERR(h_file);
13631+ if (IS_ERR(h_file))
13632+ goto out;
9dbd164d 13633+
076b876e 13634+ err = au_do_iter(h_file, MAY_READ, kio, iov_iter);
4a4d8108
AM
13635+ /* todo: necessary? */
13636+ /* file->f_ra = h_file->f_ra; */
b912730e 13637+ au_read_post(inode, h_file);
1facf9fc 13638+
4f0767ce 13639+out:
4a4d8108 13640+ si_read_unlock(sb);
1308ab2a 13641+ return err;
13642+}
1facf9fc 13643+
076b876e 13644+static ssize_t aufs_write_iter(struct kiocb *kio, struct iov_iter *iov_iter)
1308ab2a 13645+{
4a4d8108 13646+ ssize_t err;
b912730e
AM
13647+ struct au_write_pre wpre;
13648+ struct inode *inode;
4a4d8108 13649+ struct file *file, *h_file;
1308ab2a 13650+
4a4d8108 13651+ file = kio->ki_filp;
b912730e 13652+ inode = file_inode(file);
e49829fe
JR
13653+ au_mtx_and_read_lock(inode);
13654+
b912730e
AM
13655+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13656+ err = PTR_ERR(h_file);
13657+ if (IS_ERR(h_file))
9dbd164d 13658+ goto out;
9dbd164d 13659+
076b876e 13660+ err = au_do_iter(h_file, MAY_WRITE, kio, iov_iter);
b912730e 13661+ au_write_post(inode, h_file, &wpre, err);
1facf9fc 13662+
4f0767ce 13663+out:
b912730e 13664+ si_read_unlock(inode->i_sb);
4a4d8108 13665+ mutex_unlock(&inode->i_mutex);
dece6358 13666+ return err;
1facf9fc 13667+}
13668+
4a4d8108
AM
13669+static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
13670+ struct pipe_inode_info *pipe, size_t len,
13671+ unsigned int flags)
1facf9fc 13672+{
4a4d8108
AM
13673+ ssize_t err;
13674+ struct file *h_file;
b912730e 13675+ struct inode *inode;
dece6358 13676+ struct super_block *sb;
1facf9fc 13677+
b912730e
AM
13678+ inode = file_inode(file);
13679+ sb = inode->i_sb;
e49829fe 13680+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
b912730e
AM
13681+
13682+ h_file = au_read_pre(file, /*keep_fi*/1);
13683+ err = PTR_ERR(h_file);
13684+ if (IS_ERR(h_file))
dece6358 13685+ goto out;
1facf9fc 13686+
4a4d8108 13687+ if (au_test_loopback_kthread()) {
2000de60 13688+ au_warn_loopback(h_file->f_path.dentry->d_sb);
87a755f4
AM
13689+ if (file->f_mapping != h_file->f_mapping) {
13690+ file->f_mapping = h_file->f_mapping;
13691+ smp_mb(); /* unnecessary? */
13692+ }
1308ab2a 13693+ }
9dbd164d
AM
13694+ fi_read_unlock(file);
13695+
4a4d8108
AM
13696+ err = vfsub_splice_to(h_file, ppos, pipe, len, flags);
13697+ /* todo: necessary? */
13698+ /* file->f_ra = h_file->f_ra; */
b912730e 13699+ au_read_post(inode, h_file);
1facf9fc 13700+
4f0767ce 13701+out:
4a4d8108 13702+ si_read_unlock(sb);
dece6358 13703+ return err;
1facf9fc 13704+}
13705+
4a4d8108
AM
13706+static ssize_t
13707+aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
13708+ size_t len, unsigned int flags)
1facf9fc 13709+{
4a4d8108 13710+ ssize_t err;
b912730e
AM
13711+ struct au_write_pre wpre;
13712+ struct inode *inode;
076b876e 13713+ struct file *h_file;
1facf9fc 13714+
b912730e 13715+ inode = file_inode(file);
e49829fe 13716+ au_mtx_and_read_lock(inode);
9dbd164d 13717+
b912730e
AM
13718+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13719+ err = PTR_ERR(h_file);
13720+ if (IS_ERR(h_file))
9dbd164d 13721+ goto out;
9dbd164d 13722+
4a4d8108 13723+ err = vfsub_splice_from(pipe, h_file, ppos, len, flags);
b912730e 13724+ au_write_post(inode, h_file, &wpre, err);
1facf9fc 13725+
4f0767ce 13726+out:
b912730e 13727+ si_read_unlock(inode->i_sb);
4a4d8108
AM
13728+ mutex_unlock(&inode->i_mutex);
13729+ return err;
13730+}
1facf9fc 13731+
38d290e6
JR
13732+static long aufs_fallocate(struct file *file, int mode, loff_t offset,
13733+ loff_t len)
13734+{
13735+ long err;
b912730e 13736+ struct au_write_pre wpre;
38d290e6
JR
13737+ struct inode *inode;
13738+ struct file *h_file;
13739+
b912730e 13740+ inode = file_inode(file);
38d290e6
JR
13741+ au_mtx_and_read_lock(inode);
13742+
b912730e
AM
13743+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13744+ err = PTR_ERR(h_file);
13745+ if (IS_ERR(h_file))
38d290e6 13746+ goto out;
38d290e6
JR
13747+
13748+ lockdep_off();
03673fb0 13749+ err = vfs_fallocate(h_file, mode, offset, len);
38d290e6 13750+ lockdep_on();
b912730e 13751+ au_write_post(inode, h_file, &wpre, /*written*/1);
38d290e6
JR
13752+
13753+out:
b912730e 13754+ si_read_unlock(inode->i_sb);
38d290e6
JR
13755+ mutex_unlock(&inode->i_mutex);
13756+ return err;
13757+}
13758+
4a4d8108
AM
13759+/* ---------------------------------------------------------------------- */
13760+
9dbd164d
AM
13761+/*
13762+ * The locking order around current->mmap_sem.
13763+ * - in most and regular cases
13764+ * file I/O syscall -- aufs_read() or something
13765+ * -- si_rwsem for read -- mmap_sem
13766+ * (Note that [fdi]i_rwsem are released before mmap_sem).
13767+ * - in mmap case
13768+ * mmap(2) -- mmap_sem -- aufs_mmap() -- si_rwsem for read -- [fdi]i_rwsem
13769+ * This AB-BA order is definitely bad, but is not a problem since "si_rwsem for
13770+ * read" allows multiple processes to acquire it and [fdi]i_rwsem are not held in
13771+ * file I/O. Aufs needs to stop lockdep in aufs_mmap() though.
13772+ * It means that when aufs acquires si_rwsem for write, the process should never
13773+ * acquire mmap_sem.
13774+ *
392086de 13775+ * Actually aufs_iterate() holds [fdi]i_rwsem before mmap_sem, but this is not a
9dbd164d
AM
13776+ * problem either since no directory can be mmap-ed.
13777+ * A similar scenario applies to aufs_readlink() too.
13778+ */
13779+
38d290e6 13780+#if 0 /* stop calling security_mmap_file() */
2dfbb274
AM
13781+/* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */
13782+#define AuConv_VM_PROT(f, b) _calc_vm_trans(f, VM_##b, PROT_##b)
13783+
13784+static unsigned long au_arch_prot_conv(unsigned long flags)
13785+{
13786+ /* currently ppc64 only */
13787+#ifdef CONFIG_PPC64
13788+ /* cf. linux/arch/powerpc/include/asm/mman.h */
13789+ AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO);
13790+ return AuConv_VM_PROT(flags, SAO);
13791+#else
13792+ AuDebugOn(arch_calc_vm_prot_bits(-1));
13793+ return 0;
13794+#endif
13795+}
13796+
13797+static unsigned long au_prot_conv(unsigned long flags)
13798+{
13799+ return AuConv_VM_PROT(flags, READ)
13800+ | AuConv_VM_PROT(flags, WRITE)
13801+ | AuConv_VM_PROT(flags, EXEC)
13802+ | au_arch_prot_conv(flags);
13803+}
13804+
13805+/* cf. linux/include/linux/mman.h: calc_vm_flag_bits() */
13806+#define AuConv_VM_MAP(f, b) _calc_vm_trans(f, VM_##b, MAP_##b)
13807+
13808+static unsigned long au_flag_conv(unsigned long flags)
13809+{
13810+ return AuConv_VM_MAP(flags, GROWSDOWN)
13811+ | AuConv_VM_MAP(flags, DENYWRITE)
2dfbb274
AM
13812+ | AuConv_VM_MAP(flags, LOCKED);
13813+}
38d290e6 13814+#endif
2dfbb274 13815+
9dbd164d 13816+static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
dece6358 13817+{
4a4d8108 13818+ int err;
4a4d8108 13819+ const unsigned char wlock
9dbd164d 13820+ = (file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
4a4d8108 13821+ struct super_block *sb;
9dbd164d 13822+ struct file *h_file;
b912730e 13823+ struct inode *inode;
9dbd164d
AM
13824+
13825+ AuDbgVmRegion(file, vma);
1308ab2a 13826+
b912730e
AM
13827+ inode = file_inode(file);
13828+ sb = inode->i_sb;
9dbd164d 13829+ lockdep_off();
e49829fe 13830+ si_read_lock(sb, AuLock_NOPLMW);
4a4d8108 13831+
b912730e 13832+ h_file = au_write_pre(file, wlock, /*wpre*/NULL);
9dbd164d 13833+ lockdep_on();
b912730e
AM
13834+ err = PTR_ERR(h_file);
13835+ if (IS_ERR(h_file))
13836+ goto out;
1308ab2a 13837+
b912730e
AM
13838+ err = 0;
13839+ au_set_mmapped(file);
9dbd164d 13840+ au_vm_file_reset(vma, h_file);
38d290e6
JR
13841+ /*
13842+ * we cannot call security_mmap_file() here since it may acquire
13843+ * mmap_sem or i_mutex.
13844+ *
13845+ * err = security_mmap_file(h_file, au_prot_conv(vma->vm_flags),
13846+ * au_flag_conv(vma->vm_flags));
13847+ */
9dbd164d
AM
13848+ if (!err)
13849+ err = h_file->f_op->mmap(h_file, vma);
b912730e
AM
13850+ if (!err) {
13851+ au_vm_prfile_set(vma, file);
13852+ fsstack_copy_attr_atime(inode, file_inode(h_file));
13853+ goto out_fput; /* success */
13854+ }
2cbb1c4b
JR
13855+ au_unset_mmapped(file);
13856+ au_vm_file_reset(vma, file);
b912730e 13857+
2cbb1c4b 13858+out_fput:
9dbd164d 13859+ lockdep_off();
b912730e
AM
13860+ ii_write_unlock(inode);
13861+ lockdep_on();
13862+ fput(h_file);
4f0767ce 13863+out:
b912730e 13864+ lockdep_off();
9dbd164d
AM
13865+ si_read_unlock(sb);
13866+ lockdep_on();
13867+ AuTraceErr(err);
4a4d8108
AM
13868+ return err;
13869+}
13870+
13871+/* ---------------------------------------------------------------------- */
13872+
1e00d052
AM
13873+static int aufs_fsync_nondir(struct file *file, loff_t start, loff_t end,
13874+ int datasync)
4a4d8108
AM
13875+{
13876+ int err;
b912730e 13877+ struct au_write_pre wpre;
4a4d8108
AM
13878+ struct inode *inode;
13879+ struct file *h_file;
4a4d8108
AM
13880+
13881+ err = 0; /* -EBADF; */ /* posix? */
13882+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
b912730e 13883+ goto out;
4a4d8108 13884+
b912730e
AM
13885+ inode = file_inode(file);
13886+ au_mtx_and_read_lock(inode);
13887+
13888+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13889+ err = PTR_ERR(h_file);
13890+ if (IS_ERR(h_file))
4a4d8108 13891+ goto out_unlock;
4a4d8108 13892+
53392da6 13893+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
b912730e 13894+ au_write_post(inode, h_file, &wpre, /*written*/0);
4a4d8108 13895+
4f0767ce 13896+out_unlock:
b912730e 13897+ si_read_unlock(inode->i_sb);
1e00d052 13898+ mutex_unlock(&inode->i_mutex);
b912730e 13899+out:
4a4d8108 13900+ return err;
dece6358
AM
13901+}
13902+
4a4d8108
AM
13903+/* no one supports this operation, currently */
13904+#if 0
13905+static int aufs_aio_fsync_nondir(struct kiocb *kio, int datasync)
dece6358 13906+{
4a4d8108 13907+ int err;
b912730e 13908+ struct au_write_pre wpre;
4a4d8108
AM
13909+ struct inode *inode;
13910+ struct file *file, *h_file;
1308ab2a 13911+
4a4d8108
AM
13912+ err = 0; /* -EBADF; */ /* posix? */
13913+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
13914+ goto out;
1308ab2a 13915+
b912730e
AM
13916+ file = kio->ki_filp;
13917+ inode = file_inode(file);
13918+ au_mtx_and_read_lock(inode);
13919+
13920+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13921+ err = PTR_ERR(h_file);
13922+ if (IS_ERR(h_file))
4a4d8108 13923+ goto out_unlock;
1308ab2a 13924+
4a4d8108
AM
13925+ err = -ENOSYS;
13926+ h_file = au_hf_top(file);
523b37e3 13927+ if (h_file->f_op->aio_fsync) {
4a4d8108 13928+ struct mutex *h_mtx;
1308ab2a 13929+
c06a8ce3 13930+ h_mtx = &file_inode(h_file)->i_mutex;
4a4d8108
AM
13931+ if (!is_sync_kiocb(kio)) {
13932+ get_file(h_file);
13933+ fput(file);
13934+ }
13935+ kio->ki_filp = h_file;
13936+ err = h_file->f_op->aio_fsync(kio, datasync);
13937+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
13938+ if (!err)
13939+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
13940+ /*ignore*/
4a4d8108
AM
13941+ mutex_unlock(h_mtx);
13942+ }
b912730e 13943+ au_write_post(inode, h_file, &wpre, /*written*/0);
1308ab2a 13944+
4f0767ce 13945+out_unlock:
e49829fe 13946+ si_read_unlock(inode->sb);
4a4d8108 13947+ mutex_unlock(&inode->i_mutex);
b912730e 13948+out:
4a4d8108 13949+ return err;
dece6358 13950+}
4a4d8108 13951+#endif
dece6358 13952+
4a4d8108 13953+static int aufs_fasync(int fd, struct file *file, int flag)
dece6358 13954+{
4a4d8108
AM
13955+ int err;
13956+ struct file *h_file;
4a4d8108 13957+ struct super_block *sb;
1308ab2a 13958+
b912730e 13959+ sb = file->f_path.dentry->d_sb;
e49829fe 13960+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
b912730e
AM
13961+
13962+ h_file = au_read_pre(file, /*keep_fi*/0);
13963+ err = PTR_ERR(h_file);
13964+ if (IS_ERR(h_file))
4a4d8108
AM
13965+ goto out;
13966+
523b37e3 13967+ if (h_file->f_op->fasync)
4a4d8108 13968+ err = h_file->f_op->fasync(fd, h_file, flag);
b912730e 13969+ fput(h_file); /* instead of au_read_post() */
1308ab2a 13970+
4f0767ce 13971+out:
4a4d8108 13972+ si_read_unlock(sb);
1308ab2a 13973+ return err;
dece6358 13974+}
4a4d8108
AM
13975+
13976+/* ---------------------------------------------------------------------- */
13977+
13978+/* no one supports this operation, currently */
13979+#if 0
13980+static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
2000de60 13981+ size_t len, loff_t *pos, int more)
4a4d8108
AM
13982+{
13983+}
13984+#endif
13985+
13986+/* ---------------------------------------------------------------------- */
13987+
13988+const struct file_operations aufs_file_fop = {
13989+ .owner = THIS_MODULE,
2cbb1c4b 13990+
027c5e7a 13991+ .llseek = default_llseek,
4a4d8108
AM
13992+
13993+ .read = aufs_read,
13994+ .write = aufs_write,
076b876e
AM
13995+ .read_iter = aufs_read_iter,
13996+ .write_iter = aufs_write_iter,
13997+
4a4d8108
AM
13998+#ifdef CONFIG_AUFS_POLL
13999+ .poll = aufs_poll,
14000+#endif
14001+ .unlocked_ioctl = aufs_ioctl_nondir,
b752ccd1 14002+#ifdef CONFIG_COMPAT
c2b27bf2 14003+ .compat_ioctl = aufs_compat_ioctl_nondir,
b752ccd1 14004+#endif
4a4d8108
AM
14005+ .mmap = aufs_mmap,
14006+ .open = aufs_open_nondir,
14007+ .flush = aufs_flush_nondir,
14008+ .release = aufs_release_nondir,
14009+ .fsync = aufs_fsync_nondir,
14010+ /* .aio_fsync = aufs_aio_fsync_nondir, */
14011+ .fasync = aufs_fasync,
14012+ /* .sendpage = aufs_sendpage, */
14013+ .splice_write = aufs_splice_write,
14014+ .splice_read = aufs_splice_read,
14015+#if 0
14016+ .aio_splice_write = aufs_aio_splice_write,
38d290e6 14017+ .aio_splice_read = aufs_aio_splice_read,
4a4d8108 14018+#endif
38d290e6 14019+ .fallocate = aufs_fallocate
4a4d8108 14020+};
7f207e10
AM
14021diff -urN /usr/share/empty/fs/aufs/fstype.h linux/fs/aufs/fstype.h
14022--- /usr/share/empty/fs/aufs/fstype.h 1970-01-01 01:00:00.000000000 +0100
ab036dbd 14023+++ linux/fs/aufs/fstype.h 2015-12-10 17:59:16.836166410 +0100
b912730e 14024@@ -0,0 +1,400 @@
4a4d8108 14025+/*
2000de60 14026+ * Copyright (C) 2005-2015 Junjiro R. Okajima
4a4d8108
AM
14027+ *
14028+ * This program, aufs is free software; you can redistribute it and/or modify
14029+ * it under the terms of the GNU General Public License as published by
14030+ * the Free Software Foundation; either version 2 of the License, or
14031+ * (at your option) any later version.
14032+ *
14033+ * This program is distributed in the hope that it will be useful,
14034+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14035+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14036+ * GNU General Public License for more details.
14037+ *
14038+ * You should have received a copy of the GNU General Public License
523b37e3 14039+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
14040+ */
14041+
14042+/*
14043+ * judging filesystem type
14044+ */
14045+
14046+#ifndef __AUFS_FSTYPE_H__
14047+#define __AUFS_FSTYPE_H__
14048+
14049+#ifdef __KERNEL__
14050+
14051+#include <linux/fs.h>
14052+#include <linux/magic.h>
b912730e 14053+#include <linux/nfs_fs.h>
ab036dbd 14054+#include <linux/romfs_fs.h>
4a4d8108
AM
14055+
14056+static inline int au_test_aufs(struct super_block *sb)
14057+{
14058+ return sb->s_magic == AUFS_SUPER_MAGIC;
14059+}
14060+
14061+static inline const char *au_sbtype(struct super_block *sb)
14062+{
14063+ return sb->s_type->name;
14064+}
1308ab2a 14065+
14066+static inline int au_test_iso9660(struct super_block *sb __maybe_unused)
14067+{
2000de60
JR
14068+#if defined(CONFIG_ISO9660_FS) || defined(CONFIG_ISO9660_FS_MODULE)
14069+ return sb->s_magic == ISOFS_SUPER_MAGIC;
dece6358
AM
14070+#else
14071+ return 0;
14072+#endif
14073+}
14074+
1308ab2a 14075+static inline int au_test_romfs(struct super_block *sb __maybe_unused)
dece6358 14076+{
2000de60
JR
14077+#if defined(CONFIG_ROMFS_FS) || defined(CONFIG_ROMFS_FS_MODULE)
14078+ return sb->s_magic == ROMFS_MAGIC;
dece6358
AM
14079+#else
14080+ return 0;
14081+#endif
14082+}
14083+
1308ab2a 14084+static inline int au_test_cramfs(struct super_block *sb __maybe_unused)
dece6358 14085+{
1308ab2a 14086+#if defined(CONFIG_CRAMFS) || defined(CONFIG_CRAMFS_MODULE)
14087+ return sb->s_magic == CRAMFS_MAGIC;
14088+#endif
14089+ return 0;
14090+}
14091+
14092+static inline int au_test_nfs(struct super_block *sb __maybe_unused)
14093+{
14094+#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE)
14095+ return sb->s_magic == NFS_SUPER_MAGIC;
dece6358
AM
14096+#else
14097+ return 0;
14098+#endif
14099+}
14100+
1308ab2a 14101+static inline int au_test_fuse(struct super_block *sb __maybe_unused)
dece6358 14102+{
1308ab2a 14103+#if defined(CONFIG_FUSE_FS) || defined(CONFIG_FUSE_FS_MODULE)
14104+ return sb->s_magic == FUSE_SUPER_MAGIC;
dece6358
AM
14105+#else
14106+ return 0;
14107+#endif
14108+}
14109+
1308ab2a 14110+static inline int au_test_xfs(struct super_block *sb __maybe_unused)
dece6358 14111+{
1308ab2a 14112+#if defined(CONFIG_XFS_FS) || defined(CONFIG_XFS_FS_MODULE)
14113+ return sb->s_magic == XFS_SB_MAGIC;
dece6358
AM
14114+#else
14115+ return 0;
14116+#endif
14117+}
14118+
1308ab2a 14119+static inline int au_test_tmpfs(struct super_block *sb __maybe_unused)
dece6358 14120+{
1308ab2a 14121+#ifdef CONFIG_TMPFS
14122+ return sb->s_magic == TMPFS_MAGIC;
14123+#else
14124+ return 0;
dece6358 14125+#endif
dece6358
AM
14126+}
14127+
1308ab2a 14128+static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused)
1facf9fc 14129+{
1308ab2a 14130+#if defined(CONFIG_ECRYPT_FS) || defined(CONFIG_ECRYPT_FS_MODULE)
14131+ return !strcmp(au_sbtype(sb), "ecryptfs");
14132+#else
14133+ return 0;
14134+#endif
1facf9fc 14135+}
14136+
1308ab2a 14137+static inline int au_test_ramfs(struct super_block *sb)
14138+{
14139+ return sb->s_magic == RAMFS_MAGIC;
14140+}
14141+
14142+static inline int au_test_ubifs(struct super_block *sb __maybe_unused)
14143+{
14144+#if defined(CONFIG_UBIFS_FS) || defined(CONFIG_UBIFS_FS_MODULE)
14145+ return sb->s_magic == UBIFS_SUPER_MAGIC;
14146+#else
14147+ return 0;
14148+#endif
14149+}
14150+
14151+static inline int au_test_procfs(struct super_block *sb __maybe_unused)
14152+{
14153+#ifdef CONFIG_PROC_FS
14154+ return sb->s_magic == PROC_SUPER_MAGIC;
14155+#else
14156+ return 0;
14157+#endif
14158+}
14159+
14160+static inline int au_test_sysfs(struct super_block *sb __maybe_unused)
14161+{
14162+#ifdef CONFIG_SYSFS
14163+ return sb->s_magic == SYSFS_MAGIC;
14164+#else
14165+ return 0;
14166+#endif
14167+}
14168+
14169+static inline int au_test_configfs(struct super_block *sb __maybe_unused)
14170+{
14171+#if defined(CONFIG_CONFIGFS_FS) || defined(CONFIG_CONFIGFS_FS_MODULE)
14172+ return sb->s_magic == CONFIGFS_MAGIC;
14173+#else
14174+ return 0;
14175+#endif
14176+}
14177+
14178+static inline int au_test_minix(struct super_block *sb __maybe_unused)
14179+{
14180+#if defined(CONFIG_MINIX_FS) || defined(CONFIG_MINIX_FS_MODULE)
14181+ return sb->s_magic == MINIX3_SUPER_MAGIC
14182+ || sb->s_magic == MINIX2_SUPER_MAGIC
14183+ || sb->s_magic == MINIX2_SUPER_MAGIC2
14184+ || sb->s_magic == MINIX_SUPER_MAGIC
14185+ || sb->s_magic == MINIX_SUPER_MAGIC2;
14186+#else
14187+ return 0;
14188+#endif
14189+}
14190+
1308ab2a 14191+static inline int au_test_fat(struct super_block *sb __maybe_unused)
14192+{
14193+#if defined(CONFIG_FAT_FS) || defined(CONFIG_FAT_FS_MODULE)
14194+ return sb->s_magic == MSDOS_SUPER_MAGIC;
14195+#else
14196+ return 0;
14197+#endif
14198+}
14199+
14200+static inline int au_test_msdos(struct super_block *sb)
14201+{
14202+ return au_test_fat(sb);
14203+}
14204+
14205+static inline int au_test_vfat(struct super_block *sb)
14206+{
14207+ return au_test_fat(sb);
14208+}
14209+
14210+static inline int au_test_securityfs(struct super_block *sb __maybe_unused)
14211+{
14212+#ifdef CONFIG_SECURITYFS
14213+ return sb->s_magic == SECURITYFS_MAGIC;
14214+#else
14215+ return 0;
14216+#endif
14217+}
14218+
14219+static inline int au_test_squashfs(struct super_block *sb __maybe_unused)
14220+{
14221+#if defined(CONFIG_SQUASHFS) || defined(CONFIG_SQUASHFS_MODULE)
14222+ return sb->s_magic == SQUASHFS_MAGIC;
14223+#else
14224+ return 0;
14225+#endif
14226+}
14227+
14228+static inline int au_test_btrfs(struct super_block *sb __maybe_unused)
14229+{
14230+#if defined(CONFIG_BTRFS_FS) || defined(CONFIG_BTRFS_FS_MODULE)
14231+ return sb->s_magic == BTRFS_SUPER_MAGIC;
14232+#else
14233+ return 0;
14234+#endif
14235+}
14236+
14237+static inline int au_test_xenfs(struct super_block *sb __maybe_unused)
14238+{
14239+#if defined(CONFIG_XENFS) || defined(CONFIG_XENFS_MODULE)
14240+ return sb->s_magic == XENFS_SUPER_MAGIC;
14241+#else
14242+ return 0;
14243+#endif
14244+}
14245+
14246+static inline int au_test_debugfs(struct super_block *sb __maybe_unused)
14247+{
14248+#ifdef CONFIG_DEBUG_FS
14249+ return sb->s_magic == DEBUGFS_MAGIC;
14250+#else
14251+ return 0;
14252+#endif
14253+}
14254+
14255+static inline int au_test_nilfs(struct super_block *sb __maybe_unused)
14256+{
14257+#if defined(CONFIG_NILFS) || defined(CONFIG_NILFS_MODULE)
14258+ return sb->s_magic == NILFS_SUPER_MAGIC;
14259+#else
14260+ return 0;
14261+#endif
14262+}
14263+
4a4d8108
AM
14264+static inline int au_test_hfsplus(struct super_block *sb __maybe_unused)
14265+{
14266+#if defined(CONFIG_HFSPLUS_FS) || defined(CONFIG_HFSPLUS_FS_MODULE)
14267+ return sb->s_magic == HFSPLUS_SUPER_MAGIC;
14268+#else
14269+ return 0;
14270+#endif
14271+}
14272+
1308ab2a 14273+/* ---------------------------------------------------------------------- */
14274+/*
14275+ * they can't be an aufs branch.
14276+ */
14277+static inline int au_test_fs_unsuppoted(struct super_block *sb)
14278+{
14279+ return
14280+#ifndef CONFIG_AUFS_BR_RAMFS
14281+ au_test_ramfs(sb) ||
14282+#endif
14283+ au_test_procfs(sb)
14284+ || au_test_sysfs(sb)
14285+ || au_test_configfs(sb)
14286+ || au_test_debugfs(sb)
14287+ || au_test_securityfs(sb)
14288+ || au_test_xenfs(sb)
14289+ || au_test_ecryptfs(sb)
14290+ /* || !strcmp(au_sbtype(sb), "unionfs") */
14291+ || au_test_aufs(sb); /* will be supported in next version */
14292+}
14293+
1308ab2a 14294+static inline int au_test_fs_remote(struct super_block *sb)
14295+{
14296+ return !au_test_tmpfs(sb)
14297+#ifdef CONFIG_AUFS_BR_RAMFS
14298+ && !au_test_ramfs(sb)
14299+#endif
14300+ && !(sb->s_type->fs_flags & FS_REQUIRES_DEV);
14301+}
14302+
14303+/* ---------------------------------------------------------------------- */
14304+
14305+/*
14306+ * Note: these functions (below) are created after reading ->getattr() in all
14307+ * filesystems under linux/fs. it means we have to do so in every update...
14308+ */
14309+
14310+/*
14311+ * some filesystems require getattr to refresh the inode attributes before
14312+ * referencing.
14313+ * in most cases, we can rely on the inode attribute in NFS (or every remote fs)
14314+ * and leave the work for d_revalidate()
14315+ */
14316+static inline int au_test_fs_refresh_iattr(struct super_block *sb)
14317+{
14318+ return au_test_nfs(sb)
14319+ || au_test_fuse(sb)
1308ab2a 14320+ /* || au_test_btrfs(sb) */ /* untested */
1308ab2a 14321+ ;
14322+}
14323+
14324+/*
14325+ * filesystems which don't maintain i_size or i_blocks.
14326+ */
14327+static inline int au_test_fs_bad_iattr_size(struct super_block *sb)
14328+{
14329+ return au_test_xfs(sb)
4a4d8108
AM
14330+ || au_test_btrfs(sb)
14331+ || au_test_ubifs(sb)
14332+ || au_test_hfsplus(sb) /* maintained, but incorrect */
1308ab2a 14333+ /* || au_test_minix(sb) */ /* untested */
14334+ ;
14335+}
14336+
14337+/*
14338+ * filesystems which don't store the correct value in some of their inode
14339+ * attributes.
14340+ */
14341+static inline int au_test_fs_bad_iattr(struct super_block *sb)
14342+{
14343+ return au_test_fs_bad_iattr_size(sb)
1308ab2a 14344+ || au_test_fat(sb)
14345+ || au_test_msdos(sb)
14346+ || au_test_vfat(sb);
1facf9fc 14347+}
14348+
14349+/* they don't check i_nlink in link(2) */
14350+static inline int au_test_fs_no_limit_nlink(struct super_block *sb)
14351+{
14352+ return au_test_tmpfs(sb)
14353+#ifdef CONFIG_AUFS_BR_RAMFS
14354+ || au_test_ramfs(sb)
14355+#endif
4a4d8108 14356+ || au_test_ubifs(sb)
4a4d8108 14357+ || au_test_hfsplus(sb);
1facf9fc 14358+}
14359+
14360+/*
14361+ * filesystems which set S_NOATIME and S_NOCMTIME.
14362+ */
14363+static inline int au_test_fs_notime(struct super_block *sb)
14364+{
14365+ return au_test_nfs(sb)
14366+ || au_test_fuse(sb)
dece6358 14367+ || au_test_ubifs(sb)
1facf9fc 14368+ ;
14369+}
14370+
1facf9fc 14371+/* temporary support for i#1 in cramfs */
14372+static inline int au_test_fs_unique_ino(struct inode *inode)
14373+{
14374+ if (au_test_cramfs(inode->i_sb))
14375+ return inode->i_ino != 1;
14376+ return 1;
14377+}
14378+
14379+/* ---------------------------------------------------------------------- */
14380+
14381+/*
14382+ * the filesystem where the xino files are placed must support i/o after unlink
14383+ * and maintain i_size and i_blocks.
14384+ */
14385+static inline int au_test_fs_bad_xino(struct super_block *sb)
14386+{
14387+ return au_test_fs_remote(sb)
14388+ || au_test_fs_bad_iattr_size(sb)
1facf9fc 14389+ /* don't want unnecessary work for xino */
14390+ || au_test_aufs(sb)
1308ab2a 14391+ || au_test_ecryptfs(sb)
14392+ || au_test_nilfs(sb);
1facf9fc 14393+}
14394+
14395+static inline int au_test_fs_trunc_xino(struct super_block *sb)
14396+{
14397+ return au_test_tmpfs(sb)
14398+ || au_test_ramfs(sb);
14399+}
14400+
14401+/*
14402+ * test if the @sb is real-readonly.
14403+ */
14404+static inline int au_test_fs_rr(struct super_block *sb)
14405+{
14406+ return au_test_squashfs(sb)
14407+ || au_test_iso9660(sb)
14408+ || au_test_cramfs(sb)
14409+ || au_test_romfs(sb);
14410+}
14411+
b912730e
AM
14412+/*
14413+ * test if the @inode is nfs with 'noacl' option
14414+ * NFS always sets MS_POSIXACL regardless of its mount option 'noacl'.
14415+ */
14416+static inline int au_test_nfs_noacl(struct inode *inode)
14417+{
14418+ return au_test_nfs(inode->i_sb)
14419+ /* && IS_POSIXACL(inode) */
14420+ && !nfs_server_capable(inode, NFS_CAP_ACLS);
14421+}
14422+
1facf9fc 14423+#endif /* __KERNEL__ */
14424+#endif /* __AUFS_FSTYPE_H__ */
7f207e10
AM
14425diff -urN /usr/share/empty/fs/aufs/hfsnotify.c linux/fs/aufs/hfsnotify.c
14426--- /usr/share/empty/fs/aufs/hfsnotify.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 14427+++ linux/fs/aufs/hfsnotify.c 2015-09-24 10:47:58.254719746 +0200
c1595e42 14428@@ -0,0 +1,288 @@
1facf9fc 14429+/*
2000de60 14430+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 14431+ *
14432+ * This program, aufs is free software; you can redistribute it and/or modify
14433+ * it under the terms of the GNU General Public License as published by
14434+ * the Free Software Foundation; either version 2 of the License, or
14435+ * (at your option) any later version.
dece6358
AM
14436+ *
14437+ * This program is distributed in the hope that it will be useful,
14438+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14439+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14440+ * GNU General Public License for more details.
14441+ *
14442+ * You should have received a copy of the GNU General Public License
523b37e3 14443+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 14444+ */
14445+
14446+/*
4a4d8108 14447+ * fsnotify for the lower directories
1facf9fc 14448+ */
14449+
14450+#include "aufs.h"
14451+
4a4d8108
AM
14452+/* FS_IN_IGNORED is unnecessary */
14453+static const __u32 AuHfsnMask = (FS_MOVED_TO | FS_MOVED_FROM | FS_DELETE
14454+ | FS_CREATE | FS_EVENT_ON_CHILD);
7f207e10 14455+static DECLARE_WAIT_QUEUE_HEAD(au_hfsn_wq);
7eafdf33 14456+static __cacheline_aligned_in_smp atomic64_t au_hfsn_ifree = ATOMIC64_INIT(0);
1facf9fc 14457+
0c5527e5 14458+static void au_hfsn_free_mark(struct fsnotify_mark *mark)
1facf9fc 14459+{
0c5527e5
AM
14460+ struct au_hnotify *hn = container_of(mark, struct au_hnotify,
14461+ hn_mark);
4a4d8108 14462+ AuDbg("here\n");
7eafdf33 14463+ au_cache_free_hnotify(hn);
076b876e 14464+ smp_mb__before_atomic();
1716fcea
AM
14465+ if (atomic64_dec_and_test(&au_hfsn_ifree))
14466+ wake_up(&au_hfsn_wq);
4a4d8108 14467+}
1facf9fc 14468+
027c5e7a 14469+static int au_hfsn_alloc(struct au_hinode *hinode)
4a4d8108 14470+{
1716fcea 14471+ int err;
027c5e7a
AM
14472+ struct au_hnotify *hn;
14473+ struct super_block *sb;
14474+ struct au_branch *br;
0c5527e5 14475+ struct fsnotify_mark *mark;
027c5e7a 14476+ aufs_bindex_t bindex;
1facf9fc 14477+
027c5e7a
AM
14478+ hn = hinode->hi_notify;
14479+ sb = hn->hn_aufs_inode->i_sb;
14480+ bindex = au_br_index(sb, hinode->hi_id);
14481+ br = au_sbr(sb, bindex);
1716fcea
AM
14482+ AuDebugOn(!br->br_hfsn);
14483+
0c5527e5
AM
14484+ mark = &hn->hn_mark;
14485+ fsnotify_init_mark(mark, au_hfsn_free_mark);
14486+ mark->mask = AuHfsnMask;
7f207e10
AM
14487+ /*
14488+ * by udba rename or rmdir, aufs assigns a new inode to the known
14489+ * h_inode, so specify 1 to allow dups.
14490+ */
c1595e42 14491+ lockdep_off();
1716fcea 14492+ err = fsnotify_add_mark(mark, br->br_hfsn->hfsn_group, hinode->hi_inode,
027c5e7a 14493+ /*mnt*/NULL, /*allow_dups*/1);
1716fcea
AM
14494+ /* even if err */
14495+ fsnotify_put_mark(mark);
c1595e42 14496+ lockdep_on();
1716fcea
AM
14497+
14498+ return err;
1facf9fc 14499+}
14500+
7eafdf33 14501+static int au_hfsn_free(struct au_hinode *hinode, struct au_hnotify *hn)
1facf9fc 14502+{
0c5527e5 14503+ struct fsnotify_mark *mark;
7eafdf33 14504+ unsigned long long ull;
1716fcea 14505+ struct fsnotify_group *group;
7eafdf33
AM
14506+
14507+ ull = atomic64_inc_return(&au_hfsn_ifree);
14508+ BUG_ON(!ull);
953406b4 14509+
0c5527e5 14510+ mark = &hn->hn_mark;
1716fcea
AM
14511+ spin_lock(&mark->lock);
14512+ group = mark->group;
14513+ fsnotify_get_group(group);
14514+ spin_unlock(&mark->lock);
c1595e42 14515+ lockdep_off();
1716fcea
AM
14516+ fsnotify_destroy_mark(mark, group);
14517+ fsnotify_put_group(group);
c1595e42 14518+ lockdep_on();
7f207e10 14519+
7eafdf33
AM
14520+ /* free hn by myself */
14521+ return 0;
1facf9fc 14522+}
14523+
14524+/* ---------------------------------------------------------------------- */
14525+
4a4d8108 14526+static void au_hfsn_ctl(struct au_hinode *hinode, int do_set)
1facf9fc 14527+{
0c5527e5 14528+ struct fsnotify_mark *mark;
1facf9fc 14529+
0c5527e5
AM
14530+ mark = &hinode->hi_notify->hn_mark;
14531+ spin_lock(&mark->lock);
1facf9fc 14532+ if (do_set) {
0c5527e5
AM
14533+ AuDebugOn(mark->mask & AuHfsnMask);
14534+ mark->mask |= AuHfsnMask;
1facf9fc 14535+ } else {
0c5527e5
AM
14536+ AuDebugOn(!(mark->mask & AuHfsnMask));
14537+ mark->mask &= ~AuHfsnMask;
1facf9fc 14538+ }
0c5527e5 14539+ spin_unlock(&mark->lock);
4a4d8108 14540+ /* fsnotify_recalc_inode_mask(hinode->hi_inode); */
1facf9fc 14541+}
14542+
4a4d8108 14543+/* ---------------------------------------------------------------------- */
1facf9fc 14544+
4a4d8108
AM
14545+/* #define AuDbgHnotify */
14546+#ifdef AuDbgHnotify
14547+static char *au_hfsn_name(u32 mask)
14548+{
14549+#ifdef CONFIG_AUFS_DEBUG
c06a8ce3
AM
14550+#define test_ret(flag) \
14551+ do { \
14552+ if (mask & flag) \
14553+ return #flag; \
14554+ } while (0)
4a4d8108
AM
14555+ test_ret(FS_ACCESS);
14556+ test_ret(FS_MODIFY);
14557+ test_ret(FS_ATTRIB);
14558+ test_ret(FS_CLOSE_WRITE);
14559+ test_ret(FS_CLOSE_NOWRITE);
14560+ test_ret(FS_OPEN);
14561+ test_ret(FS_MOVED_FROM);
14562+ test_ret(FS_MOVED_TO);
14563+ test_ret(FS_CREATE);
14564+ test_ret(FS_DELETE);
14565+ test_ret(FS_DELETE_SELF);
14566+ test_ret(FS_MOVE_SELF);
14567+ test_ret(FS_UNMOUNT);
14568+ test_ret(FS_Q_OVERFLOW);
14569+ test_ret(FS_IN_IGNORED);
b912730e 14570+ test_ret(FS_ISDIR);
4a4d8108
AM
14571+ test_ret(FS_IN_ONESHOT);
14572+ test_ret(FS_EVENT_ON_CHILD);
14573+ return "";
14574+#undef test_ret
14575+#else
14576+ return "??";
14577+#endif
1facf9fc 14578+}
4a4d8108 14579+#endif
1facf9fc 14580+
14581+/* ---------------------------------------------------------------------- */
14582+
1716fcea
AM
14583+static void au_hfsn_free_group(struct fsnotify_group *group)
14584+{
14585+ struct au_br_hfsnotify *hfsn = group->private;
14586+
14587+ AuDbg("here\n");
14588+ kfree(hfsn);
14589+}
14590+
4a4d8108 14591+static int au_hfsn_handle_event(struct fsnotify_group *group,
fb47a38f 14592+ struct inode *inode,
0c5527e5
AM
14593+ struct fsnotify_mark *inode_mark,
14594+ struct fsnotify_mark *vfsmount_mark,
fb47a38f
JR
14595+ u32 mask, void *data, int data_type,
14596+ const unsigned char *file_name, u32 cookie)
1facf9fc 14597+{
14598+ int err;
4a4d8108
AM
14599+ struct au_hnotify *hnotify;
14600+ struct inode *h_dir, *h_inode;
fb47a38f 14601+ struct qstr h_child_qstr = QSTR_INIT(file_name, strlen(file_name));
4a4d8108 14602+
fb47a38f 14603+ AuDebugOn(data_type != FSNOTIFY_EVENT_INODE);
1facf9fc 14604+
14605+ err = 0;
0c5527e5 14606+ /* if FS_UNMOUNT happens, there must be another bug */
4a4d8108 14607+ AuDebugOn(mask & FS_UNMOUNT);
0c5527e5 14608+ if (mask & (FS_IN_IGNORED | FS_UNMOUNT))
1facf9fc 14609+ goto out;
1facf9fc 14610+
fb47a38f
JR
14611+ h_dir = inode;
14612+ h_inode = NULL;
4a4d8108 14613+#ifdef AuDbgHnotify
392086de 14614+ au_debug_on();
4a4d8108
AM
14615+ if (1 || h_child_qstr.len != sizeof(AUFS_XINO_FNAME) - 1
14616+ || strncmp(h_child_qstr.name, AUFS_XINO_FNAME, h_child_qstr.len)) {
14617+ AuDbg("i%lu, mask 0x%x %s, hcname %.*s, hi%lu\n",
14618+ h_dir->i_ino, mask, au_hfsn_name(mask),
14619+ AuLNPair(&h_child_qstr), h_inode ? h_inode->i_ino : 0);
14620+ /* WARN_ON(1); */
1facf9fc 14621+ }
392086de 14622+ au_debug_off();
1facf9fc 14623+#endif
4a4d8108 14624+
0c5527e5
AM
14625+ AuDebugOn(!inode_mark);
14626+ hnotify = container_of(inode_mark, struct au_hnotify, hn_mark);
14627+ err = au_hnotify(h_dir, hnotify, mask, &h_child_qstr, h_inode);
1facf9fc 14628+
4a4d8108
AM
14629+out:
14630+ return err;
14631+}
1facf9fc 14632+
4a4d8108 14633+static struct fsnotify_ops au_hfsn_ops = {
1716fcea
AM
14634+ .handle_event = au_hfsn_handle_event,
14635+ .free_group_priv = au_hfsn_free_group
4a4d8108
AM
14636+};
14637+
14638+/* ---------------------------------------------------------------------- */
14639+
027c5e7a
AM
14640+static void au_hfsn_fin_br(struct au_branch *br)
14641+{
1716fcea 14642+ struct au_br_hfsnotify *hfsn;
027c5e7a 14643+
1716fcea 14644+ hfsn = br->br_hfsn;
c1595e42
JR
14645+ if (hfsn) {
14646+ lockdep_off();
1716fcea 14647+ fsnotify_put_group(hfsn->hfsn_group);
c1595e42
JR
14648+ lockdep_on();
14649+ }
027c5e7a
AM
14650+}
14651+
1716fcea 14652+static int au_hfsn_init_br(struct au_branch *br, int perm)
4a4d8108
AM
14653+{
14654+ int err;
1716fcea
AM
14655+ struct fsnotify_group *group;
14656+ struct au_br_hfsnotify *hfsn;
1facf9fc 14657+
4a4d8108 14658+ err = 0;
1716fcea
AM
14659+ br->br_hfsn = NULL;
14660+ if (!au_br_hnotifyable(perm))
027c5e7a 14661+ goto out;
027c5e7a 14662+
1716fcea
AM
14663+ err = -ENOMEM;
14664+ hfsn = kmalloc(sizeof(*hfsn), GFP_NOFS);
14665+ if (unlikely(!hfsn))
027c5e7a
AM
14666+ goto out;
14667+
1716fcea
AM
14668+ err = 0;
14669+ group = fsnotify_alloc_group(&au_hfsn_ops);
14670+ if (IS_ERR(group)) {
14671+ err = PTR_ERR(group);
0c5527e5 14672+ pr_err("fsnotify_alloc_group() failed, %d\n", err);
1716fcea 14673+ goto out_hfsn;
4a4d8108 14674+ }
1facf9fc 14675+
1716fcea
AM
14676+ group->private = hfsn;
14677+ hfsn->hfsn_group = group;
14678+ br->br_hfsn = hfsn;
14679+ goto out; /* success */
14680+
14681+out_hfsn:
14682+ kfree(hfsn);
027c5e7a 14683+out:
1716fcea
AM
14684+ return err;
14685+}
14686+
14687+static int au_hfsn_reset_br(unsigned int udba, struct au_branch *br, int perm)
14688+{
14689+ int err;
14690+
14691+ err = 0;
14692+ if (!br->br_hfsn)
14693+ err = au_hfsn_init_br(br, perm);
14694+
1facf9fc 14695+ return err;
14696+}
14697+
7eafdf33
AM
14698+/* ---------------------------------------------------------------------- */
14699+
14700+static void au_hfsn_fin(void)
14701+{
14702+ AuDbg("au_hfsn_ifree %lld\n", (long long)atomic64_read(&au_hfsn_ifree));
14703+ wait_event(au_hfsn_wq, !atomic64_read(&au_hfsn_ifree));
14704+}
14705+
4a4d8108
AM
14706+const struct au_hnotify_op au_hnotify_op = {
14707+ .ctl = au_hfsn_ctl,
14708+ .alloc = au_hfsn_alloc,
14709+ .free = au_hfsn_free,
1facf9fc 14710+
7eafdf33
AM
14711+ .fin = au_hfsn_fin,
14712+
027c5e7a
AM
14713+ .reset_br = au_hfsn_reset_br,
14714+ .fin_br = au_hfsn_fin_br,
14715+ .init_br = au_hfsn_init_br
4a4d8108 14716+};
7f207e10
AM
14717diff -urN /usr/share/empty/fs/aufs/hfsplus.c linux/fs/aufs/hfsplus.c
14718--- /usr/share/empty/fs/aufs/hfsplus.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 14719+++ linux/fs/aufs/hfsplus.c 2015-09-24 10:47:58.254719746 +0200
523b37e3 14720@@ -0,0 +1,56 @@
4a4d8108 14721+/*
2000de60 14722+ * Copyright (C) 2010-2015 Junjiro R. Okajima
4a4d8108
AM
14723+ *
14724+ * This program, aufs is free software; you can redistribute it and/or modify
14725+ * it under the terms of the GNU General Public License as published by
14726+ * the Free Software Foundation; either version 2 of the License, or
14727+ * (at your option) any later version.
14728+ *
14729+ * This program is distributed in the hope that it will be useful,
14730+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14731+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14732+ * GNU General Public License for more details.
14733+ *
14734+ * You should have received a copy of the GNU General Public License
523b37e3 14735+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 14736+ */
1facf9fc 14737+
4a4d8108
AM
14738+/*
14739+ * special support for filesystems which acquire an inode mutex
14740+ * at the final closing of a file, e.g. hfsplus.
14741+ *
14742+ * This trick is very simple and stupid, just to open the file before really
14743+ * necessary open to tell hfsplus that this is not the final closing.
14744+ * The caller should call au_h_open_pre() after acquiring the inode mutex,
14745+ * and au_h_open_post() after releasing it.
14746+ */
1facf9fc 14747+
4a4d8108 14748+#include "aufs.h"
1facf9fc 14749+
392086de
AM
14750+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
14751+ int force_wr)
4a4d8108
AM
14752+{
14753+ struct file *h_file;
14754+ struct dentry *h_dentry;
1facf9fc 14755+
4a4d8108
AM
14756+ h_dentry = au_h_dptr(dentry, bindex);
14757+ AuDebugOn(!h_dentry);
5527c038 14758+ AuDebugOn(d_is_negative(h_dentry));
4a4d8108
AM
14759+
14760+ h_file = NULL;
14761+ if (au_test_hfsplus(h_dentry->d_sb)
7e9cd9fe 14762+ && d_is_reg(h_dentry))
4a4d8108
AM
14763+ h_file = au_h_open(dentry, bindex,
14764+ O_RDONLY | O_NOATIME | O_LARGEFILE,
392086de 14765+ /*file*/NULL, force_wr);
4a4d8108 14766+ return h_file;
1facf9fc 14767+}
14768+
4a4d8108
AM
14769+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
14770+ struct file *h_file)
14771+{
14772+ if (h_file) {
14773+ fput(h_file);
14774+ au_sbr_put(dentry->d_sb, bindex);
14775+ }
14776+}
7f207e10
AM
14777diff -urN /usr/share/empty/fs/aufs/hnotify.c linux/fs/aufs/hnotify.c
14778--- /usr/share/empty/fs/aufs/hnotify.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 14779+++ linux/fs/aufs/hnotify.c 2015-09-24 10:47:58.254719746 +0200
5527c038 14780@@ -0,0 +1,710 @@
e49829fe 14781+/*
2000de60 14782+ * Copyright (C) 2005-2015 Junjiro R. Okajima
e49829fe
JR
14783+ *
14784+ * This program, aufs is free software; you can redistribute it and/or modify
14785+ * it under the terms of the GNU General Public License as published by
14786+ * the Free Software Foundation; either version 2 of the License, or
14787+ * (at your option) any later version.
14788+ *
14789+ * This program is distributed in the hope that it will be useful,
14790+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14791+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14792+ * GNU General Public License for more details.
14793+ *
14794+ * You should have received a copy of the GNU General Public License
523b37e3 14795+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
e49829fe
JR
14796+ */
14797+
14798+/*
7f207e10 14799+ * abstraction to notify the direct changes on lower directories
e49829fe
JR
14800+ */
14801+
14802+#include "aufs.h"
14803+
027c5e7a 14804+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode)
e49829fe
JR
14805+{
14806+ int err;
7f207e10 14807+ struct au_hnotify *hn;
1facf9fc 14808+
4a4d8108
AM
14809+ err = -ENOMEM;
14810+ hn = au_cache_alloc_hnotify();
14811+ if (hn) {
14812+ hn->hn_aufs_inode = inode;
027c5e7a
AM
14813+ hinode->hi_notify = hn;
14814+ err = au_hnotify_op.alloc(hinode);
14815+ AuTraceErr(err);
14816+ if (unlikely(err)) {
14817+ hinode->hi_notify = NULL;
4a4d8108
AM
14818+ au_cache_free_hnotify(hn);
14819+ /*
14820+ * The upper dir was removed by udba, but the same named
14821+ * dir left. In this case, aufs assigns a new inode
14822+ * number and sets the monitor again.
14823+ * For the lower dir, the old monitor is still left.
14824+ */
14825+ if (err == -EEXIST)
14826+ err = 0;
14827+ }
1308ab2a 14828+ }
1308ab2a 14829+
027c5e7a 14830+ AuTraceErr(err);
1308ab2a 14831+ return err;
dece6358 14832+}
1facf9fc 14833+
4a4d8108 14834+void au_hn_free(struct au_hinode *hinode)
dece6358 14835+{
4a4d8108 14836+ struct au_hnotify *hn;
1facf9fc 14837+
4a4d8108
AM
14838+ hn = hinode->hi_notify;
14839+ if (hn) {
4a4d8108 14840+ hinode->hi_notify = NULL;
7eafdf33
AM
14841+ if (au_hnotify_op.free(hinode, hn))
14842+ au_cache_free_hnotify(hn);
4a4d8108
AM
14843+ }
14844+}
dece6358 14845+
4a4d8108 14846+/* ---------------------------------------------------------------------- */
dece6358 14847+
4a4d8108
AM
14848+void au_hn_ctl(struct au_hinode *hinode, int do_set)
14849+{
14850+ if (hinode->hi_notify)
14851+ au_hnotify_op.ctl(hinode, do_set);
14852+}
14853+
14854+void au_hn_reset(struct inode *inode, unsigned int flags)
14855+{
14856+ aufs_bindex_t bindex, bend;
14857+ struct inode *hi;
14858+ struct dentry *iwhdentry;
1facf9fc 14859+
1308ab2a 14860+ bend = au_ibend(inode);
4a4d8108
AM
14861+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
14862+ hi = au_h_iptr(inode, bindex);
14863+ if (!hi)
14864+ continue;
1308ab2a 14865+
4a4d8108
AM
14866+ /* mutex_lock_nested(&hi->i_mutex, AuLsc_I_CHILD); */
14867+ iwhdentry = au_hi_wh(inode, bindex);
14868+ if (iwhdentry)
14869+ dget(iwhdentry);
14870+ au_igrab(hi);
14871+ au_set_h_iptr(inode, bindex, NULL, 0);
14872+ au_set_h_iptr(inode, bindex, au_igrab(hi),
14873+ flags & ~AuHi_XINO);
14874+ iput(hi);
14875+ dput(iwhdentry);
14876+ /* mutex_unlock(&hi->i_mutex); */
1facf9fc 14877+ }
1facf9fc 14878+}
14879+
1308ab2a 14880+/* ---------------------------------------------------------------------- */
1facf9fc 14881+
4a4d8108 14882+static int hn_xino(struct inode *inode, struct inode *h_inode)
1facf9fc 14883+{
4a4d8108
AM
14884+ int err;
14885+ aufs_bindex_t bindex, bend, bfound, bstart;
14886+ struct inode *h_i;
1facf9fc 14887+
4a4d8108
AM
14888+ err = 0;
14889+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
0c3ec466 14890+ pr_warn("branch root dir was changed\n");
4a4d8108
AM
14891+ goto out;
14892+ }
1facf9fc 14893+
4a4d8108
AM
14894+ bfound = -1;
14895+ bend = au_ibend(inode);
14896+ bstart = au_ibstart(inode);
14897+#if 0 /* reserved for future use */
14898+ if (bindex == bend) {
14899+ /* keep this ino in rename case */
14900+ goto out;
14901+ }
14902+#endif
14903+ for (bindex = bstart; bindex <= bend; bindex++)
14904+ if (au_h_iptr(inode, bindex) == h_inode) {
14905+ bfound = bindex;
14906+ break;
14907+ }
14908+ if (bfound < 0)
1308ab2a 14909+ goto out;
1facf9fc 14910+
4a4d8108
AM
14911+ for (bindex = bstart; bindex <= bend; bindex++) {
14912+ h_i = au_h_iptr(inode, bindex);
14913+ if (!h_i)
14914+ continue;
1facf9fc 14915+
4a4d8108
AM
14916+ err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0);
14917+ /* ignore this error */
14918+ /* bad action? */
1facf9fc 14919+ }
1facf9fc 14920+
4a4d8108 14921+ /* children inode number will be broken */
1facf9fc 14922+
4f0767ce 14923+out:
4a4d8108
AM
14924+ AuTraceErr(err);
14925+ return err;
1facf9fc 14926+}
14927+
4a4d8108 14928+static int hn_gen_tree(struct dentry *dentry)
1facf9fc 14929+{
4a4d8108
AM
14930+ int err, i, j, ndentry;
14931+ struct au_dcsub_pages dpages;
14932+ struct au_dpage *dpage;
14933+ struct dentry **dentries;
1facf9fc 14934+
4a4d8108
AM
14935+ err = au_dpages_init(&dpages, GFP_NOFS);
14936+ if (unlikely(err))
14937+ goto out;
14938+ err = au_dcsub_pages(&dpages, dentry, NULL, NULL);
14939+ if (unlikely(err))
14940+ goto out_dpages;
1facf9fc 14941+
4a4d8108
AM
14942+ for (i = 0; i < dpages.ndpage; i++) {
14943+ dpage = dpages.dpages + i;
14944+ dentries = dpage->dentries;
14945+ ndentry = dpage->ndentry;
14946+ for (j = 0; j < ndentry; j++) {
14947+ struct dentry *d;
14948+
14949+ d = dentries[j];
14950+ if (IS_ROOT(d))
14951+ continue;
14952+
4a4d8108 14953+ au_digen_dec(d);
5527c038 14954+ if (d_really_is_positive(d))
4a4d8108
AM
14955+ /* todo: reset children xino?
14956+ cached children only? */
5527c038 14957+ au_iigen_dec(d_inode(d));
1308ab2a 14958+ }
dece6358 14959+ }
1facf9fc 14960+
4f0767ce 14961+out_dpages:
4a4d8108 14962+ au_dpages_free(&dpages);
dece6358 14963+
027c5e7a 14964+#if 0
4a4d8108
AM
14965+ /* discard children */
14966+ dentry_unhash(dentry);
14967+ dput(dentry);
027c5e7a 14968+#endif
4f0767ce 14969+out:
dece6358
AM
14970+ return err;
14971+}
14972+
1308ab2a 14973+/*
4a4d8108 14974+ * return 0 if processed.
1308ab2a 14975+ */
4a4d8108
AM
14976+static int hn_gen_by_inode(char *name, unsigned int nlen, struct inode *inode,
14977+ const unsigned int isdir)
dece6358 14978+{
1308ab2a 14979+ int err;
4a4d8108
AM
14980+ struct dentry *d;
14981+ struct qstr *dname;
1facf9fc 14982+
4a4d8108
AM
14983+ err = 1;
14984+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
0c3ec466 14985+ pr_warn("branch root dir was changed\n");
4a4d8108
AM
14986+ err = 0;
14987+ goto out;
14988+ }
dece6358 14989+
4a4d8108
AM
14990+ if (!isdir) {
14991+ AuDebugOn(!name);
14992+ au_iigen_dec(inode);
027c5e7a 14993+ spin_lock(&inode->i_lock);
c1595e42 14994+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
027c5e7a 14995+ spin_lock(&d->d_lock);
4a4d8108
AM
14996+ dname = &d->d_name;
14997+ if (dname->len != nlen
027c5e7a
AM
14998+ && memcmp(dname->name, name, nlen)) {
14999+ spin_unlock(&d->d_lock);
4a4d8108 15000+ continue;
027c5e7a 15001+ }
4a4d8108 15002+ err = 0;
4a4d8108
AM
15003+ au_digen_dec(d);
15004+ spin_unlock(&d->d_lock);
15005+ break;
1facf9fc 15006+ }
027c5e7a 15007+ spin_unlock(&inode->i_lock);
1308ab2a 15008+ } else {
027c5e7a 15009+ au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIR);
c1595e42 15010+ d = d_find_any_alias(inode);
4a4d8108
AM
15011+ if (!d) {
15012+ au_iigen_dec(inode);
15013+ goto out;
15014+ }
1facf9fc 15015+
027c5e7a 15016+ spin_lock(&d->d_lock);
4a4d8108 15017+ dname = &d->d_name;
027c5e7a
AM
15018+ if (dname->len == nlen && !memcmp(dname->name, name, nlen)) {
15019+ spin_unlock(&d->d_lock);
4a4d8108 15020+ err = hn_gen_tree(d);
027c5e7a
AM
15021+ spin_lock(&d->d_lock);
15022+ }
15023+ spin_unlock(&d->d_lock);
4a4d8108
AM
15024+ dput(d);
15025+ }
1facf9fc 15026+
4f0767ce 15027+out:
4a4d8108 15028+ AuTraceErr(err);
1308ab2a 15029+ return err;
15030+}
dece6358 15031+
4a4d8108 15032+static int hn_gen_by_name(struct dentry *dentry, const unsigned int isdir)
1facf9fc 15033+{
4a4d8108 15034+ int err;
1facf9fc 15035+
5527c038 15036+ if (IS_ROOT(dentry)) {
0c3ec466 15037+ pr_warn("branch root dir was changed\n");
4a4d8108
AM
15038+ return 0;
15039+ }
1308ab2a 15040+
4a4d8108
AM
15041+ err = 0;
15042+ if (!isdir) {
4a4d8108 15043+ au_digen_dec(dentry);
5527c038
JR
15044+ if (d_really_is_positive(dentry))
15045+ au_iigen_dec(d_inode(dentry));
4a4d8108 15046+ } else {
027c5e7a 15047+ au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR);
5527c038 15048+ if (d_really_is_positive(dentry))
4a4d8108
AM
15049+ err = hn_gen_tree(dentry);
15050+ }
15051+
15052+ AuTraceErr(err);
15053+ return err;
1facf9fc 15054+}
15055+
4a4d8108 15056+/* ---------------------------------------------------------------------- */
1facf9fc 15057+
4a4d8108
AM
15058+/* hnotify job flags */
15059+#define AuHnJob_XINO0 1
15060+#define AuHnJob_GEN (1 << 1)
15061+#define AuHnJob_DIRENT (1 << 2)
15062+#define AuHnJob_ISDIR (1 << 3)
15063+#define AuHnJob_TRYXINO0 (1 << 4)
15064+#define AuHnJob_MNTPNT (1 << 5)
15065+#define au_ftest_hnjob(flags, name) ((flags) & AuHnJob_##name)
7f207e10
AM
15066+#define au_fset_hnjob(flags, name) \
15067+ do { (flags) |= AuHnJob_##name; } while (0)
15068+#define au_fclr_hnjob(flags, name) \
15069+ do { (flags) &= ~AuHnJob_##name; } while (0)
1facf9fc 15070+
4a4d8108
AM
15071+enum {
15072+ AuHn_CHILD,
15073+ AuHn_PARENT,
15074+ AuHnLast
15075+};
1facf9fc 15076+
4a4d8108
AM
15077+struct au_hnotify_args {
15078+ struct inode *h_dir, *dir, *h_child_inode;
15079+ u32 mask;
15080+ unsigned int flags[AuHnLast];
15081+ unsigned int h_child_nlen;
15082+ char h_child_name[];
15083+};
1facf9fc 15084+
4a4d8108
AM
15085+struct hn_job_args {
15086+ unsigned int flags;
15087+ struct inode *inode, *h_inode, *dir, *h_dir;
15088+ struct dentry *dentry;
15089+ char *h_name;
15090+ int h_nlen;
15091+};
1308ab2a 15092+
4a4d8108
AM
15093+static int hn_job(struct hn_job_args *a)
15094+{
15095+ const unsigned int isdir = au_ftest_hnjob(a->flags, ISDIR);
076b876e 15096+ int e;
1308ab2a 15097+
4a4d8108
AM
15098+ /* reset xino */
15099+ if (au_ftest_hnjob(a->flags, XINO0) && a->inode)
15100+ hn_xino(a->inode, a->h_inode); /* ignore this error */
1308ab2a 15101+
4a4d8108
AM
15102+ if (au_ftest_hnjob(a->flags, TRYXINO0)
15103+ && a->inode
15104+ && a->h_inode) {
15105+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
38d290e6
JR
15106+ if (!a->h_inode->i_nlink
15107+ && !(a->h_inode->i_state & I_LINKABLE))
4a4d8108
AM
15108+ hn_xino(a->inode, a->h_inode); /* ignore this error */
15109+ mutex_unlock(&a->h_inode->i_mutex);
1308ab2a 15110+ }
1facf9fc 15111+
4a4d8108
AM
15112+ /* make the generation obsolete */
15113+ if (au_ftest_hnjob(a->flags, GEN)) {
076b876e 15114+ e = -1;
4a4d8108 15115+ if (a->inode)
076b876e 15116+ e = hn_gen_by_inode(a->h_name, a->h_nlen, a->inode,
4a4d8108 15117+ isdir);
076b876e 15118+ if (e && a->dentry)
4a4d8108
AM
15119+ hn_gen_by_name(a->dentry, isdir);
15120+ /* ignore this error */
1facf9fc 15121+ }
1facf9fc 15122+
4a4d8108
AM
15123+ /* make dir entries obsolete */
15124+ if (au_ftest_hnjob(a->flags, DIRENT) && a->inode) {
15125+ struct au_vdir *vdir;
1facf9fc 15126+
4a4d8108
AM
15127+ vdir = au_ivdir(a->inode);
15128+ if (vdir)
15129+ vdir->vd_jiffy = 0;
15130+ /* IMustLock(a->inode); */
15131+ /* a->inode->i_version++; */
15132+ }
1facf9fc 15133+
4a4d8108
AM
15134+ /* can do nothing but warn */
15135+ if (au_ftest_hnjob(a->flags, MNTPNT)
15136+ && a->dentry
15137+ && d_mountpoint(a->dentry))
523b37e3 15138+ pr_warn("mount-point %pd is removed or renamed\n", a->dentry);
1facf9fc 15139+
4a4d8108 15140+ return 0;
1308ab2a 15141+}
1facf9fc 15142+
1308ab2a 15143+/* ---------------------------------------------------------------------- */
1facf9fc 15144+
4a4d8108
AM
15145+static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen,
15146+ struct inode *dir)
1308ab2a 15147+{
4a4d8108
AM
15148+ struct dentry *dentry, *d, *parent;
15149+ struct qstr *dname;
1308ab2a 15150+
c1595e42 15151+ parent = d_find_any_alias(dir);
4a4d8108
AM
15152+ if (!parent)
15153+ return NULL;
1308ab2a 15154+
4a4d8108 15155+ dentry = NULL;
027c5e7a 15156+ spin_lock(&parent->d_lock);
c1595e42 15157+ list_for_each_entry(d, &parent->d_subdirs, d_child) {
523b37e3 15158+ /* AuDbg("%pd\n", d); */
027c5e7a 15159+ spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
4a4d8108
AM
15160+ dname = &d->d_name;
15161+ if (dname->len != nlen || memcmp(dname->name, name, nlen))
027c5e7a
AM
15162+ goto cont_unlock;
15163+ if (au_di(d))
15164+ au_digen_dec(d);
15165+ else
15166+ goto cont_unlock;
c1595e42 15167+ if (au_dcount(d) > 0) {
027c5e7a 15168+ dentry = dget_dlock(d);
4a4d8108 15169+ spin_unlock(&d->d_lock);
027c5e7a 15170+ break;
dece6358 15171+ }
1facf9fc 15172+
f6b6e03d 15173+cont_unlock:
027c5e7a 15174+ spin_unlock(&d->d_lock);
1308ab2a 15175+ }
027c5e7a 15176+ spin_unlock(&parent->d_lock);
4a4d8108 15177+ dput(parent);
1facf9fc 15178+
4a4d8108
AM
15179+ if (dentry)
15180+ di_write_lock_child(dentry);
1308ab2a 15181+
4a4d8108
AM
15182+ return dentry;
15183+}
dece6358 15184+
4a4d8108
AM
15185+static struct inode *lookup_wlock_by_ino(struct super_block *sb,
15186+ aufs_bindex_t bindex, ino_t h_ino)
15187+{
15188+ struct inode *inode;
15189+ ino_t ino;
15190+ int err;
15191+
15192+ inode = NULL;
15193+ err = au_xino_read(sb, bindex, h_ino, &ino);
15194+ if (!err && ino)
15195+ inode = ilookup(sb, ino);
15196+ if (!inode)
15197+ goto out;
15198+
15199+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
0c3ec466 15200+ pr_warn("wrong root branch\n");
4a4d8108
AM
15201+ iput(inode);
15202+ inode = NULL;
15203+ goto out;
1308ab2a 15204+ }
15205+
4a4d8108 15206+ ii_write_lock_child(inode);
1308ab2a 15207+
4f0767ce 15208+out:
4a4d8108 15209+ return inode;
dece6358
AM
15210+}
15211+
4a4d8108 15212+static void au_hn_bh(void *_args)
1facf9fc 15213+{
4a4d8108
AM
15214+ struct au_hnotify_args *a = _args;
15215+ struct super_block *sb;
15216+ aufs_bindex_t bindex, bend, bfound;
15217+ unsigned char xino, try_iput;
1facf9fc 15218+ int err;
1308ab2a 15219+ struct inode *inode;
4a4d8108
AM
15220+ ino_t h_ino;
15221+ struct hn_job_args args;
15222+ struct dentry *dentry;
15223+ struct au_sbinfo *sbinfo;
1facf9fc 15224+
4a4d8108
AM
15225+ AuDebugOn(!_args);
15226+ AuDebugOn(!a->h_dir);
15227+ AuDebugOn(!a->dir);
15228+ AuDebugOn(!a->mask);
15229+ AuDbg("mask 0x%x, i%lu, hi%lu, hci%lu\n",
15230+ a->mask, a->dir->i_ino, a->h_dir->i_ino,
15231+ a->h_child_inode ? a->h_child_inode->i_ino : 0);
1facf9fc 15232+
4a4d8108
AM
15233+ inode = NULL;
15234+ dentry = NULL;
15235+ /*
15236+ * do not lock a->dir->i_mutex here
15237+ * because d_revalidate() may cause a deadlock.
15238+ */
15239+ sb = a->dir->i_sb;
15240+ AuDebugOn(!sb);
15241+ sbinfo = au_sbi(sb);
15242+ AuDebugOn(!sbinfo);
7f207e10 15243+ si_write_lock(sb, AuLock_NOPLMW);
1facf9fc 15244+
4a4d8108
AM
15245+ ii_read_lock_parent(a->dir);
15246+ bfound = -1;
15247+ bend = au_ibend(a->dir);
15248+ for (bindex = au_ibstart(a->dir); bindex <= bend; bindex++)
15249+ if (au_h_iptr(a->dir, bindex) == a->h_dir) {
15250+ bfound = bindex;
15251+ break;
15252+ }
15253+ ii_read_unlock(a->dir);
15254+ if (unlikely(bfound < 0))
15255+ goto out;
1facf9fc 15256+
4a4d8108
AM
15257+ xino = !!au_opt_test(au_mntflags(sb), XINO);
15258+ h_ino = 0;
15259+ if (a->h_child_inode)
15260+ h_ino = a->h_child_inode->i_ino;
1facf9fc 15261+
4a4d8108
AM
15262+ if (a->h_child_nlen
15263+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], GEN)
15264+ || au_ftest_hnjob(a->flags[AuHn_CHILD], MNTPNT)))
15265+ dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen,
15266+ a->dir);
15267+ try_iput = 0;
5527c038
JR
15268+ if (dentry && d_really_is_positive(dentry))
15269+ inode = d_inode(dentry);
4a4d8108
AM
15270+ if (xino && !inode && h_ino
15271+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], XINO0)
15272+ || au_ftest_hnjob(a->flags[AuHn_CHILD], TRYXINO0)
15273+ || au_ftest_hnjob(a->flags[AuHn_CHILD], GEN))) {
15274+ inode = lookup_wlock_by_ino(sb, bfound, h_ino);
15275+ try_iput = 1;
15276+ }
1facf9fc 15277+
4a4d8108
AM
15278+ args.flags = a->flags[AuHn_CHILD];
15279+ args.dentry = dentry;
15280+ args.inode = inode;
15281+ args.h_inode = a->h_child_inode;
15282+ args.dir = a->dir;
15283+ args.h_dir = a->h_dir;
15284+ args.h_name = a->h_child_name;
15285+ args.h_nlen = a->h_child_nlen;
15286+ err = hn_job(&args);
15287+ if (dentry) {
027c5e7a 15288+ if (au_di(dentry))
4a4d8108
AM
15289+ di_write_unlock(dentry);
15290+ dput(dentry);
15291+ }
15292+ if (inode && try_iput) {
15293+ ii_write_unlock(inode);
15294+ iput(inode);
15295+ }
1facf9fc 15296+
4a4d8108
AM
15297+ ii_write_lock_parent(a->dir);
15298+ args.flags = a->flags[AuHn_PARENT];
15299+ args.dentry = NULL;
15300+ args.inode = a->dir;
15301+ args.h_inode = a->h_dir;
15302+ args.dir = NULL;
15303+ args.h_dir = NULL;
15304+ args.h_name = NULL;
15305+ args.h_nlen = 0;
15306+ err = hn_job(&args);
15307+ ii_write_unlock(a->dir);
1facf9fc 15308+
4f0767ce 15309+out:
4a4d8108
AM
15310+ iput(a->h_child_inode);
15311+ iput(a->h_dir);
15312+ iput(a->dir);
027c5e7a
AM
15313+ si_write_unlock(sb);
15314+ au_nwt_done(&sbinfo->si_nowait);
1308ab2a 15315+ kfree(a);
dece6358 15316+}
1facf9fc 15317+
4a4d8108
AM
15318+/* ---------------------------------------------------------------------- */
15319+
15320+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
15321+ struct qstr *h_child_qstr, struct inode *h_child_inode)
dece6358 15322+{
4a4d8108 15323+ int err, len;
53392da6 15324+ unsigned int flags[AuHnLast], f;
4a4d8108
AM
15325+ unsigned char isdir, isroot, wh;
15326+ struct inode *dir;
15327+ struct au_hnotify_args *args;
15328+ char *p, *h_child_name;
dece6358 15329+
1308ab2a 15330+ err = 0;
4a4d8108
AM
15331+ AuDebugOn(!hnotify || !hnotify->hn_aufs_inode);
15332+ dir = igrab(hnotify->hn_aufs_inode);
15333+ if (!dir)
15334+ goto out;
1facf9fc 15335+
4a4d8108
AM
15336+ isroot = (dir->i_ino == AUFS_ROOT_INO);
15337+ wh = 0;
15338+ h_child_name = (void *)h_child_qstr->name;
15339+ len = h_child_qstr->len;
15340+ if (h_child_name) {
15341+ if (len > AUFS_WH_PFX_LEN
15342+ && !memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
15343+ h_child_name += AUFS_WH_PFX_LEN;
15344+ len -= AUFS_WH_PFX_LEN;
15345+ wh = 1;
15346+ }
1facf9fc 15347+ }
dece6358 15348+
4a4d8108
AM
15349+ isdir = 0;
15350+ if (h_child_inode)
15351+ isdir = !!S_ISDIR(h_child_inode->i_mode);
15352+ flags[AuHn_PARENT] = AuHnJob_ISDIR;
15353+ flags[AuHn_CHILD] = 0;
15354+ if (isdir)
15355+ flags[AuHn_CHILD] = AuHnJob_ISDIR;
15356+ au_fset_hnjob(flags[AuHn_PARENT], DIRENT);
15357+ au_fset_hnjob(flags[AuHn_CHILD], GEN);
15358+ switch (mask & FS_EVENTS_POSS_ON_CHILD) {
15359+ case FS_MOVED_FROM:
15360+ case FS_MOVED_TO:
15361+ au_fset_hnjob(flags[AuHn_CHILD], XINO0);
15362+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
15363+ /*FALLTHROUGH*/
15364+ case FS_CREATE:
fb47a38f 15365+ AuDebugOn(!h_child_name);
4a4d8108 15366+ break;
1facf9fc 15367+
4a4d8108
AM
15368+ case FS_DELETE:
15369+ /*
15370+ * aufs will never be able to get this child inode.
15371+ * revalidation should be in d_revalidate()
15372+ * by checking i_nlink, i_generation or d_unhashed().
15373+ */
15374+ AuDebugOn(!h_child_name);
15375+ au_fset_hnjob(flags[AuHn_CHILD], TRYXINO0);
15376+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
15377+ break;
dece6358 15378+
4a4d8108
AM
15379+ default:
15380+ AuDebugOn(1);
15381+ }
1308ab2a 15382+
4a4d8108
AM
15383+ if (wh)
15384+ h_child_inode = NULL;
1308ab2a 15385+
4a4d8108
AM
15386+ err = -ENOMEM;
15387+ /* iput() and kfree() will be called in au_hn_bh() */
4a4d8108 15388+ args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS);
4a4d8108
AM
15389+ if (unlikely(!args)) {
15390+ AuErr1("no memory\n");
15391+ iput(dir);
15392+ goto out;
15393+ }
15394+ args->flags[AuHn_PARENT] = flags[AuHn_PARENT];
15395+ args->flags[AuHn_CHILD] = flags[AuHn_CHILD];
15396+ args->mask = mask;
15397+ args->dir = dir;
15398+ args->h_dir = igrab(h_dir);
15399+ if (h_child_inode)
15400+ h_child_inode = igrab(h_child_inode); /* can be NULL */
15401+ args->h_child_inode = h_child_inode;
15402+ args->h_child_nlen = len;
15403+ if (len) {
15404+ p = (void *)args;
15405+ p += sizeof(*args);
15406+ memcpy(p, h_child_name, len);
15407+ p[len] = 0;
1308ab2a 15408+ }
1308ab2a 15409+
38d290e6 15410+ /* NFS fires the event for silly-renamed one from kworker */
53392da6 15411+ f = 0;
38d290e6
JR
15412+ if (!dir->i_nlink
15413+ || (au_test_nfs(h_dir->i_sb) && (mask & FS_DELETE)))
53392da6
AM
15414+ f = AuWkq_NEST;
15415+ err = au_wkq_nowait(au_hn_bh, args, dir->i_sb, f);
4a4d8108
AM
15416+ if (unlikely(err)) {
15417+ pr_err("wkq %d\n", err);
15418+ iput(args->h_child_inode);
15419+ iput(args->h_dir);
15420+ iput(args->dir);
15421+ kfree(args);
1facf9fc 15422+ }
1facf9fc 15423+
4a4d8108 15424+out:
1facf9fc 15425+ return err;
15426+}
15427+
027c5e7a
AM
15428+/* ---------------------------------------------------------------------- */
15429+
15430+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm)
15431+{
15432+ int err;
15433+
15434+ AuDebugOn(!(udba & AuOptMask_UDBA));
15435+
15436+ err = 0;
15437+ if (au_hnotify_op.reset_br)
15438+ err = au_hnotify_op.reset_br(udba, br, perm);
15439+
15440+ return err;
15441+}
15442+
15443+int au_hnotify_init_br(struct au_branch *br, int perm)
15444+{
15445+ int err;
15446+
15447+ err = 0;
15448+ if (au_hnotify_op.init_br)
15449+ err = au_hnotify_op.init_br(br, perm);
15450+
15451+ return err;
15452+}
15453+
15454+void au_hnotify_fin_br(struct au_branch *br)
15455+{
15456+ if (au_hnotify_op.fin_br)
15457+ au_hnotify_op.fin_br(br);
15458+}
15459+
4a4d8108
AM
15460+static void au_hn_destroy_cache(void)
15461+{
15462+ kmem_cache_destroy(au_cachep[AuCache_HNOTIFY]);
15463+ au_cachep[AuCache_HNOTIFY] = NULL;
15464+}
1308ab2a 15465+
4a4d8108 15466+int __init au_hnotify_init(void)
1facf9fc 15467+{
1308ab2a 15468+ int err;
1308ab2a 15469+
4a4d8108
AM
15470+ err = -ENOMEM;
15471+ au_cachep[AuCache_HNOTIFY] = AuCache(au_hnotify);
15472+ if (au_cachep[AuCache_HNOTIFY]) {
027c5e7a
AM
15473+ err = 0;
15474+ if (au_hnotify_op.init)
15475+ err = au_hnotify_op.init();
4a4d8108
AM
15476+ if (unlikely(err))
15477+ au_hn_destroy_cache();
1308ab2a 15478+ }
1308ab2a 15479+ AuTraceErr(err);
4a4d8108 15480+ return err;
1308ab2a 15481+}
15482+
4a4d8108 15483+void au_hnotify_fin(void)
1308ab2a 15484+{
027c5e7a
AM
15485+ if (au_hnotify_op.fin)
15486+ au_hnotify_op.fin();
4a4d8108
AM
15487+ /* cf. au_cache_fin() */
15488+ if (au_cachep[AuCache_HNOTIFY])
15489+ au_hn_destroy_cache();
dece6358 15490+}
7f207e10
AM
15491diff -urN /usr/share/empty/fs/aufs/iinfo.c linux/fs/aufs/iinfo.c
15492--- /usr/share/empty/fs/aufs/iinfo.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 15493+++ linux/fs/aufs/iinfo.c 2015-09-24 10:47:58.254719746 +0200
38d290e6 15494@@ -0,0 +1,277 @@
dece6358 15495+/*
2000de60 15496+ * Copyright (C) 2005-2015 Junjiro R. Okajima
dece6358
AM
15497+ *
15498+ * This program, aufs is free software; you can redistribute it and/or modify
15499+ * it under the terms of the GNU General Public License as published by
15500+ * the Free Software Foundation; either version 2 of the License, or
15501+ * (at your option) any later version.
15502+ *
15503+ * This program is distributed in the hope that it will be useful,
15504+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15505+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15506+ * GNU General Public License for more details.
15507+ *
15508+ * You should have received a copy of the GNU General Public License
523b37e3 15509+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
dece6358 15510+ */
1facf9fc 15511+
dece6358 15512+/*
4a4d8108 15513+ * inode private data
dece6358 15514+ */
1facf9fc 15515+
1308ab2a 15516+#include "aufs.h"
1facf9fc 15517+
4a4d8108 15518+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 15519+{
4a4d8108 15520+ struct inode *h_inode;
1facf9fc 15521+
4a4d8108 15522+ IiMustAnyLock(inode);
1facf9fc 15523+
4a4d8108
AM
15524+ h_inode = au_ii(inode)->ii_hinode[0 + bindex].hi_inode;
15525+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
15526+ return h_inode;
15527+}
1facf9fc 15528+
4a4d8108
AM
15529+/* todo: hard/soft set? */
15530+void au_hiput(struct au_hinode *hinode)
15531+{
15532+ au_hn_free(hinode);
15533+ dput(hinode->hi_whdentry);
15534+ iput(hinode->hi_inode);
15535+}
1facf9fc 15536+
4a4d8108
AM
15537+unsigned int au_hi_flags(struct inode *inode, int isdir)
15538+{
15539+ unsigned int flags;
15540+ const unsigned int mnt_flags = au_mntflags(inode->i_sb);
1facf9fc 15541+
4a4d8108
AM
15542+ flags = 0;
15543+ if (au_opt_test(mnt_flags, XINO))
15544+ au_fset_hi(flags, XINO);
15545+ if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY))
15546+ au_fset_hi(flags, HNOTIFY);
15547+ return flags;
1facf9fc 15548+}
15549+
4a4d8108
AM
15550+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
15551+ struct inode *h_inode, unsigned int flags)
1308ab2a 15552+{
4a4d8108
AM
15553+ struct au_hinode *hinode;
15554+ struct inode *hi;
15555+ struct au_iinfo *iinfo = au_ii(inode);
1facf9fc 15556+
4a4d8108 15557+ IiMustWriteLock(inode);
dece6358 15558+
4a4d8108
AM
15559+ hinode = iinfo->ii_hinode + bindex;
15560+ hi = hinode->hi_inode;
15561+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
15562+
15563+ if (hi)
15564+ au_hiput(hinode);
15565+ hinode->hi_inode = h_inode;
15566+ if (h_inode) {
15567+ int err;
15568+ struct super_block *sb = inode->i_sb;
15569+ struct au_branch *br;
15570+
027c5e7a
AM
15571+ AuDebugOn(inode->i_mode
15572+ && (h_inode->i_mode & S_IFMT)
15573+ != (inode->i_mode & S_IFMT));
4a4d8108
AM
15574+ if (bindex == iinfo->ii_bstart)
15575+ au_cpup_igen(inode, h_inode);
15576+ br = au_sbr(sb, bindex);
15577+ hinode->hi_id = br->br_id;
15578+ if (au_ftest_hi(flags, XINO)) {
15579+ err = au_xino_write(sb, bindex, h_inode->i_ino,
15580+ inode->i_ino);
15581+ if (unlikely(err))
15582+ AuIOErr1("failed au_xino_write() %d\n", err);
15583+ }
15584+
15585+ if (au_ftest_hi(flags, HNOTIFY)
15586+ && au_br_hnotifyable(br->br_perm)) {
027c5e7a 15587+ err = au_hn_alloc(hinode, inode);
4a4d8108
AM
15588+ if (unlikely(err))
15589+ AuIOErr1("au_hn_alloc() %d\n", err);
1308ab2a 15590+ }
15591+ }
4a4d8108 15592+}
dece6358 15593+
4a4d8108
AM
15594+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
15595+ struct dentry *h_wh)
15596+{
15597+ struct au_hinode *hinode;
dece6358 15598+
4a4d8108
AM
15599+ IiMustWriteLock(inode);
15600+
15601+ hinode = au_ii(inode)->ii_hinode + bindex;
15602+ AuDebugOn(hinode->hi_whdentry);
15603+ hinode->hi_whdentry = h_wh;
1facf9fc 15604+}
15605+
537831f9 15606+void au_update_iigen(struct inode *inode, int half)
1308ab2a 15607+{
537831f9
AM
15608+ struct au_iinfo *iinfo;
15609+ struct au_iigen *iigen;
15610+ unsigned int sigen;
15611+
15612+ sigen = au_sigen(inode->i_sb);
15613+ iinfo = au_ii(inode);
15614+ iigen = &iinfo->ii_generation;
15615+ spin_lock(&iinfo->ii_genspin);
15616+ iigen->ig_generation = sigen;
15617+ if (half)
15618+ au_ig_fset(iigen->ig_flags, HALF_REFRESHED);
15619+ else
15620+ au_ig_fclr(iigen->ig_flags, HALF_REFRESHED);
15621+ spin_unlock(&iinfo->ii_genspin);
4a4d8108 15622+}
1facf9fc 15623+
4a4d8108
AM
15624+/* it may be called at remount time, too */
15625+void au_update_ibrange(struct inode *inode, int do_put_zero)
15626+{
15627+ struct au_iinfo *iinfo;
027c5e7a 15628+ aufs_bindex_t bindex, bend;
1facf9fc 15629+
4a4d8108 15630+ iinfo = au_ii(inode);
027c5e7a 15631+ if (!iinfo)
4a4d8108 15632+ return;
1facf9fc 15633+
4a4d8108 15634+ IiMustWriteLock(inode);
1facf9fc 15635+
027c5e7a 15636+ if (do_put_zero && iinfo->ii_bstart >= 0) {
4a4d8108
AM
15637+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
15638+ bindex++) {
15639+ struct inode *h_i;
1facf9fc 15640+
4a4d8108 15641+ h_i = iinfo->ii_hinode[0 + bindex].hi_inode;
38d290e6
JR
15642+ if (h_i
15643+ && !h_i->i_nlink
15644+ && !(h_i->i_state & I_LINKABLE))
027c5e7a
AM
15645+ au_set_h_iptr(inode, bindex, NULL, 0);
15646+ }
4a4d8108
AM
15647+ }
15648+
027c5e7a
AM
15649+ iinfo->ii_bstart = -1;
15650+ iinfo->ii_bend = -1;
15651+ bend = au_sbend(inode->i_sb);
15652+ for (bindex = 0; bindex <= bend; bindex++)
15653+ if (iinfo->ii_hinode[0 + bindex].hi_inode) {
15654+ iinfo->ii_bstart = bindex;
4a4d8108 15655+ break;
027c5e7a
AM
15656+ }
15657+ if (iinfo->ii_bstart >= 0)
15658+ for (bindex = bend; bindex >= iinfo->ii_bstart; bindex--)
15659+ if (iinfo->ii_hinode[0 + bindex].hi_inode) {
15660+ iinfo->ii_bend = bindex;
15661+ break;
15662+ }
15663+ AuDebugOn(iinfo->ii_bstart > iinfo->ii_bend);
1308ab2a 15664+}
1facf9fc 15665+
dece6358 15666+/* ---------------------------------------------------------------------- */
1facf9fc 15667+
4a4d8108 15668+void au_icntnr_init_once(void *_c)
dece6358 15669+{
4a4d8108
AM
15670+ struct au_icntnr *c = _c;
15671+ struct au_iinfo *iinfo = &c->iinfo;
e49829fe 15672+ static struct lock_class_key aufs_ii;
1facf9fc 15673+
537831f9 15674+ spin_lock_init(&iinfo->ii_genspin);
4a4d8108 15675+ au_rw_init(&iinfo->ii_rwsem);
e49829fe 15676+ au_rw_class(&iinfo->ii_rwsem, &aufs_ii);
4a4d8108
AM
15677+ inode_init_once(&c->vfs_inode);
15678+}
1facf9fc 15679+
4a4d8108
AM
15680+int au_iinfo_init(struct inode *inode)
15681+{
15682+ struct au_iinfo *iinfo;
15683+ struct super_block *sb;
15684+ int nbr, i;
1facf9fc 15685+
4a4d8108
AM
15686+ sb = inode->i_sb;
15687+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
15688+ nbr = au_sbend(sb) + 1;
15689+ if (unlikely(nbr <= 0))
15690+ nbr = 1;
15691+ iinfo->ii_hinode = kcalloc(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS);
15692+ if (iinfo->ii_hinode) {
7f207e10 15693+ au_ninodes_inc(sb);
4a4d8108
AM
15694+ for (i = 0; i < nbr; i++)
15695+ iinfo->ii_hinode[i].hi_id = -1;
1facf9fc 15696+
537831f9 15697+ iinfo->ii_generation.ig_generation = au_sigen(sb);
4a4d8108
AM
15698+ iinfo->ii_bstart = -1;
15699+ iinfo->ii_bend = -1;
15700+ iinfo->ii_vdir = NULL;
15701+ return 0;
1308ab2a 15702+ }
4a4d8108
AM
15703+ return -ENOMEM;
15704+}
1facf9fc 15705+
4a4d8108
AM
15706+int au_ii_realloc(struct au_iinfo *iinfo, int nbr)
15707+{
15708+ int err, sz;
15709+ struct au_hinode *hip;
1facf9fc 15710+
4a4d8108
AM
15711+ AuRwMustWriteLock(&iinfo->ii_rwsem);
15712+
15713+ err = -ENOMEM;
15714+ sz = sizeof(*hip) * (iinfo->ii_bend + 1);
15715+ if (!sz)
15716+ sz = sizeof(*hip);
15717+ hip = au_kzrealloc(iinfo->ii_hinode, sz, sizeof(*hip) * nbr, GFP_NOFS);
15718+ if (hip) {
15719+ iinfo->ii_hinode = hip;
15720+ err = 0;
1308ab2a 15721+ }
4a4d8108 15722+
1308ab2a 15723+ return err;
1facf9fc 15724+}
15725+
4a4d8108 15726+void au_iinfo_fin(struct inode *inode)
1facf9fc 15727+{
4a4d8108
AM
15728+ struct au_iinfo *iinfo;
15729+ struct au_hinode *hi;
15730+ struct super_block *sb;
b752ccd1
AM
15731+ aufs_bindex_t bindex, bend;
15732+ const unsigned char unlinked = !inode->i_nlink;
1308ab2a 15733+
4a4d8108
AM
15734+ iinfo = au_ii(inode);
15735+ /* bad_inode case */
15736+ if (!iinfo)
15737+ return;
1308ab2a 15738+
b752ccd1 15739+ sb = inode->i_sb;
7f207e10 15740+ au_ninodes_dec(sb);
b752ccd1
AM
15741+ if (si_pid_test(sb))
15742+ au_xino_delete_inode(inode, unlinked);
15743+ else {
15744+ /*
15745+ * it is safe to hide the dependency between sbinfo and
15746+ * sb->s_umount.
15747+ */
15748+ lockdep_off();
15749+ si_noflush_read_lock(sb);
15750+ au_xino_delete_inode(inode, unlinked);
15751+ si_read_unlock(sb);
15752+ lockdep_on();
15753+ }
15754+
4a4d8108
AM
15755+ if (iinfo->ii_vdir)
15756+ au_vdir_free(iinfo->ii_vdir);
1308ab2a 15757+
b752ccd1
AM
15758+ bindex = iinfo->ii_bstart;
15759+ if (bindex >= 0) {
15760+ hi = iinfo->ii_hinode + bindex;
4a4d8108 15761+ bend = iinfo->ii_bend;
b752ccd1
AM
15762+ while (bindex++ <= bend) {
15763+ if (hi->hi_inode)
4a4d8108 15764+ au_hiput(hi);
4a4d8108
AM
15765+ hi++;
15766+ }
15767+ }
4a4d8108 15768+ kfree(iinfo->ii_hinode);
027c5e7a 15769+ iinfo->ii_hinode = NULL;
4a4d8108 15770+ AuRwDestroy(&iinfo->ii_rwsem);
dece6358 15771+}
7f207e10
AM
15772diff -urN /usr/share/empty/fs/aufs/inode.c linux/fs/aufs/inode.c
15773--- /usr/share/empty/fs/aufs/inode.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd
AM
15774+++ linux/fs/aufs/inode.c 2015-12-10 17:59:16.836166410 +0100
15775@@ -0,0 +1,528 @@
4a4d8108 15776+/*
2000de60 15777+ * Copyright (C) 2005-2015 Junjiro R. Okajima
4a4d8108
AM
15778+ *
15779+ * This program, aufs is free software; you can redistribute it and/or modify
15780+ * it under the terms of the GNU General Public License as published by
15781+ * the Free Software Foundation; either version 2 of the License, or
15782+ * (at your option) any later version.
15783+ *
15784+ * This program is distributed in the hope that it will be useful,
15785+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15786+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15787+ * GNU General Public License for more details.
15788+ *
15789+ * You should have received a copy of the GNU General Public License
523b37e3 15790+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 15791+ */
1facf9fc 15792+
4a4d8108
AM
15793+/*
15794+ * inode functions
15795+ */
1facf9fc 15796+
4a4d8108 15797+#include "aufs.h"
1308ab2a 15798+
4a4d8108
AM
15799+struct inode *au_igrab(struct inode *inode)
15800+{
15801+ if (inode) {
15802+ AuDebugOn(!atomic_read(&inode->i_count));
027c5e7a 15803+ ihold(inode);
1facf9fc 15804+ }
4a4d8108
AM
15805+ return inode;
15806+}
1facf9fc 15807+
4a4d8108
AM
15808+static void au_refresh_hinode_attr(struct inode *inode, int do_version)
15809+{
15810+ au_cpup_attr_all(inode, /*force*/0);
537831f9 15811+ au_update_iigen(inode, /*half*/1);
4a4d8108
AM
15812+ if (do_version)
15813+ inode->i_version++;
dece6358 15814+}
1facf9fc 15815+
027c5e7a 15816+static int au_ii_refresh(struct inode *inode, int *update)
dece6358 15817+{
4a4d8108 15818+ int err, e;
027c5e7a 15819+ umode_t type;
4a4d8108 15820+ aufs_bindex_t bindex, new_bindex;
1308ab2a 15821+ struct super_block *sb;
4a4d8108 15822+ struct au_iinfo *iinfo;
027c5e7a 15823+ struct au_hinode *p, *q, tmp;
1facf9fc 15824+
4a4d8108 15825+ IiMustWriteLock(inode);
1facf9fc 15826+
027c5e7a 15827+ *update = 0;
4a4d8108 15828+ sb = inode->i_sb;
027c5e7a 15829+ type = inode->i_mode & S_IFMT;
4a4d8108
AM
15830+ iinfo = au_ii(inode);
15831+ err = au_ii_realloc(iinfo, au_sbend(sb) + 1);
15832+ if (unlikely(err))
1308ab2a 15833+ goto out;
1facf9fc 15834+
027c5e7a 15835+ AuDebugOn(iinfo->ii_bstart < 0);
4a4d8108 15836+ p = iinfo->ii_hinode + iinfo->ii_bstart;
4a4d8108
AM
15837+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
15838+ bindex++, p++) {
15839+ if (!p->hi_inode)
15840+ continue;
1facf9fc 15841+
027c5e7a 15842+ AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT));
4a4d8108
AM
15843+ new_bindex = au_br_index(sb, p->hi_id);
15844+ if (new_bindex == bindex)
15845+ continue;
1facf9fc 15846+
4a4d8108 15847+ if (new_bindex < 0) {
027c5e7a 15848+ *update = 1;
4a4d8108
AM
15849+ au_hiput(p);
15850+ p->hi_inode = NULL;
15851+ continue;
1308ab2a 15852+ }
4a4d8108
AM
15853+
15854+ if (new_bindex < iinfo->ii_bstart)
15855+ iinfo->ii_bstart = new_bindex;
15856+ if (iinfo->ii_bend < new_bindex)
15857+ iinfo->ii_bend = new_bindex;
15858+ /* swap two lower inode, and loop again */
15859+ q = iinfo->ii_hinode + new_bindex;
15860+ tmp = *q;
15861+ *q = *p;
15862+ *p = tmp;
15863+ if (tmp.hi_inode) {
15864+ bindex--;
15865+ p--;
1308ab2a 15866+ }
15867+ }
4a4d8108
AM
15868+ au_update_ibrange(inode, /*do_put_zero*/0);
15869+ e = au_dy_irefresh(inode);
15870+ if (unlikely(e && !err))
15871+ err = e;
1facf9fc 15872+
4f0767ce 15873+out:
027c5e7a
AM
15874+ AuTraceErr(err);
15875+ return err;
15876+}
15877+
ab036dbd
AM
15878+void au_refresh_iop(struct inode *inode, int force_getattr)
15879+{
15880+ int type;
15881+ struct au_sbinfo *sbi = au_sbi(inode->i_sb);
15882+ const struct inode_operations *iop
15883+ = force_getattr ? aufs_iop : sbi->si_iop_array;
15884+
15885+ if (inode->i_op == iop)
15886+ return;
15887+
15888+ switch (inode->i_mode & S_IFMT) {
15889+ case S_IFDIR:
15890+ type = AuIop_DIR;
15891+ break;
15892+ case S_IFLNK:
15893+ type = AuIop_SYMLINK;
15894+ break;
15895+ default:
15896+ type = AuIop_OTHER;
15897+ break;
15898+ }
15899+
15900+ inode->i_op = iop + type;
15901+ /* unnecessary smp_wmb() */
15902+}
15903+
027c5e7a
AM
15904+int au_refresh_hinode_self(struct inode *inode)
15905+{
15906+ int err, update;
15907+
15908+ err = au_ii_refresh(inode, &update);
15909+ if (!err)
15910+ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode));
15911+
15912+ AuTraceErr(err);
4a4d8108
AM
15913+ return err;
15914+}
1facf9fc 15915+
4a4d8108
AM
15916+int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
15917+{
027c5e7a 15918+ int err, e, update;
4a4d8108 15919+ unsigned int flags;
027c5e7a 15920+ umode_t mode;
4a4d8108 15921+ aufs_bindex_t bindex, bend;
027c5e7a 15922+ unsigned char isdir;
4a4d8108
AM
15923+ struct au_hinode *p;
15924+ struct au_iinfo *iinfo;
1facf9fc 15925+
027c5e7a 15926+ err = au_ii_refresh(inode, &update);
4a4d8108
AM
15927+ if (unlikely(err))
15928+ goto out;
15929+
15930+ update = 0;
15931+ iinfo = au_ii(inode);
15932+ p = iinfo->ii_hinode + iinfo->ii_bstart;
027c5e7a
AM
15933+ mode = (inode->i_mode & S_IFMT);
15934+ isdir = S_ISDIR(mode);
4a4d8108
AM
15935+ flags = au_hi_flags(inode, isdir);
15936+ bend = au_dbend(dentry);
15937+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
5527c038 15938+ struct inode *h_i, *h_inode;
4a4d8108
AM
15939+ struct dentry *h_d;
15940+
15941+ h_d = au_h_dptr(dentry, bindex);
5527c038 15942+ if (!h_d || d_is_negative(h_d))
4a4d8108
AM
15943+ continue;
15944+
5527c038
JR
15945+ h_inode = d_inode(h_d);
15946+ AuDebugOn(mode != (h_inode->i_mode & S_IFMT));
4a4d8108
AM
15947+ if (iinfo->ii_bstart <= bindex && bindex <= iinfo->ii_bend) {
15948+ h_i = au_h_iptr(inode, bindex);
15949+ if (h_i) {
5527c038 15950+ if (h_i == h_inode)
4a4d8108
AM
15951+ continue;
15952+ err = -EIO;
15953+ break;
15954+ }
15955+ }
15956+ if (bindex < iinfo->ii_bstart)
15957+ iinfo->ii_bstart = bindex;
15958+ if (iinfo->ii_bend < bindex)
15959+ iinfo->ii_bend = bindex;
5527c038 15960+ au_set_h_iptr(inode, bindex, au_igrab(h_inode), flags);
4a4d8108 15961+ update = 1;
1308ab2a 15962+ }
4a4d8108
AM
15963+ au_update_ibrange(inode, /*do_put_zero*/0);
15964+ e = au_dy_irefresh(inode);
15965+ if (unlikely(e && !err))
15966+ err = e;
027c5e7a
AM
15967+ if (!err)
15968+ au_refresh_hinode_attr(inode, update && isdir);
4a4d8108 15969+
4f0767ce 15970+out:
4a4d8108 15971+ AuTraceErr(err);
1308ab2a 15972+ return err;
dece6358
AM
15973+}
15974+
4a4d8108 15975+static int set_inode(struct inode *inode, struct dentry *dentry)
dece6358 15976+{
4a4d8108
AM
15977+ int err;
15978+ unsigned int flags;
15979+ umode_t mode;
15980+ aufs_bindex_t bindex, bstart, btail;
15981+ unsigned char isdir;
15982+ struct dentry *h_dentry;
15983+ struct inode *h_inode;
15984+ struct au_iinfo *iinfo;
ab036dbd 15985+ struct inode_operations *iop;
dece6358 15986+
4a4d8108 15987+ IiMustWriteLock(inode);
dece6358 15988+
4a4d8108
AM
15989+ err = 0;
15990+ isdir = 0;
ab036dbd 15991+ iop = au_sbi(inode->i_sb)->si_iop_array;
4a4d8108 15992+ bstart = au_dbstart(dentry);
5527c038
JR
15993+ h_dentry = au_h_dptr(dentry, bstart);
15994+ h_inode = d_inode(h_dentry);
4a4d8108
AM
15995+ mode = h_inode->i_mode;
15996+ switch (mode & S_IFMT) {
15997+ case S_IFREG:
15998+ btail = au_dbtail(dentry);
ab036dbd 15999+ inode->i_op = iop + AuIop_OTHER;
4a4d8108
AM
16000+ inode->i_fop = &aufs_file_fop;
16001+ err = au_dy_iaop(inode, bstart, h_inode);
16002+ if (unlikely(err))
16003+ goto out;
16004+ break;
16005+ case S_IFDIR:
16006+ isdir = 1;
16007+ btail = au_dbtaildir(dentry);
ab036dbd 16008+ inode->i_op = iop + AuIop_DIR;
4a4d8108
AM
16009+ inode->i_fop = &aufs_dir_fop;
16010+ break;
16011+ case S_IFLNK:
16012+ btail = au_dbtail(dentry);
ab036dbd 16013+ inode->i_op = iop + AuIop_SYMLINK;
4a4d8108
AM
16014+ break;
16015+ case S_IFBLK:
16016+ case S_IFCHR:
16017+ case S_IFIFO:
16018+ case S_IFSOCK:
16019+ btail = au_dbtail(dentry);
ab036dbd 16020+ inode->i_op = iop + AuIop_OTHER;
38d290e6 16021+ init_special_inode(inode, mode, h_inode->i_rdev);
4a4d8108
AM
16022+ break;
16023+ default:
16024+ AuIOErr("Unknown file type 0%o\n", mode);
16025+ err = -EIO;
1308ab2a 16026+ goto out;
4a4d8108 16027+ }
dece6358 16028+
4a4d8108
AM
16029+ /* do not set hnotify for whiteouted dirs (SHWH mode) */
16030+ flags = au_hi_flags(inode, isdir);
16031+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)
16032+ && au_ftest_hi(flags, HNOTIFY)
16033+ && dentry->d_name.len > AUFS_WH_PFX_LEN
16034+ && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
16035+ au_fclr_hi(flags, HNOTIFY);
16036+ iinfo = au_ii(inode);
16037+ iinfo->ii_bstart = bstart;
16038+ iinfo->ii_bend = btail;
16039+ for (bindex = bstart; bindex <= btail; bindex++) {
16040+ h_dentry = au_h_dptr(dentry, bindex);
16041+ if (h_dentry)
16042+ au_set_h_iptr(inode, bindex,
5527c038 16043+ au_igrab(d_inode(h_dentry)), flags);
4a4d8108
AM
16044+ }
16045+ au_cpup_attr_all(inode, /*force*/1);
c1595e42
JR
16046+ /*
16047+ * to force calling aufs_get_acl() every time,
16048+ * do not call cache_no_acl() for aufs inode.
16049+ */
dece6358 16050+
4f0767ce 16051+out:
4a4d8108
AM
16052+ return err;
16053+}
dece6358 16054+
027c5e7a
AM
16055+/*
16056+ * successful returns with iinfo write_locked
16057+ * minus: errno
16058+ * zero: success, matched
16059+ * plus: no error, but unmatched
16060+ */
16061+static int reval_inode(struct inode *inode, struct dentry *dentry)
4a4d8108
AM
16062+{
16063+ int err;
537831f9
AM
16064+ unsigned int gen;
16065+ struct au_iigen iigen;
4a4d8108
AM
16066+ aufs_bindex_t bindex, bend;
16067+ struct inode *h_inode, *h_dinode;
5527c038 16068+ struct dentry *h_dentry;
dece6358 16069+
4a4d8108
AM
16070+ /*
16071+ * before this function, if aufs got any iinfo lock, it must be only
16072+ * one, the parent dir.
16073+ * it can happen by UDBA and the obsoleted inode number.
16074+ */
16075+ err = -EIO;
16076+ if (unlikely(inode->i_ino == parent_ino(dentry)))
16077+ goto out;
16078+
027c5e7a 16079+ err = 1;
4a4d8108 16080+ ii_write_lock_new_child(inode);
5527c038
JR
16081+ h_dentry = au_h_dptr(dentry, au_dbstart(dentry));
16082+ h_dinode = d_inode(h_dentry);
4a4d8108
AM
16083+ bend = au_ibend(inode);
16084+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
16085+ h_inode = au_h_iptr(inode, bindex);
537831f9
AM
16086+ if (!h_inode || h_inode != h_dinode)
16087+ continue;
16088+
16089+ err = 0;
16090+ gen = au_iigen(inode, &iigen);
16091+ if (gen == au_digen(dentry)
16092+ && !au_ig_ftest(iigen.ig_flags, HALF_REFRESHED))
4a4d8108 16093+ break;
537831f9
AM
16094+
16095+ /* fully refresh inode using dentry */
16096+ err = au_refresh_hinode(inode, dentry);
16097+ if (!err)
16098+ au_update_iigen(inode, /*half*/0);
16099+ break;
1facf9fc 16100+ }
dece6358 16101+
4a4d8108
AM
16102+ if (unlikely(err))
16103+ ii_write_unlock(inode);
4f0767ce 16104+out:
1facf9fc 16105+ return err;
16106+}
1facf9fc 16107+
4a4d8108
AM
16108+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
16109+ unsigned int d_type, ino_t *ino)
1facf9fc 16110+{
4a4d8108
AM
16111+ int err;
16112+ struct mutex *mtx;
1facf9fc 16113+
b752ccd1 16114+ /* prevent hardlinked inode number from race condition */
4a4d8108 16115+ mtx = NULL;
b752ccd1 16116+ if (d_type != DT_DIR) {
4a4d8108
AM
16117+ mtx = &au_sbr(sb, bindex)->br_xino.xi_nondir_mtx;
16118+ mutex_lock(mtx);
16119+ }
16120+ err = au_xino_read(sb, bindex, h_ino, ino);
16121+ if (unlikely(err))
16122+ goto out;
1308ab2a 16123+
4a4d8108
AM
16124+ if (!*ino) {
16125+ err = -EIO;
16126+ *ino = au_xino_new_ino(sb);
16127+ if (unlikely(!*ino))
1facf9fc 16128+ goto out;
4a4d8108
AM
16129+ err = au_xino_write(sb, bindex, h_ino, *ino);
16130+ if (unlikely(err))
1308ab2a 16131+ goto out;
1308ab2a 16132+ }
1facf9fc 16133+
4f0767ce 16134+out:
b752ccd1 16135+ if (mtx)
4a4d8108 16136+ mutex_unlock(mtx);
1facf9fc 16137+ return err;
16138+}
16139+
4a4d8108
AM
16140+/* successful returns with iinfo write_locked */
16141+/* todo: return with unlocked? */
16142+struct inode *au_new_inode(struct dentry *dentry, int must_new)
1facf9fc 16143+{
5527c038 16144+ struct inode *inode, *h_inode;
4a4d8108
AM
16145+ struct dentry *h_dentry;
16146+ struct super_block *sb;
b752ccd1 16147+ struct mutex *mtx;
4a4d8108 16148+ ino_t h_ino, ino;
1716fcea 16149+ int err;
4a4d8108 16150+ aufs_bindex_t bstart;
1facf9fc 16151+
4a4d8108
AM
16152+ sb = dentry->d_sb;
16153+ bstart = au_dbstart(dentry);
16154+ h_dentry = au_h_dptr(dentry, bstart);
5527c038
JR
16155+ h_inode = d_inode(h_dentry);
16156+ h_ino = h_inode->i_ino;
b752ccd1
AM
16157+
16158+ /*
16159+ * stop 'race'-ing between hardlinks under different
16160+ * parents.
16161+ */
16162+ mtx = NULL;
2000de60 16163+ if (!d_is_dir(h_dentry))
b752ccd1
AM
16164+ mtx = &au_sbr(sb, bstart)->br_xino.xi_nondir_mtx;
16165+
4f0767ce 16166+new_ino:
b752ccd1
AM
16167+ if (mtx)
16168+ mutex_lock(mtx);
4a4d8108
AM
16169+ err = au_xino_read(sb, bstart, h_ino, &ino);
16170+ inode = ERR_PTR(err);
16171+ if (unlikely(err))
16172+ goto out;
b752ccd1 16173+
4a4d8108
AM
16174+ if (!ino) {
16175+ ino = au_xino_new_ino(sb);
16176+ if (unlikely(!ino)) {
16177+ inode = ERR_PTR(-EIO);
dece6358
AM
16178+ goto out;
16179+ }
16180+ }
1facf9fc 16181+
4a4d8108
AM
16182+ AuDbg("i%lu\n", (unsigned long)ino);
16183+ inode = au_iget_locked(sb, ino);
16184+ err = PTR_ERR(inode);
16185+ if (IS_ERR(inode))
1facf9fc 16186+ goto out;
1facf9fc 16187+
4a4d8108
AM
16188+ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
16189+ if (inode->i_state & I_NEW) {
1716fcea 16190+ /* verbose coding for lock class name */
2000de60 16191+ if (unlikely(d_is_symlink(h_dentry)))
1716fcea
AM
16192+ au_rw_class(&au_ii(inode)->ii_rwsem,
16193+ au_lc_key + AuLcSymlink_IIINFO);
2000de60 16194+ else if (unlikely(d_is_dir(h_dentry)))
1716fcea
AM
16195+ au_rw_class(&au_ii(inode)->ii_rwsem,
16196+ au_lc_key + AuLcDir_IIINFO);
16197+ else /* likely */
16198+ au_rw_class(&au_ii(inode)->ii_rwsem,
16199+ au_lc_key + AuLcNonDir_IIINFO);
2dfbb274 16200+
4a4d8108
AM
16201+ ii_write_lock_new_child(inode);
16202+ err = set_inode(inode, dentry);
16203+ if (!err) {
16204+ unlock_new_inode(inode);
16205+ goto out; /* success */
16206+ }
1308ab2a 16207+
027c5e7a
AM
16208+ /*
16209+ * iget_failed() calls iput(), but we need to call
16210+ * ii_write_unlock() after iget_failed(). so dirty hack for
16211+ * i_count.
16212+ */
16213+ atomic_inc(&inode->i_count);
4a4d8108 16214+ iget_failed(inode);
027c5e7a
AM
16215+ ii_write_unlock(inode);
16216+ au_xino_write(sb, bstart, h_ino, /*ino*/0);
16217+ /* ignore this error */
16218+ goto out_iput;
16219+ } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) {
b752ccd1
AM
16220+ /*
16221+ * horrible race condition between lookup, readdir and copyup
16222+ * (or something).
16223+ */
16224+ if (mtx)
16225+ mutex_unlock(mtx);
027c5e7a
AM
16226+ err = reval_inode(inode, dentry);
16227+ if (unlikely(err < 0)) {
16228+ mtx = NULL;
16229+ goto out_iput;
16230+ }
16231+
b752ccd1
AM
16232+ if (!err) {
16233+ mtx = NULL;
4a4d8108 16234+ goto out; /* success */
b752ccd1
AM
16235+ } else if (mtx)
16236+ mutex_lock(mtx);
4a4d8108
AM
16237+ }
16238+
5527c038 16239+ if (unlikely(au_test_fs_unique_ino(h_inode)))
4a4d8108 16240+ AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
523b37e3
AM
16241+ " b%d, %s, %pd, hi%lu, i%lu.\n",
16242+ bstart, au_sbtype(h_dentry->d_sb), dentry,
4a4d8108
AM
16243+ (unsigned long)h_ino, (unsigned long)ino);
16244+ ino = 0;
16245+ err = au_xino_write(sb, bstart, h_ino, /*ino*/0);
16246+ if (!err) {
16247+ iput(inode);
b752ccd1
AM
16248+ if (mtx)
16249+ mutex_unlock(mtx);
4a4d8108
AM
16250+ goto new_ino;
16251+ }
1308ab2a 16252+
4f0767ce 16253+out_iput:
4a4d8108 16254+ iput(inode);
4a4d8108 16255+ inode = ERR_PTR(err);
4f0767ce 16256+out:
b752ccd1
AM
16257+ if (mtx)
16258+ mutex_unlock(mtx);
4a4d8108 16259+ return inode;
1facf9fc 16260+}
16261+
4a4d8108 16262+/* ---------------------------------------------------------------------- */
1facf9fc 16263+
4a4d8108
AM
16264+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
16265+ struct inode *inode)
16266+{
16267+ int err;
076b876e 16268+ struct inode *hi;
1facf9fc 16269+
4a4d8108 16270+ err = au_br_rdonly(au_sbr(sb, bindex));
1facf9fc 16271+
4a4d8108
AM
16272+ /* pseudo-link after flushed may happen out of bounds */
16273+ if (!err
16274+ && inode
16275+ && au_ibstart(inode) <= bindex
16276+ && bindex <= au_ibend(inode)) {
16277+ /*
16278+ * permission check is unnecessary since vfsub routine
16279+ * will be called later
16280+ */
076b876e 16281+ hi = au_h_iptr(inode, bindex);
4a4d8108
AM
16282+ if (hi)
16283+ err = IS_IMMUTABLE(hi) ? -EROFS : 0;
1facf9fc 16284+ }
16285+
4a4d8108
AM
16286+ return err;
16287+}
dece6358 16288+
4a4d8108
AM
16289+int au_test_h_perm(struct inode *h_inode, int mask)
16290+{
2dfbb274 16291+ if (uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
4a4d8108
AM
16292+ return 0;
16293+ return inode_permission(h_inode, mask);
16294+}
1facf9fc 16295+
4a4d8108
AM
16296+int au_test_h_perm_sio(struct inode *h_inode, int mask)
16297+{
16298+ if (au_test_nfs(h_inode->i_sb)
16299+ && (mask & MAY_WRITE)
16300+ && S_ISDIR(h_inode->i_mode))
16301+ mask |= MAY_READ; /* force permission check */
16302+ return au_test_h_perm(h_inode, mask);
1facf9fc 16303+}
7f207e10
AM
16304diff -urN /usr/share/empty/fs/aufs/inode.h linux/fs/aufs/inode.h
16305--- /usr/share/empty/fs/aufs/inode.h 1970-01-01 01:00:00.000000000 +0100
ab036dbd
AM
16306+++ linux/fs/aufs/inode.h 2015-12-10 17:59:16.836166410 +0100
16307@@ -0,0 +1,681 @@
4a4d8108 16308+/*
2000de60 16309+ * Copyright (C) 2005-2015 Junjiro R. Okajima
4a4d8108
AM
16310+ *
16311+ * This program, aufs is free software; you can redistribute it and/or modify
16312+ * it under the terms of the GNU General Public License as published by
16313+ * the Free Software Foundation; either version 2 of the License, or
16314+ * (at your option) any later version.
16315+ *
16316+ * This program is distributed in the hope that it will be useful,
16317+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
16318+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16319+ * GNU General Public License for more details.
16320+ *
16321+ * You should have received a copy of the GNU General Public License
523b37e3 16322+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 16323+ */
1facf9fc 16324+
1308ab2a 16325+/*
4a4d8108 16326+ * inode operations
1308ab2a 16327+ */
dece6358 16328+
4a4d8108
AM
16329+#ifndef __AUFS_INODE_H__
16330+#define __AUFS_INODE_H__
dece6358 16331+
4a4d8108 16332+#ifdef __KERNEL__
1308ab2a 16333+
4a4d8108 16334+#include <linux/fsnotify.h>
4a4d8108 16335+#include "rwsem.h"
1308ab2a 16336+
4a4d8108 16337+struct vfsmount;
1facf9fc 16338+
4a4d8108
AM
16339+struct au_hnotify {
16340+#ifdef CONFIG_AUFS_HNOTIFY
16341+#ifdef CONFIG_AUFS_HFSNOTIFY
7f207e10 16342+ /* never use fsnotify_add_vfsmount_mark() */
0c5527e5 16343+ struct fsnotify_mark hn_mark;
4a4d8108 16344+#endif
7f207e10 16345+ struct inode *hn_aufs_inode; /* no get/put */
4a4d8108
AM
16346+#endif
16347+} ____cacheline_aligned_in_smp;
1facf9fc 16348+
4a4d8108
AM
16349+struct au_hinode {
16350+ struct inode *hi_inode;
16351+ aufs_bindex_t hi_id;
16352+#ifdef CONFIG_AUFS_HNOTIFY
16353+ struct au_hnotify *hi_notify;
16354+#endif
dece6358 16355+
4a4d8108
AM
16356+ /* reference to the copied-up whiteout with get/put */
16357+ struct dentry *hi_whdentry;
16358+};
dece6358 16359+
537831f9
AM
16360+/* ig_flags */
16361+#define AuIG_HALF_REFRESHED 1
16362+#define au_ig_ftest(flags, name) ((flags) & AuIG_##name)
16363+#define au_ig_fset(flags, name) \
16364+ do { (flags) |= AuIG_##name; } while (0)
16365+#define au_ig_fclr(flags, name) \
16366+ do { (flags) &= ~AuIG_##name; } while (0)
16367+
16368+struct au_iigen {
16369+ __u32 ig_generation, ig_flags;
16370+};
16371+
4a4d8108
AM
16372+struct au_vdir;
16373+struct au_iinfo {
537831f9 16374+ spinlock_t ii_genspin;
7a9e40b8 16375+ struct au_iigen ii_generation;
4a4d8108 16376+ struct super_block *ii_hsb1; /* no get/put */
1facf9fc 16377+
4a4d8108
AM
16378+ struct au_rwsem ii_rwsem;
16379+ aufs_bindex_t ii_bstart, ii_bend;
16380+ __u32 ii_higen;
16381+ struct au_hinode *ii_hinode;
16382+ struct au_vdir *ii_vdir;
16383+};
1facf9fc 16384+
4a4d8108
AM
16385+struct au_icntnr {
16386+ struct au_iinfo iinfo;
16387+ struct inode vfs_inode;
16388+} ____cacheline_aligned_in_smp;
1308ab2a 16389+
4a4d8108
AM
16390+/* au_pin flags */
16391+#define AuPin_DI_LOCKED 1
16392+#define AuPin_MNT_WRITE (1 << 1)
16393+#define au_ftest_pin(flags, name) ((flags) & AuPin_##name)
7f207e10
AM
16394+#define au_fset_pin(flags, name) \
16395+ do { (flags) |= AuPin_##name; } while (0)
16396+#define au_fclr_pin(flags, name) \
16397+ do { (flags) &= ~AuPin_##name; } while (0)
4a4d8108
AM
16398+
16399+struct au_pin {
16400+ /* input */
16401+ struct dentry *dentry;
16402+ unsigned int udba;
16403+ unsigned char lsc_di, lsc_hi, flags;
16404+ aufs_bindex_t bindex;
16405+
16406+ /* output */
16407+ struct dentry *parent;
16408+ struct au_hinode *hdir;
16409+ struct vfsmount *h_mnt;
86dc4139
AM
16410+
16411+ /* temporary unlock/relock for copyup */
16412+ struct dentry *h_dentry, *h_parent;
16413+ struct au_branch *br;
16414+ struct task_struct *task;
4a4d8108 16415+};
1facf9fc 16416+
86dc4139 16417+void au_pin_hdir_unlock(struct au_pin *p);
c1595e42 16418+int au_pin_hdir_lock(struct au_pin *p);
86dc4139
AM
16419+int au_pin_hdir_relock(struct au_pin *p);
16420+void au_pin_hdir_set_owner(struct au_pin *p, struct task_struct *task);
16421+void au_pin_hdir_acquire_nest(struct au_pin *p);
16422+void au_pin_hdir_release(struct au_pin *p);
16423+
1308ab2a 16424+/* ---------------------------------------------------------------------- */
16425+
4a4d8108 16426+static inline struct au_iinfo *au_ii(struct inode *inode)
1facf9fc 16427+{
4a4d8108 16428+ struct au_iinfo *iinfo;
1facf9fc 16429+
4a4d8108
AM
16430+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
16431+ if (iinfo->ii_hinode)
16432+ return iinfo;
16433+ return NULL; /* debugging bad_inode case */
16434+}
1facf9fc 16435+
4a4d8108 16436+/* ---------------------------------------------------------------------- */
1facf9fc 16437+
4a4d8108
AM
16438+/* inode.c */
16439+struct inode *au_igrab(struct inode *inode);
ab036dbd 16440+void au_refresh_iop(struct inode *inode, int force_getattr);
027c5e7a 16441+int au_refresh_hinode_self(struct inode *inode);
4a4d8108
AM
16442+int au_refresh_hinode(struct inode *inode, struct dentry *dentry);
16443+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
16444+ unsigned int d_type, ino_t *ino);
16445+struct inode *au_new_inode(struct dentry *dentry, int must_new);
16446+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
16447+ struct inode *inode);
16448+int au_test_h_perm(struct inode *h_inode, int mask);
16449+int au_test_h_perm_sio(struct inode *h_inode, int mask);
1facf9fc 16450+
4a4d8108
AM
16451+static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex,
16452+ ino_t h_ino, unsigned int d_type, ino_t *ino)
16453+{
16454+#ifdef CONFIG_AUFS_SHWH
16455+ return au_ino(sb, bindex, h_ino, d_type, ino);
16456+#else
16457+ return 0;
16458+#endif
16459+}
1facf9fc 16460+
4a4d8108 16461+/* i_op.c */
ab036dbd
AM
16462+enum {
16463+ AuIop_SYMLINK,
16464+ AuIop_DIR,
16465+ AuIop_OTHER,
16466+ AuIop_Last
16467+};
16468+extern struct inode_operations aufs_iop[AuIop_Last],
16469+ aufs_iop_nogetattr[AuIop_Last];
1308ab2a 16470+
4a4d8108
AM
16471+/* au_wr_dir flags */
16472+#define AuWrDir_ADD_ENTRY 1
7e9cd9fe
AM
16473+#define AuWrDir_ISDIR (1 << 1)
16474+#define AuWrDir_TMPFILE (1 << 2)
4a4d8108 16475+#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name)
7f207e10
AM
16476+#define au_fset_wrdir(flags, name) \
16477+ do { (flags) |= AuWrDir_##name; } while (0)
16478+#define au_fclr_wrdir(flags, name) \
16479+ do { (flags) &= ~AuWrDir_##name; } while (0)
1facf9fc 16480+
4a4d8108
AM
16481+struct au_wr_dir_args {
16482+ aufs_bindex_t force_btgt;
16483+ unsigned char flags;
16484+};
16485+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
16486+ struct au_wr_dir_args *args);
dece6358 16487+
4a4d8108
AM
16488+struct dentry *au_pinned_h_parent(struct au_pin *pin);
16489+void au_pin_init(struct au_pin *pin, struct dentry *dentry,
16490+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
16491+ unsigned int udba, unsigned char flags);
16492+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
16493+ unsigned int udba, unsigned char flags) __must_check;
16494+int au_do_pin(struct au_pin *pin) __must_check;
16495+void au_unpin(struct au_pin *pin);
c1595e42
JR
16496+int au_reval_for_attr(struct dentry *dentry, unsigned int sigen);
16497+
16498+#define AuIcpup_DID_CPUP 1
16499+#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name)
16500+#define au_fset_icpup(flags, name) \
16501+ do { (flags) |= AuIcpup_##name; } while (0)
16502+#define au_fclr_icpup(flags, name) \
16503+ do { (flags) &= ~AuIcpup_##name; } while (0)
16504+
16505+struct au_icpup_args {
16506+ unsigned char flags;
16507+ unsigned char pin_flags;
16508+ aufs_bindex_t btgt;
16509+ unsigned int udba;
16510+ struct au_pin pin;
16511+ struct path h_path;
16512+ struct inode *h_inode;
16513+};
16514+
16515+int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
16516+ struct au_icpup_args *a);
16517+
16518+int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path);
1facf9fc 16519+
4a4d8108
AM
16520+/* i_op_add.c */
16521+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
16522+ struct dentry *h_parent, int isdir);
7eafdf33
AM
16523+int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
16524+ dev_t dev);
4a4d8108 16525+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname);
7eafdf33 16526+int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
b4510431 16527+ bool want_excl);
b912730e
AM
16528+struct vfsub_aopen_args;
16529+int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
16530+ struct vfsub_aopen_args *args);
38d290e6 16531+int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode);
4a4d8108
AM
16532+int aufs_link(struct dentry *src_dentry, struct inode *dir,
16533+ struct dentry *dentry);
7eafdf33 16534+int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
1facf9fc 16535+
4a4d8108
AM
16536+/* i_op_del.c */
16537+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup);
16538+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
16539+ struct dentry *h_parent, int isdir);
16540+int aufs_unlink(struct inode *dir, struct dentry *dentry);
16541+int aufs_rmdir(struct inode *dir, struct dentry *dentry);
1308ab2a 16542+
4a4d8108
AM
16543+/* i_op_ren.c */
16544+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt);
16545+int aufs_rename(struct inode *src_dir, struct dentry *src_dentry,
16546+ struct inode *dir, struct dentry *dentry);
1facf9fc 16547+
4a4d8108
AM
16548+/* iinfo.c */
16549+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex);
16550+void au_hiput(struct au_hinode *hinode);
16551+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
16552+ struct dentry *h_wh);
16553+unsigned int au_hi_flags(struct inode *inode, int isdir);
1308ab2a 16554+
4a4d8108
AM
16555+/* hinode flags */
16556+#define AuHi_XINO 1
16557+#define AuHi_HNOTIFY (1 << 1)
16558+#define au_ftest_hi(flags, name) ((flags) & AuHi_##name)
7f207e10
AM
16559+#define au_fset_hi(flags, name) \
16560+ do { (flags) |= AuHi_##name; } while (0)
16561+#define au_fclr_hi(flags, name) \
16562+ do { (flags) &= ~AuHi_##name; } while (0)
1facf9fc 16563+
4a4d8108
AM
16564+#ifndef CONFIG_AUFS_HNOTIFY
16565+#undef AuHi_HNOTIFY
16566+#define AuHi_HNOTIFY 0
16567+#endif
1facf9fc 16568+
4a4d8108
AM
16569+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
16570+ struct inode *h_inode, unsigned int flags);
1facf9fc 16571+
537831f9 16572+void au_update_iigen(struct inode *inode, int half);
4a4d8108 16573+void au_update_ibrange(struct inode *inode, int do_put_zero);
1facf9fc 16574+
4a4d8108
AM
16575+void au_icntnr_init_once(void *_c);
16576+int au_iinfo_init(struct inode *inode);
16577+void au_iinfo_fin(struct inode *inode);
16578+int au_ii_realloc(struct au_iinfo *iinfo, int nbr);
1308ab2a 16579+
e49829fe 16580+#ifdef CONFIG_PROC_FS
4a4d8108 16581+/* plink.c */
e49829fe 16582+int au_plink_maint(struct super_block *sb, int flags);
7e9cd9fe 16583+struct au_sbinfo;
e49829fe
JR
16584+void au_plink_maint_leave(struct au_sbinfo *sbinfo);
16585+int au_plink_maint_enter(struct super_block *sb);
4a4d8108
AM
16586+#ifdef CONFIG_AUFS_DEBUG
16587+void au_plink_list(struct super_block *sb);
16588+#else
16589+AuStubVoid(au_plink_list, struct super_block *sb)
16590+#endif
16591+int au_plink_test(struct inode *inode);
16592+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex);
16593+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
16594+ struct dentry *h_dentry);
e49829fe
JR
16595+void au_plink_put(struct super_block *sb, int verbose);
16596+void au_plink_clean(struct super_block *sb, int verbose);
4a4d8108 16597+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id);
e49829fe
JR
16598+#else
16599+AuStubInt0(au_plink_maint, struct super_block *sb, int flags);
16600+AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo);
16601+AuStubInt0(au_plink_maint_enter, struct super_block *sb);
16602+AuStubVoid(au_plink_list, struct super_block *sb);
16603+AuStubInt0(au_plink_test, struct inode *inode);
16604+AuStub(struct dentry *, au_plink_lkup, return NULL,
16605+ struct inode *inode, aufs_bindex_t bindex);
16606+AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex,
16607+ struct dentry *h_dentry);
16608+AuStubVoid(au_plink_put, struct super_block *sb, int verbose);
16609+AuStubVoid(au_plink_clean, struct super_block *sb, int verbose);
16610+AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id);
16611+#endif /* CONFIG_PROC_FS */
1facf9fc 16612+
c1595e42
JR
16613+#ifdef CONFIG_AUFS_XATTR
16614+/* xattr.c */
7e9cd9fe
AM
16615+int au_cpup_xattr(struct dentry *h_dst, struct dentry *h_src, int ignore_flags,
16616+ unsigned int verbose);
c1595e42
JR
16617+ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size);
16618+ssize_t aufs_getxattr(struct dentry *dentry, const char *name, void *value,
16619+ size_t size);
16620+int aufs_setxattr(struct dentry *dentry, const char *name, const void *value,
16621+ size_t size, int flags);
16622+int aufs_removexattr(struct dentry *dentry, const char *name);
16623+
16624+/* void au_xattr_init(struct super_block *sb); */
16625+#else
16626+AuStubInt0(au_cpup_xattr, struct dentry *h_dst, struct dentry *h_src,
7e9cd9fe 16627+ int ignore_flags, unsigned int verbose);
c1595e42
JR
16628+/* AuStubVoid(au_xattr_init, struct super_block *sb); */
16629+#endif
16630+
16631+#ifdef CONFIG_FS_POSIX_ACL
16632+struct posix_acl *aufs_get_acl(struct inode *inode, int type);
16633+int aufs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
16634+#endif
16635+
16636+#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
16637+enum {
16638+ AU_XATTR_SET,
16639+ AU_XATTR_REMOVE,
16640+ AU_ACL_SET
16641+};
16642+
16643+struct au_srxattr {
16644+ int type;
16645+ union {
16646+ struct {
16647+ const char *name;
16648+ const void *value;
16649+ size_t size;
16650+ int flags;
16651+ } set;
16652+ struct {
16653+ const char *name;
16654+ } remove;
16655+ struct {
16656+ struct posix_acl *acl;
16657+ int type;
16658+ } acl_set;
16659+ } u;
16660+};
16661+ssize_t au_srxattr(struct dentry *dentry, struct au_srxattr *arg);
16662+#endif
16663+
4a4d8108 16664+/* ---------------------------------------------------------------------- */
1308ab2a 16665+
4a4d8108
AM
16666+/* lock subclass for iinfo */
16667+enum {
16668+ AuLsc_II_CHILD, /* child first */
16669+ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hnotify */
16670+ AuLsc_II_CHILD3, /* copyup dirs */
16671+ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */
16672+ AuLsc_II_PARENT2,
16673+ AuLsc_II_PARENT3, /* copyup dirs */
16674+ AuLsc_II_NEW_CHILD
16675+};
1308ab2a 16676+
1facf9fc 16677+/*
4a4d8108
AM
16678+ * ii_read_lock_child, ii_write_lock_child,
16679+ * ii_read_lock_child2, ii_write_lock_child2,
16680+ * ii_read_lock_child3, ii_write_lock_child3,
16681+ * ii_read_lock_parent, ii_write_lock_parent,
16682+ * ii_read_lock_parent2, ii_write_lock_parent2,
16683+ * ii_read_lock_parent3, ii_write_lock_parent3,
16684+ * ii_read_lock_new_child, ii_write_lock_new_child,
1facf9fc 16685+ */
4a4d8108
AM
16686+#define AuReadLockFunc(name, lsc) \
16687+static inline void ii_read_lock_##name(struct inode *i) \
16688+{ \
16689+ au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
16690+}
16691+
16692+#define AuWriteLockFunc(name, lsc) \
16693+static inline void ii_write_lock_##name(struct inode *i) \
16694+{ \
16695+ au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
16696+}
16697+
16698+#define AuRWLockFuncs(name, lsc) \
16699+ AuReadLockFunc(name, lsc) \
16700+ AuWriteLockFunc(name, lsc)
16701+
16702+AuRWLockFuncs(child, CHILD);
16703+AuRWLockFuncs(child2, CHILD2);
16704+AuRWLockFuncs(child3, CHILD3);
16705+AuRWLockFuncs(parent, PARENT);
16706+AuRWLockFuncs(parent2, PARENT2);
16707+AuRWLockFuncs(parent3, PARENT3);
16708+AuRWLockFuncs(new_child, NEW_CHILD);
16709+
16710+#undef AuReadLockFunc
16711+#undef AuWriteLockFunc
16712+#undef AuRWLockFuncs
1facf9fc 16713+
16714+/*
4a4d8108 16715+ * ii_read_unlock, ii_write_unlock, ii_downgrade_lock
1facf9fc 16716+ */
4a4d8108 16717+AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem);
1facf9fc 16718+
4a4d8108
AM
16719+#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem)
16720+#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem)
16721+#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem)
1facf9fc 16722+
4a4d8108 16723+/* ---------------------------------------------------------------------- */
1308ab2a 16724+
027c5e7a
AM
16725+static inline void au_icntnr_init(struct au_icntnr *c)
16726+{
16727+#ifdef CONFIG_AUFS_DEBUG
16728+ c->vfs_inode.i_mode = 0;
16729+#endif
16730+}
16731+
537831f9 16732+static inline unsigned int au_iigen(struct inode *inode, struct au_iigen *iigen)
4a4d8108 16733+{
537831f9
AM
16734+ unsigned int gen;
16735+ struct au_iinfo *iinfo;
16736+
16737+ iinfo = au_ii(inode);
16738+ spin_lock(&iinfo->ii_genspin);
16739+ if (iigen)
16740+ *iigen = iinfo->ii_generation;
16741+ gen = iinfo->ii_generation.ig_generation;
16742+ spin_unlock(&iinfo->ii_genspin);
16743+
16744+ return gen;
4a4d8108 16745+}
1308ab2a 16746+
4a4d8108
AM
16747+/* tiny test for inode number */
16748+/* tmpfs generation is too rough */
16749+static inline int au_test_higen(struct inode *inode, struct inode *h_inode)
16750+{
16751+ struct au_iinfo *iinfo;
1308ab2a 16752+
4a4d8108
AM
16753+ iinfo = au_ii(inode);
16754+ AuRwMustAnyLock(&iinfo->ii_rwsem);
16755+ return !(iinfo->ii_hsb1 == h_inode->i_sb
16756+ && iinfo->ii_higen == h_inode->i_generation);
16757+}
1308ab2a 16758+
4a4d8108
AM
16759+static inline void au_iigen_dec(struct inode *inode)
16760+{
537831f9
AM
16761+ struct au_iinfo *iinfo;
16762+
16763+ iinfo = au_ii(inode);
16764+ spin_lock(&iinfo->ii_genspin);
16765+ iinfo->ii_generation.ig_generation--;
16766+ spin_unlock(&iinfo->ii_genspin);
027c5e7a
AM
16767+}
16768+
16769+static inline int au_iigen_test(struct inode *inode, unsigned int sigen)
16770+{
16771+ int err;
16772+
16773+ err = 0;
537831f9 16774+ if (unlikely(inode && au_iigen(inode, NULL) != sigen))
027c5e7a
AM
16775+ err = -EIO;
16776+
16777+ return err;
4a4d8108 16778+}
1308ab2a 16779+
4a4d8108 16780+/* ---------------------------------------------------------------------- */
1308ab2a 16781+
4a4d8108
AM
16782+static inline aufs_bindex_t au_ii_br_id(struct inode *inode,
16783+ aufs_bindex_t bindex)
16784+{
16785+ IiMustAnyLock(inode);
16786+ return au_ii(inode)->ii_hinode[0 + bindex].hi_id;
16787+}
1308ab2a 16788+
4a4d8108
AM
16789+static inline aufs_bindex_t au_ibstart(struct inode *inode)
16790+{
16791+ IiMustAnyLock(inode);
16792+ return au_ii(inode)->ii_bstart;
16793+}
1308ab2a 16794+
4a4d8108
AM
16795+static inline aufs_bindex_t au_ibend(struct inode *inode)
16796+{
16797+ IiMustAnyLock(inode);
16798+ return au_ii(inode)->ii_bend;
16799+}
1308ab2a 16800+
4a4d8108
AM
16801+static inline struct au_vdir *au_ivdir(struct inode *inode)
16802+{
16803+ IiMustAnyLock(inode);
16804+ return au_ii(inode)->ii_vdir;
16805+}
1308ab2a 16806+
4a4d8108
AM
16807+static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex)
16808+{
16809+ IiMustAnyLock(inode);
16810+ return au_ii(inode)->ii_hinode[0 + bindex].hi_whdentry;
16811+}
1308ab2a 16812+
4a4d8108 16813+static inline void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 16814+{
4a4d8108
AM
16815+ IiMustWriteLock(inode);
16816+ au_ii(inode)->ii_bstart = bindex;
16817+}
1308ab2a 16818+
4a4d8108
AM
16819+static inline void au_set_ibend(struct inode *inode, aufs_bindex_t bindex)
16820+{
16821+ IiMustWriteLock(inode);
16822+ au_ii(inode)->ii_bend = bindex;
1308ab2a 16823+}
16824+
4a4d8108
AM
16825+static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
16826+{
16827+ IiMustWriteLock(inode);
16828+ au_ii(inode)->ii_vdir = vdir;
16829+}
1facf9fc 16830+
4a4d8108 16831+static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 16832+{
4a4d8108
AM
16833+ IiMustAnyLock(inode);
16834+ return au_ii(inode)->ii_hinode + bindex;
16835+}
dece6358 16836+
4a4d8108 16837+/* ---------------------------------------------------------------------- */
1facf9fc 16838+
4a4d8108
AM
16839+static inline struct dentry *au_pinned_parent(struct au_pin *pin)
16840+{
16841+ if (pin)
16842+ return pin->parent;
16843+ return NULL;
1facf9fc 16844+}
16845+
4a4d8108 16846+static inline struct inode *au_pinned_h_dir(struct au_pin *pin)
1facf9fc 16847+{
4a4d8108
AM
16848+ if (pin && pin->hdir)
16849+ return pin->hdir->hi_inode;
16850+ return NULL;
1308ab2a 16851+}
1facf9fc 16852+
4a4d8108
AM
16853+static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin)
16854+{
16855+ if (pin)
16856+ return pin->hdir;
16857+ return NULL;
16858+}
1facf9fc 16859+
4a4d8108 16860+static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry)
1308ab2a 16861+{
4a4d8108
AM
16862+ if (pin)
16863+ pin->dentry = dentry;
16864+}
1308ab2a 16865+
4a4d8108
AM
16866+static inline void au_pin_set_parent_lflag(struct au_pin *pin,
16867+ unsigned char lflag)
16868+{
16869+ if (pin) {
7f207e10 16870+ if (lflag)
4a4d8108 16871+ au_fset_pin(pin->flags, DI_LOCKED);
7f207e10 16872+ else
4a4d8108 16873+ au_fclr_pin(pin->flags, DI_LOCKED);
1308ab2a 16874+ }
4a4d8108
AM
16875+}
16876+
7e9cd9fe 16877+#if 0 /* reserved */
4a4d8108
AM
16878+static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent)
16879+{
16880+ if (pin) {
16881+ dput(pin->parent);
16882+ pin->parent = dget(parent);
1facf9fc 16883+ }
4a4d8108 16884+}
7e9cd9fe 16885+#endif
1facf9fc 16886+
4a4d8108
AM
16887+/* ---------------------------------------------------------------------- */
16888+
027c5e7a 16889+struct au_branch;
4a4d8108
AM
16890+#ifdef CONFIG_AUFS_HNOTIFY
16891+struct au_hnotify_op {
16892+ void (*ctl)(struct au_hinode *hinode, int do_set);
027c5e7a 16893+ int (*alloc)(struct au_hinode *hinode);
7eafdf33
AM
16894+
16895+ /*
16896+ * if it returns true, the caller should free hinode->hi_notify,
16897+ * otherwise ->free() frees it.
16898+ */
16899+ int (*free)(struct au_hinode *hinode,
16900+ struct au_hnotify *hn) __must_check;
4a4d8108
AM
16901+
16902+ void (*fin)(void);
16903+ int (*init)(void);
027c5e7a
AM
16904+
16905+ int (*reset_br)(unsigned int udba, struct au_branch *br, int perm);
16906+ void (*fin_br)(struct au_branch *br);
16907+ int (*init_br)(struct au_branch *br, int perm);
4a4d8108
AM
16908+};
16909+
16910+/* hnotify.c */
027c5e7a 16911+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode);
4a4d8108
AM
16912+void au_hn_free(struct au_hinode *hinode);
16913+void au_hn_ctl(struct au_hinode *hinode, int do_set);
16914+void au_hn_reset(struct inode *inode, unsigned int flags);
16915+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
16916+ struct qstr *h_child_qstr, struct inode *h_child_inode);
027c5e7a
AM
16917+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm);
16918+int au_hnotify_init_br(struct au_branch *br, int perm);
16919+void au_hnotify_fin_br(struct au_branch *br);
4a4d8108
AM
16920+int __init au_hnotify_init(void);
16921+void au_hnotify_fin(void);
16922+
7f207e10 16923+/* hfsnotify.c */
4a4d8108
AM
16924+extern const struct au_hnotify_op au_hnotify_op;
16925+
16926+static inline
16927+void au_hn_init(struct au_hinode *hinode)
16928+{
16929+ hinode->hi_notify = NULL;
1308ab2a 16930+}
16931+
53392da6
AM
16932+static inline struct au_hnotify *au_hn(struct au_hinode *hinode)
16933+{
16934+ return hinode->hi_notify;
16935+}
16936+
4a4d8108 16937+#else
c1595e42
JR
16938+AuStub(int, au_hn_alloc, return -EOPNOTSUPP,
16939+ struct au_hinode *hinode __maybe_unused,
16940+ struct inode *inode __maybe_unused)
16941+AuStub(struct au_hnotify *, au_hn, return NULL, struct au_hinode *hinode)
4a4d8108
AM
16942+AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused)
16943+AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused,
16944+ int do_set __maybe_unused)
16945+AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused,
16946+ unsigned int flags __maybe_unused)
027c5e7a
AM
16947+AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused,
16948+ struct au_branch *br __maybe_unused,
16949+ int perm __maybe_unused)
16950+AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused,
16951+ int perm __maybe_unused)
16952+AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused)
4a4d8108
AM
16953+AuStubInt0(__init au_hnotify_init, void)
16954+AuStubVoid(au_hnotify_fin, void)
16955+AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused)
16956+#endif /* CONFIG_AUFS_HNOTIFY */
16957+
16958+static inline void au_hn_suspend(struct au_hinode *hdir)
16959+{
16960+ au_hn_ctl(hdir, /*do_set*/0);
1308ab2a 16961+}
16962+
4a4d8108 16963+static inline void au_hn_resume(struct au_hinode *hdir)
1308ab2a 16964+{
4a4d8108
AM
16965+ au_hn_ctl(hdir, /*do_set*/1);
16966+}
1308ab2a 16967+
4a4d8108
AM
16968+static inline void au_hn_imtx_lock(struct au_hinode *hdir)
16969+{
16970+ mutex_lock(&hdir->hi_inode->i_mutex);
16971+ au_hn_suspend(hdir);
16972+}
dece6358 16973+
4a4d8108
AM
16974+static inline void au_hn_imtx_lock_nested(struct au_hinode *hdir,
16975+ unsigned int sc __maybe_unused)
16976+{
16977+ mutex_lock_nested(&hdir->hi_inode->i_mutex, sc);
16978+ au_hn_suspend(hdir);
1facf9fc 16979+}
1facf9fc 16980+
4a4d8108
AM
16981+static inline void au_hn_imtx_unlock(struct au_hinode *hdir)
16982+{
16983+ au_hn_resume(hdir);
16984+ mutex_unlock(&hdir->hi_inode->i_mutex);
16985+}
16986+
16987+#endif /* __KERNEL__ */
16988+#endif /* __AUFS_INODE_H__ */
7f207e10
AM
16989diff -urN /usr/share/empty/fs/aufs/ioctl.c linux/fs/aufs/ioctl.c
16990--- /usr/share/empty/fs/aufs/ioctl.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 16991+++ linux/fs/aufs/ioctl.c 2015-09-24 10:47:58.254719746 +0200
c1595e42 16992@@ -0,0 +1,219 @@
4a4d8108 16993+/*
2000de60 16994+ * Copyright (C) 2005-2015 Junjiro R. Okajima
4a4d8108
AM
16995+ *
16996+ * This program, aufs is free software; you can redistribute it and/or modify
16997+ * it under the terms of the GNU General Public License as published by
16998+ * the Free Software Foundation; either version 2 of the License, or
16999+ * (at your option) any later version.
17000+ *
17001+ * This program is distributed in the hope that it will be useful,
17002+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17003+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17004+ * GNU General Public License for more details.
17005+ *
17006+ * You should have received a copy of the GNU General Public License
523b37e3 17007+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
17008+ */
17009+
17010+/*
17011+ * ioctl
17012+ * plink-management and readdir in userspace.
17013+ * assist the pathconf(3) wrapper library.
c2b27bf2 17014+ * move-down
076b876e 17015+ * File-based Hierarchical Storage Management.
4a4d8108
AM
17016+ */
17017+
c2b27bf2
AM
17018+#include <linux/compat.h>
17019+#include <linux/file.h>
4a4d8108
AM
17020+#include "aufs.h"
17021+
1e00d052 17022+static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg)
4a4d8108
AM
17023+{
17024+ int err, fd;
17025+ aufs_bindex_t wbi, bindex, bend;
17026+ struct file *h_file;
17027+ struct super_block *sb;
17028+ struct dentry *root;
1e00d052
AM
17029+ struct au_branch *br;
17030+ struct aufs_wbr_fd wbrfd = {
17031+ .oflags = au_dir_roflags,
17032+ .brid = -1
17033+ };
17034+ const int valid = O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_DIRECTORY
17035+ | O_NOATIME | O_CLOEXEC;
4a4d8108 17036+
1e00d052
AM
17037+ AuDebugOn(wbrfd.oflags & ~valid);
17038+
17039+ if (arg) {
17040+ err = copy_from_user(&wbrfd, arg, sizeof(wbrfd));
17041+ if (unlikely(err)) {
17042+ err = -EFAULT;
17043+ goto out;
17044+ }
17045+
17046+ err = -EINVAL;
17047+ AuDbg("wbrfd{0%o, %d}\n", wbrfd.oflags, wbrfd.brid);
17048+ wbrfd.oflags |= au_dir_roflags;
17049+ AuDbg("0%o\n", wbrfd.oflags);
17050+ if (unlikely(wbrfd.oflags & ~valid))
17051+ goto out;
17052+ }
17053+
2000de60 17054+ fd = get_unused_fd_flags(0);
1e00d052
AM
17055+ err = fd;
17056+ if (unlikely(fd < 0))
4a4d8108 17057+ goto out;
4a4d8108 17058+
1e00d052 17059+ h_file = ERR_PTR(-EINVAL);
4a4d8108 17060+ wbi = 0;
1e00d052 17061+ br = NULL;
4a4d8108
AM
17062+ sb = path->dentry->d_sb;
17063+ root = sb->s_root;
17064+ aufs_read_lock(root, AuLock_IR);
1e00d052
AM
17065+ bend = au_sbend(sb);
17066+ if (wbrfd.brid >= 0) {
17067+ wbi = au_br_index(sb, wbrfd.brid);
17068+ if (unlikely(wbi < 0 || wbi > bend))
17069+ goto out_unlock;
17070+ }
17071+
17072+ h_file = ERR_PTR(-ENOENT);
17073+ br = au_sbr(sb, wbi);
17074+ if (!au_br_writable(br->br_perm)) {
17075+ if (arg)
17076+ goto out_unlock;
17077+
17078+ bindex = wbi + 1;
17079+ wbi = -1;
17080+ for (; bindex <= bend; bindex++) {
17081+ br = au_sbr(sb, bindex);
17082+ if (au_br_writable(br->br_perm)) {
4a4d8108 17083+ wbi = bindex;
1e00d052 17084+ br = au_sbr(sb, wbi);
4a4d8108
AM
17085+ break;
17086+ }
17087+ }
4a4d8108
AM
17088+ }
17089+ AuDbg("wbi %d\n", wbi);
1e00d052 17090+ if (wbi >= 0)
392086de
AM
17091+ h_file = au_h_open(root, wbi, wbrfd.oflags, NULL,
17092+ /*force_wr*/0);
1e00d052
AM
17093+
17094+out_unlock:
4a4d8108
AM
17095+ aufs_read_unlock(root, AuLock_IR);
17096+ err = PTR_ERR(h_file);
17097+ if (IS_ERR(h_file))
17098+ goto out_fd;
17099+
1e00d052 17100+ atomic_dec(&br->br_count); /* cf. au_h_open() */
4a4d8108
AM
17101+ fd_install(fd, h_file);
17102+ err = fd;
17103+ goto out; /* success */
17104+
4f0767ce 17105+out_fd:
4a4d8108 17106+ put_unused_fd(fd);
4f0767ce 17107+out:
1e00d052 17108+ AuTraceErr(err);
4a4d8108
AM
17109+ return err;
17110+}
17111+
17112+/* ---------------------------------------------------------------------- */
17113+
17114+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg)
17115+{
17116+ long err;
c1595e42 17117+ struct dentry *dentry;
4a4d8108
AM
17118+
17119+ switch (cmd) {
4a4d8108
AM
17120+ case AUFS_CTL_RDU:
17121+ case AUFS_CTL_RDU_INO:
17122+ err = au_rdu_ioctl(file, cmd, arg);
17123+ break;
17124+
17125+ case AUFS_CTL_WBR_FD:
1e00d052 17126+ err = au_wbr_fd(&file->f_path, (void __user *)arg);
4a4d8108
AM
17127+ break;
17128+
027c5e7a
AM
17129+ case AUFS_CTL_IBUSY:
17130+ err = au_ibusy_ioctl(file, arg);
17131+ break;
17132+
076b876e
AM
17133+ case AUFS_CTL_BRINFO:
17134+ err = au_brinfo_ioctl(file, arg);
17135+ break;
17136+
17137+ case AUFS_CTL_FHSM_FD:
2000de60 17138+ dentry = file->f_path.dentry;
c1595e42
JR
17139+ if (IS_ROOT(dentry))
17140+ err = au_fhsm_fd(dentry->d_sb, arg);
17141+ else
17142+ err = -ENOTTY;
076b876e
AM
17143+ break;
17144+
4a4d8108
AM
17145+ default:
17146+ /* do not call the lower */
17147+ AuDbg("0x%x\n", cmd);
17148+ err = -ENOTTY;
17149+ }
17150+
17151+ AuTraceErr(err);
17152+ return err;
17153+}
17154+
17155+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg)
17156+{
17157+ long err;
17158+
17159+ switch (cmd) {
c2b27bf2 17160+ case AUFS_CTL_MVDOWN:
2000de60 17161+ err = au_mvdown(file->f_path.dentry, (void __user *)arg);
c2b27bf2
AM
17162+ break;
17163+
4a4d8108 17164+ case AUFS_CTL_WBR_FD:
1e00d052 17165+ err = au_wbr_fd(&file->f_path, (void __user *)arg);
4a4d8108
AM
17166+ break;
17167+
17168+ default:
17169+ /* do not call the lower */
17170+ AuDbg("0x%x\n", cmd);
17171+ err = -ENOTTY;
17172+ }
17173+
17174+ AuTraceErr(err);
17175+ return err;
17176+}
b752ccd1
AM
17177+
17178+#ifdef CONFIG_COMPAT
17179+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
17180+ unsigned long arg)
17181+{
17182+ long err;
17183+
17184+ switch (cmd) {
17185+ case AUFS_CTL_RDU:
17186+ case AUFS_CTL_RDU_INO:
17187+ err = au_rdu_compat_ioctl(file, cmd, arg);
17188+ break;
17189+
027c5e7a
AM
17190+ case AUFS_CTL_IBUSY:
17191+ err = au_ibusy_compat_ioctl(file, arg);
17192+ break;
17193+
076b876e
AM
17194+ case AUFS_CTL_BRINFO:
17195+ err = au_brinfo_compat_ioctl(file, arg);
17196+ break;
17197+
b752ccd1
AM
17198+ default:
17199+ err = aufs_ioctl_dir(file, cmd, arg);
17200+ }
17201+
17202+ AuTraceErr(err);
17203+ return err;
17204+}
17205+
b752ccd1
AM
17206+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
17207+ unsigned long arg)
17208+{
17209+ return aufs_ioctl_nondir(file, cmd, (unsigned long)compat_ptr(arg));
17210+}
17211+#endif
7f207e10
AM
17212diff -urN /usr/share/empty/fs/aufs/i_op_add.c linux/fs/aufs/i_op_add.c
17213--- /usr/share/empty/fs/aufs/i_op_add.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 17214+++ linux/fs/aufs/i_op_add.c 2015-09-24 10:47:58.254719746 +0200
5527c038 17215@@ -0,0 +1,932 @@
4a4d8108 17216+/*
2000de60 17217+ * Copyright (C) 2005-2015 Junjiro R. Okajima
4a4d8108
AM
17218+ *
17219+ * This program, aufs is free software; you can redistribute it and/or modify
17220+ * it under the terms of the GNU General Public License as published by
17221+ * the Free Software Foundation; either version 2 of the License, or
17222+ * (at your option) any later version.
17223+ *
17224+ * This program is distributed in the hope that it will be useful,
17225+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17226+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17227+ * GNU General Public License for more details.
17228+ *
17229+ * You should have received a copy of the GNU General Public License
523b37e3 17230+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
17231+ */
17232+
17233+/*
17234+ * inode operations (add entry)
17235+ */
17236+
17237+#include "aufs.h"
17238+
17239+/*
17240+ * final procedure of adding a new entry, except link(2).
17241+ * remove whiteout, instantiate, copyup the parent dir's times and size
17242+ * and update version.
17243+ * if it failed, re-create the removed whiteout.
17244+ */
17245+static int epilog(struct inode *dir, aufs_bindex_t bindex,
17246+ struct dentry *wh_dentry, struct dentry *dentry)
17247+{
17248+ int err, rerr;
17249+ aufs_bindex_t bwh;
17250+ struct path h_path;
076b876e 17251+ struct super_block *sb;
4a4d8108
AM
17252+ struct inode *inode, *h_dir;
17253+ struct dentry *wh;
17254+
17255+ bwh = -1;
076b876e 17256+ sb = dir->i_sb;
4a4d8108 17257+ if (wh_dentry) {
5527c038 17258+ h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */
4a4d8108
AM
17259+ IMustLock(h_dir);
17260+ AuDebugOn(au_h_iptr(dir, bindex) != h_dir);
17261+ bwh = au_dbwh(dentry);
17262+ h_path.dentry = wh_dentry;
076b876e 17263+ h_path.mnt = au_sbr_mnt(sb, bindex);
4a4d8108
AM
17264+ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path,
17265+ dentry);
17266+ if (unlikely(err))
17267+ goto out;
17268+ }
17269+
17270+ inode = au_new_inode(dentry, /*must_new*/1);
17271+ if (!IS_ERR(inode)) {
17272+ d_instantiate(dentry, inode);
5527c038 17273+ dir = d_inode(dentry->d_parent); /* dir inode is locked */
4a4d8108 17274+ IMustLock(dir);
b912730e 17275+ au_dir_ts(dir, bindex);
4a4d8108 17276+ dir->i_version++;
076b876e 17277+ au_fhsm_wrote(sb, bindex, /*force*/0);
4a4d8108
AM
17278+ return 0; /* success */
17279+ }
17280+
17281+ err = PTR_ERR(inode);
17282+ if (!wh_dentry)
17283+ goto out;
17284+
17285+ /* revert */
17286+ /* dir inode is locked */
17287+ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent);
17288+ rerr = PTR_ERR(wh);
17289+ if (IS_ERR(wh)) {
523b37e3
AM
17290+ AuIOErr("%pd reverting whiteout failed(%d, %d)\n",
17291+ dentry, err, rerr);
4a4d8108
AM
17292+ err = -EIO;
17293+ } else
17294+ dput(wh);
17295+
4f0767ce 17296+out:
4a4d8108
AM
17297+ return err;
17298+}
17299+
027c5e7a
AM
17300+static int au_d_may_add(struct dentry *dentry)
17301+{
17302+ int err;
17303+
17304+ err = 0;
17305+ if (unlikely(d_unhashed(dentry)))
17306+ err = -ENOENT;
5527c038 17307+ if (unlikely(d_really_is_positive(dentry)))
027c5e7a
AM
17308+ err = -EEXIST;
17309+ return err;
17310+}
17311+
4a4d8108
AM
17312+/*
17313+ * simple tests for the adding inode operations.
17314+ * following the checks in vfs, plus the parent-child relationship.
17315+ */
17316+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
17317+ struct dentry *h_parent, int isdir)
17318+{
17319+ int err;
17320+ umode_t h_mode;
17321+ struct dentry *h_dentry;
17322+ struct inode *h_inode;
17323+
17324+ err = -ENAMETOOLONG;
17325+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
17326+ goto out;
17327+
17328+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 17329+ if (d_really_is_negative(dentry)) {
4a4d8108 17330+ err = -EEXIST;
5527c038 17331+ if (unlikely(d_is_positive(h_dentry)))
4a4d8108
AM
17332+ goto out;
17333+ } else {
17334+ /* rename(2) case */
17335+ err = -EIO;
5527c038
JR
17336+ if (unlikely(d_is_negative(h_dentry)))
17337+ goto out;
17338+ h_inode = d_inode(h_dentry);
17339+ if (unlikely(!h_inode->i_nlink))
4a4d8108
AM
17340+ goto out;
17341+
17342+ h_mode = h_inode->i_mode;
17343+ if (!isdir) {
17344+ err = -EISDIR;
17345+ if (unlikely(S_ISDIR(h_mode)))
17346+ goto out;
17347+ } else if (unlikely(!S_ISDIR(h_mode))) {
17348+ err = -ENOTDIR;
17349+ goto out;
17350+ }
17351+ }
17352+
17353+ err = 0;
17354+ /* expected parent dir is locked */
17355+ if (unlikely(h_parent != h_dentry->d_parent))
17356+ err = -EIO;
17357+
4f0767ce 17358+out:
4a4d8108
AM
17359+ AuTraceErr(err);
17360+ return err;
17361+}
17362+
17363+/*
17364+ * initial procedure of adding a new entry.
17365+ * prepare writable branch and the parent dir, lock it,
17366+ * and lookup whiteout for the new entry.
17367+ */
17368+static struct dentry*
17369+lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt,
17370+ struct dentry *src_dentry, struct au_pin *pin,
17371+ struct au_wr_dir_args *wr_dir_args)
17372+{
17373+ struct dentry *wh_dentry, *h_parent;
17374+ struct super_block *sb;
17375+ struct au_branch *br;
17376+ int err;
17377+ unsigned int udba;
17378+ aufs_bindex_t bcpup;
17379+
523b37e3 17380+ AuDbg("%pd\n", dentry);
4a4d8108
AM
17381+
17382+ err = au_wr_dir(dentry, src_dentry, wr_dir_args);
17383+ bcpup = err;
17384+ wh_dentry = ERR_PTR(err);
17385+ if (unlikely(err < 0))
17386+ goto out;
17387+
17388+ sb = dentry->d_sb;
17389+ udba = au_opt_udba(sb);
17390+ err = au_pin(pin, dentry, bcpup, udba,
17391+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
17392+ wh_dentry = ERR_PTR(err);
17393+ if (unlikely(err))
17394+ goto out;
17395+
17396+ h_parent = au_pinned_h_parent(pin);
17397+ if (udba != AuOpt_UDBA_NONE
17398+ && au_dbstart(dentry) == bcpup)
17399+ err = au_may_add(dentry, bcpup, h_parent,
17400+ au_ftest_wrdir(wr_dir_args->flags, ISDIR));
17401+ else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
17402+ err = -ENAMETOOLONG;
17403+ wh_dentry = ERR_PTR(err);
17404+ if (unlikely(err))
17405+ goto out_unpin;
17406+
17407+ br = au_sbr(sb, bcpup);
17408+ if (dt) {
17409+ struct path tmp = {
17410+ .dentry = h_parent,
86dc4139 17411+ .mnt = au_br_mnt(br)
4a4d8108
AM
17412+ };
17413+ au_dtime_store(dt, au_pinned_parent(pin), &tmp);
17414+ }
17415+
17416+ wh_dentry = NULL;
17417+ if (bcpup != au_dbwh(dentry))
17418+ goto out; /* success */
17419+
2000de60
JR
17420+ /*
17421+ * ENAMETOOLONG here means that if we allowed creating such a name, it
17422+ * could not be removed in the future. So we don't allow such a
17423+ * name here and we don't handle ENAMETOOLONG differently here.
17424+ */
4a4d8108
AM
17425+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
17426+
4f0767ce 17427+out_unpin:
4a4d8108
AM
17428+ if (IS_ERR(wh_dentry))
17429+ au_unpin(pin);
4f0767ce 17430+out:
4a4d8108
AM
17431+ return wh_dentry;
17432+}
17433+
17434+/* ---------------------------------------------------------------------- */
17435+
17436+enum { Mknod, Symlink, Creat };
17437+struct simple_arg {
17438+ int type;
17439+ union {
17440+ struct {
b912730e
AM
17441+ umode_t mode;
17442+ bool want_excl;
17443+ bool try_aopen;
17444+ struct vfsub_aopen_args *aopen;
4a4d8108
AM
17445+ } c;
17446+ struct {
17447+ const char *symname;
17448+ } s;
17449+ struct {
7eafdf33 17450+ umode_t mode;
4a4d8108
AM
17451+ dev_t dev;
17452+ } m;
17453+ } u;
17454+};
17455+
17456+static int add_simple(struct inode *dir, struct dentry *dentry,
17457+ struct simple_arg *arg)
17458+{
076b876e 17459+ int err, rerr;
4a4d8108
AM
17460+ aufs_bindex_t bstart;
17461+ unsigned char created;
b912730e
AM
17462+ const unsigned char try_aopen
17463+ = (arg->type == Creat && arg->u.c.try_aopen);
4a4d8108
AM
17464+ struct dentry *wh_dentry, *parent;
17465+ struct inode *h_dir;
b912730e
AM
17466+ struct super_block *sb;
17467+ struct au_branch *br;
c2b27bf2
AM
17468+ /* to reuduce stack size */
17469+ struct {
17470+ struct au_dtime dt;
17471+ struct au_pin pin;
17472+ struct path h_path;
17473+ struct au_wr_dir_args wr_dir_args;
17474+ } *a;
4a4d8108 17475+
523b37e3 17476+ AuDbg("%pd\n", dentry);
4a4d8108
AM
17477+ IMustLock(dir);
17478+
c2b27bf2
AM
17479+ err = -ENOMEM;
17480+ a = kmalloc(sizeof(*a), GFP_NOFS);
17481+ if (unlikely(!a))
17482+ goto out;
17483+ a->wr_dir_args.force_btgt = -1;
17484+ a->wr_dir_args.flags = AuWrDir_ADD_ENTRY;
17485+
4a4d8108 17486+ parent = dentry->d_parent; /* dir inode is locked */
b912730e
AM
17487+ if (!try_aopen) {
17488+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
17489+ if (unlikely(err))
17490+ goto out_free;
17491+ }
027c5e7a
AM
17492+ err = au_d_may_add(dentry);
17493+ if (unlikely(err))
17494+ goto out_unlock;
b912730e
AM
17495+ if (!try_aopen)
17496+ di_write_lock_parent(parent);
c2b27bf2
AM
17497+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
17498+ &a->pin, &a->wr_dir_args);
4a4d8108
AM
17499+ err = PTR_ERR(wh_dentry);
17500+ if (IS_ERR(wh_dentry))
027c5e7a 17501+ goto out_parent;
4a4d8108
AM
17502+
17503+ bstart = au_dbstart(dentry);
b912730e
AM
17504+ sb = dentry->d_sb;
17505+ br = au_sbr(sb, bstart);
c2b27bf2 17506+ a->h_path.dentry = au_h_dptr(dentry, bstart);
b912730e 17507+ a->h_path.mnt = au_br_mnt(br);
c2b27bf2 17508+ h_dir = au_pinned_h_dir(&a->pin);
4a4d8108
AM
17509+ switch (arg->type) {
17510+ case Creat:
b912730e
AM
17511+ err = 0;
17512+ if (!try_aopen || !h_dir->i_op->atomic_open)
17513+ err = vfsub_create(h_dir, &a->h_path, arg->u.c.mode,
17514+ arg->u.c.want_excl);
17515+ else
17516+ err = vfsub_atomic_open(h_dir, a->h_path.dentry,
17517+ arg->u.c.aopen, br);
4a4d8108
AM
17518+ break;
17519+ case Symlink:
c2b27bf2 17520+ err = vfsub_symlink(h_dir, &a->h_path, arg->u.s.symname);
4a4d8108
AM
17521+ break;
17522+ case Mknod:
c2b27bf2
AM
17523+ err = vfsub_mknod(h_dir, &a->h_path, arg->u.m.mode,
17524+ arg->u.m.dev);
4a4d8108
AM
17525+ break;
17526+ default:
17527+ BUG();
17528+ }
17529+ created = !err;
17530+ if (!err)
17531+ err = epilog(dir, bstart, wh_dentry, dentry);
17532+
17533+ /* revert */
5527c038 17534+ if (unlikely(created && err && d_is_positive(a->h_path.dentry))) {
523b37e3
AM
17535+ /* no delegation since it is just created */
17536+ rerr = vfsub_unlink(h_dir, &a->h_path, /*delegated*/NULL,
17537+ /*force*/0);
4a4d8108 17538+ if (rerr) {
523b37e3
AM
17539+ AuIOErr("%pd revert failure(%d, %d)\n",
17540+ dentry, err, rerr);
4a4d8108
AM
17541+ err = -EIO;
17542+ }
c2b27bf2 17543+ au_dtime_revert(&a->dt);
4a4d8108
AM
17544+ }
17545+
b912730e
AM
17546+ if (!err && try_aopen && !h_dir->i_op->atomic_open)
17547+ *arg->u.c.aopen->opened |= FILE_CREATED;
17548+
c2b27bf2 17549+ au_unpin(&a->pin);
4a4d8108
AM
17550+ dput(wh_dentry);
17551+
027c5e7a 17552+out_parent:
b912730e
AM
17553+ if (!try_aopen)
17554+ di_write_unlock(parent);
027c5e7a 17555+out_unlock:
4a4d8108
AM
17556+ if (unlikely(err)) {
17557+ au_update_dbstart(dentry);
17558+ d_drop(dentry);
17559+ }
b912730e
AM
17560+ if (!try_aopen)
17561+ aufs_read_unlock(dentry, AuLock_DW);
c2b27bf2
AM
17562+out_free:
17563+ kfree(a);
027c5e7a 17564+out:
4a4d8108
AM
17565+ return err;
17566+}
17567+
7eafdf33
AM
17568+int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
17569+ dev_t dev)
4a4d8108
AM
17570+{
17571+ struct simple_arg arg = {
17572+ .type = Mknod,
17573+ .u.m = {
17574+ .mode = mode,
17575+ .dev = dev
17576+ }
17577+ };
17578+ return add_simple(dir, dentry, &arg);
17579+}
17580+
17581+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
17582+{
17583+ struct simple_arg arg = {
17584+ .type = Symlink,
17585+ .u.s.symname = symname
17586+ };
17587+ return add_simple(dir, dentry, &arg);
17588+}
17589+
7eafdf33 17590+int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
b4510431 17591+ bool want_excl)
4a4d8108
AM
17592+{
17593+ struct simple_arg arg = {
17594+ .type = Creat,
17595+ .u.c = {
b4510431
AM
17596+ .mode = mode,
17597+ .want_excl = want_excl
4a4d8108
AM
17598+ }
17599+ };
17600+ return add_simple(dir, dentry, &arg);
17601+}
17602+
b912730e
AM
17603+int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
17604+ struct vfsub_aopen_args *aopen_args)
17605+{
17606+ struct simple_arg arg = {
17607+ .type = Creat,
17608+ .u.c = {
17609+ .mode = aopen_args->create_mode,
17610+ .want_excl = aopen_args->open_flag & O_EXCL,
17611+ .try_aopen = true,
17612+ .aopen = aopen_args
17613+ }
17614+ };
17615+ return add_simple(dir, dentry, &arg);
17616+}
17617+
38d290e6
JR
17618+int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
17619+{
17620+ int err;
17621+ aufs_bindex_t bindex;
17622+ struct super_block *sb;
17623+ struct dentry *parent, *h_parent, *h_dentry;
17624+ struct inode *h_dir, *inode;
17625+ struct vfsmount *h_mnt;
17626+ struct au_wr_dir_args wr_dir_args = {
17627+ .force_btgt = -1,
17628+ .flags = AuWrDir_TMPFILE
17629+ };
17630+
17631+ /* copy-up may happen */
17632+ mutex_lock(&dir->i_mutex);
17633+
17634+ sb = dir->i_sb;
17635+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
17636+ if (unlikely(err))
17637+ goto out;
17638+
17639+ err = au_di_init(dentry);
17640+ if (unlikely(err))
17641+ goto out_si;
17642+
17643+ err = -EBUSY;
17644+ parent = d_find_any_alias(dir);
17645+ AuDebugOn(!parent);
17646+ di_write_lock_parent(parent);
5527c038 17647+ if (unlikely(d_inode(parent) != dir))
38d290e6
JR
17648+ goto out_parent;
17649+
17650+ err = au_digen_test(parent, au_sigen(sb));
17651+ if (unlikely(err))
17652+ goto out_parent;
17653+
17654+ bindex = au_dbstart(parent);
17655+ au_set_dbstart(dentry, bindex);
17656+ au_set_dbend(dentry, bindex);
17657+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
17658+ bindex = err;
17659+ if (unlikely(err < 0))
17660+ goto out_parent;
17661+
17662+ err = -EOPNOTSUPP;
17663+ h_dir = au_h_iptr(dir, bindex);
17664+ if (unlikely(!h_dir->i_op->tmpfile))
17665+ goto out_parent;
17666+
17667+ h_mnt = au_sbr_mnt(sb, bindex);
17668+ err = vfsub_mnt_want_write(h_mnt);
17669+ if (unlikely(err))
17670+ goto out_parent;
17671+
17672+ h_parent = au_h_dptr(parent, bindex);
5527c038 17673+ err = inode_permission(d_inode(h_parent), MAY_WRITE | MAY_EXEC);
38d290e6
JR
17674+ if (unlikely(err))
17675+ goto out_mnt;
17676+
17677+ err = -ENOMEM;
17678+ h_dentry = d_alloc(h_parent, &dentry->d_name);
17679+ if (unlikely(!h_dentry))
17680+ goto out_mnt;
17681+
17682+ err = h_dir->i_op->tmpfile(h_dir, h_dentry, mode);
17683+ if (unlikely(err))
17684+ goto out_dentry;
17685+
17686+ au_set_dbstart(dentry, bindex);
17687+ au_set_dbend(dentry, bindex);
17688+ au_set_h_dptr(dentry, bindex, dget(h_dentry));
17689+ inode = au_new_inode(dentry, /*must_new*/1);
17690+ if (IS_ERR(inode)) {
17691+ err = PTR_ERR(inode);
17692+ au_set_h_dptr(dentry, bindex, NULL);
17693+ au_set_dbstart(dentry, -1);
17694+ au_set_dbend(dentry, -1);
17695+ } else {
17696+ if (!inode->i_nlink)
17697+ set_nlink(inode, 1);
17698+ d_tmpfile(dentry, inode);
17699+ au_di(dentry)->di_tmpfile = 1;
17700+
17701+ /* update without i_mutex */
17702+ if (au_ibstart(dir) == au_dbstart(dentry))
17703+ au_cpup_attr_timesizes(dir);
17704+ }
17705+
17706+out_dentry:
17707+ dput(h_dentry);
17708+out_mnt:
17709+ vfsub_mnt_drop_write(h_mnt);
17710+out_parent:
17711+ di_write_unlock(parent);
17712+ dput(parent);
17713+ di_write_unlock(dentry);
17714+ if (!err)
17715+#if 0
17716+ /* verbose coding for lock class name */
17717+ au_rw_class(&au_di(dentry)->di_rwsem,
17718+ au_lc_key + AuLcNonDir_DIINFO);
17719+#else
17720+ ;
17721+#endif
17722+ else {
17723+ au_di_fin(dentry);
17724+ dentry->d_fsdata = NULL;
17725+ }
17726+out_si:
17727+ si_read_unlock(sb);
17728+out:
17729+ mutex_unlock(&dir->i_mutex);
17730+ return err;
17731+}
17732+
4a4d8108
AM
17733+/* ---------------------------------------------------------------------- */
17734+
17735+struct au_link_args {
17736+ aufs_bindex_t bdst, bsrc;
17737+ struct au_pin pin;
17738+ struct path h_path;
17739+ struct dentry *src_parent, *parent;
17740+};
17741+
17742+static int au_cpup_before_link(struct dentry *src_dentry,
17743+ struct au_link_args *a)
17744+{
17745+ int err;
17746+ struct dentry *h_src_dentry;
c2b27bf2
AM
17747+ struct au_cp_generic cpg = {
17748+ .dentry = src_dentry,
17749+ .bdst = a->bdst,
17750+ .bsrc = a->bsrc,
17751+ .len = -1,
17752+ .pin = &a->pin,
17753+ .flags = AuCpup_DTIME | AuCpup_HOPEN /* | AuCpup_KEEPLINO */
17754+ };
4a4d8108
AM
17755+
17756+ di_read_lock_parent(a->src_parent, AuLock_IR);
17757+ err = au_test_and_cpup_dirs(src_dentry, a->bdst);
17758+ if (unlikely(err))
17759+ goto out;
17760+
17761+ h_src_dentry = au_h_dptr(src_dentry, a->bsrc);
4a4d8108
AM
17762+ err = au_pin(&a->pin, src_dentry, a->bdst,
17763+ au_opt_udba(src_dentry->d_sb),
17764+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
17765+ if (unlikely(err))
17766+ goto out;
367653fa 17767+
c2b27bf2 17768+ err = au_sio_cpup_simple(&cpg);
4a4d8108
AM
17769+ au_unpin(&a->pin);
17770+
4f0767ce 17771+out:
4a4d8108
AM
17772+ di_read_unlock(a->src_parent, AuLock_IR);
17773+ return err;
17774+}
17775+
86dc4139
AM
17776+static int au_cpup_or_link(struct dentry *src_dentry, struct dentry *dentry,
17777+ struct au_link_args *a)
4a4d8108
AM
17778+{
17779+ int err;
17780+ unsigned char plink;
86dc4139 17781+ aufs_bindex_t bend;
4a4d8108 17782+ struct dentry *h_src_dentry;
523b37e3 17783+ struct inode *h_inode, *inode, *delegated;
4a4d8108
AM
17784+ struct super_block *sb;
17785+ struct file *h_file;
17786+
17787+ plink = 0;
17788+ h_inode = NULL;
17789+ sb = src_dentry->d_sb;
5527c038 17790+ inode = d_inode(src_dentry);
4a4d8108
AM
17791+ if (au_ibstart(inode) <= a->bdst)
17792+ h_inode = au_h_iptr(inode, a->bdst);
17793+ if (!h_inode || !h_inode->i_nlink) {
17794+ /* copyup src_dentry as the name of dentry. */
86dc4139
AM
17795+ bend = au_dbend(dentry);
17796+ if (bend < a->bsrc)
17797+ au_set_dbend(dentry, a->bsrc);
17798+ au_set_h_dptr(dentry, a->bsrc,
17799+ dget(au_h_dptr(src_dentry, a->bsrc)));
17800+ dget(a->h_path.dentry);
17801+ au_set_h_dptr(dentry, a->bdst, NULL);
c1595e42
JR
17802+ AuDbg("temporary d_inode...\n");
17803+ spin_lock(&dentry->d_lock);
5527c038 17804+ dentry->d_inode = d_inode(src_dentry); /* tmp */
c1595e42 17805+ spin_unlock(&dentry->d_lock);
392086de 17806+ h_file = au_h_open_pre(dentry, a->bsrc, /*force_wr*/0);
86dc4139 17807+ if (IS_ERR(h_file))
4a4d8108 17808+ err = PTR_ERR(h_file);
86dc4139 17809+ else {
c2b27bf2
AM
17810+ struct au_cp_generic cpg = {
17811+ .dentry = dentry,
17812+ .bdst = a->bdst,
17813+ .bsrc = -1,
17814+ .len = -1,
17815+ .pin = &a->pin,
17816+ .flags = AuCpup_KEEPLINO
17817+ };
17818+ err = au_sio_cpup_simple(&cpg);
86dc4139
AM
17819+ au_h_open_post(dentry, a->bsrc, h_file);
17820+ if (!err) {
17821+ dput(a->h_path.dentry);
17822+ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
17823+ } else
17824+ au_set_h_dptr(dentry, a->bdst,
17825+ a->h_path.dentry);
17826+ }
c1595e42 17827+ spin_lock(&dentry->d_lock);
86dc4139 17828+ dentry->d_inode = NULL; /* restore */
c1595e42
JR
17829+ spin_unlock(&dentry->d_lock);
17830+ AuDbg("temporary d_inode...done\n");
86dc4139
AM
17831+ au_set_h_dptr(dentry, a->bsrc, NULL);
17832+ au_set_dbend(dentry, bend);
4a4d8108
AM
17833+ } else {
17834+ /* the inode of src_dentry already exists on a.bdst branch */
17835+ h_src_dentry = d_find_alias(h_inode);
17836+ if (!h_src_dentry && au_plink_test(inode)) {
17837+ plink = 1;
17838+ h_src_dentry = au_plink_lkup(inode, a->bdst);
17839+ err = PTR_ERR(h_src_dentry);
17840+ if (IS_ERR(h_src_dentry))
17841+ goto out;
17842+
5527c038 17843+ if (unlikely(d_is_negative(h_src_dentry))) {
4a4d8108
AM
17844+ dput(h_src_dentry);
17845+ h_src_dentry = NULL;
17846+ }
17847+
17848+ }
17849+ if (h_src_dentry) {
523b37e3 17850+ delegated = NULL;
4a4d8108 17851+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
523b37e3
AM
17852+ &a->h_path, &delegated);
17853+ if (unlikely(err == -EWOULDBLOCK)) {
17854+ pr_warn("cannot retry for NFSv4 delegation"
17855+ " for an internal link\n");
17856+ iput(delegated);
17857+ }
4a4d8108
AM
17858+ dput(h_src_dentry);
17859+ } else {
17860+ AuIOErr("no dentry found for hi%lu on b%d\n",
17861+ h_inode->i_ino, a->bdst);
17862+ err = -EIO;
17863+ }
17864+ }
17865+
17866+ if (!err && !plink)
17867+ au_plink_append(inode, a->bdst, a->h_path.dentry);
17868+
17869+out:
2cbb1c4b 17870+ AuTraceErr(err);
4a4d8108
AM
17871+ return err;
17872+}
17873+
17874+int aufs_link(struct dentry *src_dentry, struct inode *dir,
17875+ struct dentry *dentry)
17876+{
17877+ int err, rerr;
17878+ struct au_dtime dt;
17879+ struct au_link_args *a;
17880+ struct dentry *wh_dentry, *h_src_dentry;
523b37e3 17881+ struct inode *inode, *delegated;
4a4d8108
AM
17882+ struct super_block *sb;
17883+ struct au_wr_dir_args wr_dir_args = {
17884+ /* .force_btgt = -1, */
17885+ .flags = AuWrDir_ADD_ENTRY
17886+ };
17887+
17888+ IMustLock(dir);
5527c038 17889+ inode = d_inode(src_dentry);
4a4d8108
AM
17890+ IMustLock(inode);
17891+
4a4d8108
AM
17892+ err = -ENOMEM;
17893+ a = kzalloc(sizeof(*a), GFP_NOFS);
17894+ if (unlikely(!a))
17895+ goto out;
17896+
17897+ a->parent = dentry->d_parent; /* dir inode is locked */
027c5e7a
AM
17898+ err = aufs_read_and_write_lock2(dentry, src_dentry,
17899+ AuLock_NOPLM | AuLock_GEN);
e49829fe
JR
17900+ if (unlikely(err))
17901+ goto out_kfree;
38d290e6 17902+ err = au_d_linkable(src_dentry);
027c5e7a
AM
17903+ if (unlikely(err))
17904+ goto out_unlock;
17905+ err = au_d_may_add(dentry);
17906+ if (unlikely(err))
17907+ goto out_unlock;
e49829fe 17908+
4a4d8108 17909+ a->src_parent = dget_parent(src_dentry);
2cbb1c4b 17910+ wr_dir_args.force_btgt = au_ibstart(inode);
4a4d8108
AM
17911+
17912+ di_write_lock_parent(a->parent);
17913+ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt);
17914+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin,
17915+ &wr_dir_args);
17916+ err = PTR_ERR(wh_dentry);
17917+ if (IS_ERR(wh_dentry))
027c5e7a 17918+ goto out_parent;
4a4d8108
AM
17919+
17920+ err = 0;
17921+ sb = dentry->d_sb;
17922+ a->bdst = au_dbstart(dentry);
17923+ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
17924+ a->h_path.mnt = au_sbr_mnt(sb, a->bdst);
2cbb1c4b
JR
17925+ a->bsrc = au_ibstart(inode);
17926+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
38d290e6
JR
17927+ if (!h_src_dentry && au_di(src_dentry)->di_tmpfile)
17928+ h_src_dentry = dget(au_hi_wh(inode, a->bsrc));
2cbb1c4b
JR
17929+ if (!h_src_dentry) {
17930+ a->bsrc = au_dbstart(src_dentry);
17931+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
17932+ AuDebugOn(!h_src_dentry);
38d290e6
JR
17933+ } else if (IS_ERR(h_src_dentry)) {
17934+ err = PTR_ERR(h_src_dentry);
2cbb1c4b 17935+ goto out_parent;
38d290e6 17936+ }
2cbb1c4b 17937+
4a4d8108
AM
17938+ if (au_opt_test(au_mntflags(sb), PLINK)) {
17939+ if (a->bdst < a->bsrc
17940+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */)
86dc4139 17941+ err = au_cpup_or_link(src_dentry, dentry, a);
523b37e3
AM
17942+ else {
17943+ delegated = NULL;
4a4d8108 17944+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
523b37e3
AM
17945+ &a->h_path, &delegated);
17946+ if (unlikely(err == -EWOULDBLOCK)) {
17947+ pr_warn("cannot retry for NFSv4 delegation"
17948+ " for an internal link\n");
17949+ iput(delegated);
17950+ }
17951+ }
2cbb1c4b 17952+ dput(h_src_dentry);
4a4d8108
AM
17953+ } else {
17954+ /*
17955+ * copyup src_dentry to the branch we process,
17956+ * and then link(2) to it.
17957+ */
2cbb1c4b 17958+ dput(h_src_dentry);
4a4d8108
AM
17959+ if (a->bdst < a->bsrc
17960+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) {
17961+ au_unpin(&a->pin);
17962+ di_write_unlock(a->parent);
17963+ err = au_cpup_before_link(src_dentry, a);
17964+ di_write_lock_parent(a->parent);
17965+ if (!err)
17966+ err = au_pin(&a->pin, dentry, a->bdst,
17967+ au_opt_udba(sb),
17968+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
17969+ if (unlikely(err))
17970+ goto out_wh;
17971+ }
17972+ if (!err) {
17973+ h_src_dentry = au_h_dptr(src_dentry, a->bdst);
17974+ err = -ENOENT;
5527c038 17975+ if (h_src_dentry && d_is_positive(h_src_dentry)) {
523b37e3 17976+ delegated = NULL;
4a4d8108
AM
17977+ err = vfsub_link(h_src_dentry,
17978+ au_pinned_h_dir(&a->pin),
523b37e3
AM
17979+ &a->h_path, &delegated);
17980+ if (unlikely(err == -EWOULDBLOCK)) {
17981+ pr_warn("cannot retry"
17982+ " for NFSv4 delegation"
17983+ " for an internal link\n");
17984+ iput(delegated);
17985+ }
17986+ }
4a4d8108
AM
17987+ }
17988+ }
17989+ if (unlikely(err))
17990+ goto out_unpin;
17991+
17992+ if (wh_dentry) {
17993+ a->h_path.dentry = wh_dentry;
17994+ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path,
17995+ dentry);
17996+ if (unlikely(err))
17997+ goto out_revert;
17998+ }
17999+
b912730e 18000+ au_dir_ts(dir, a->bdst);
4a4d8108 18001+ dir->i_version++;
4a4d8108
AM
18002+ inc_nlink(inode);
18003+ inode->i_ctime = dir->i_ctime;
027c5e7a
AM
18004+ d_instantiate(dentry, au_igrab(inode));
18005+ if (d_unhashed(a->h_path.dentry))
4a4d8108
AM
18006+ /* some filesystems call d_drop() */
18007+ d_drop(dentry);
076b876e
AM
18008+ /* some filesystems consume an inode even for a hardlink */
18009+ au_fhsm_wrote(sb, a->bdst, /*force*/0);
4a4d8108
AM
18010+ goto out_unpin; /* success */
18011+
4f0767ce 18012+out_revert:
523b37e3
AM
18013+ /* no delegation since it is just created */
18014+ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path,
18015+ /*delegated*/NULL, /*force*/0);
027c5e7a 18016+ if (unlikely(rerr)) {
523b37e3 18017+ AuIOErr("%pd reverting failed(%d, %d)\n", dentry, err, rerr);
027c5e7a
AM
18018+ err = -EIO;
18019+ }
4a4d8108 18020+ au_dtime_revert(&dt);
4f0767ce 18021+out_unpin:
4a4d8108 18022+ au_unpin(&a->pin);
4f0767ce 18023+out_wh:
4a4d8108 18024+ dput(wh_dentry);
027c5e7a
AM
18025+out_parent:
18026+ di_write_unlock(a->parent);
18027+ dput(a->src_parent);
4f0767ce 18028+out_unlock:
4a4d8108
AM
18029+ if (unlikely(err)) {
18030+ au_update_dbstart(dentry);
18031+ d_drop(dentry);
18032+ }
4a4d8108 18033+ aufs_read_and_write_unlock2(dentry, src_dentry);
e49829fe 18034+out_kfree:
4a4d8108 18035+ kfree(a);
4f0767ce 18036+out:
86dc4139 18037+ AuTraceErr(err);
4a4d8108
AM
18038+ return err;
18039+}
18040+
7eafdf33 18041+int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
4a4d8108
AM
18042+{
18043+ int err, rerr;
18044+ aufs_bindex_t bindex;
18045+ unsigned char diropq;
18046+ struct path h_path;
18047+ struct dentry *wh_dentry, *parent, *opq_dentry;
18048+ struct mutex *h_mtx;
18049+ struct super_block *sb;
18050+ struct {
18051+ struct au_pin pin;
18052+ struct au_dtime dt;
18053+ } *a; /* reduce the stack usage */
18054+ struct au_wr_dir_args wr_dir_args = {
18055+ .force_btgt = -1,
18056+ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR
18057+ };
18058+
18059+ IMustLock(dir);
18060+
18061+ err = -ENOMEM;
18062+ a = kmalloc(sizeof(*a), GFP_NOFS);
18063+ if (unlikely(!a))
18064+ goto out;
18065+
027c5e7a
AM
18066+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
18067+ if (unlikely(err))
18068+ goto out_free;
18069+ err = au_d_may_add(dentry);
18070+ if (unlikely(err))
18071+ goto out_unlock;
18072+
4a4d8108
AM
18073+ parent = dentry->d_parent; /* dir inode is locked */
18074+ di_write_lock_parent(parent);
18075+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
18076+ &a->pin, &wr_dir_args);
18077+ err = PTR_ERR(wh_dentry);
18078+ if (IS_ERR(wh_dentry))
027c5e7a 18079+ goto out_parent;
4a4d8108
AM
18080+
18081+ sb = dentry->d_sb;
18082+ bindex = au_dbstart(dentry);
18083+ h_path.dentry = au_h_dptr(dentry, bindex);
18084+ h_path.mnt = au_sbr_mnt(sb, bindex);
18085+ err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode);
18086+ if (unlikely(err))
027c5e7a 18087+ goto out_unpin;
4a4d8108
AM
18088+
18089+ /* make the dir opaque */
18090+ diropq = 0;
5527c038 18091+ h_mtx = &d_inode(h_path.dentry)->i_mutex;
4a4d8108
AM
18092+ if (wh_dentry
18093+ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) {
18094+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
18095+ opq_dentry = au_diropq_create(dentry, bindex);
18096+ mutex_unlock(h_mtx);
18097+ err = PTR_ERR(opq_dentry);
18098+ if (IS_ERR(opq_dentry))
18099+ goto out_dir;
18100+ dput(opq_dentry);
18101+ diropq = 1;
18102+ }
18103+
18104+ err = epilog(dir, bindex, wh_dentry, dentry);
18105+ if (!err) {
18106+ inc_nlink(dir);
027c5e7a 18107+ goto out_unpin; /* success */
4a4d8108
AM
18108+ }
18109+
18110+ /* revert */
18111+ if (diropq) {
18112+ AuLabel(revert opq);
18113+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
18114+ rerr = au_diropq_remove(dentry, bindex);
18115+ mutex_unlock(h_mtx);
18116+ if (rerr) {
523b37e3
AM
18117+ AuIOErr("%pd reverting diropq failed(%d, %d)\n",
18118+ dentry, err, rerr);
4a4d8108
AM
18119+ err = -EIO;
18120+ }
18121+ }
18122+
4f0767ce 18123+out_dir:
4a4d8108
AM
18124+ AuLabel(revert dir);
18125+ rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path);
18126+ if (rerr) {
523b37e3
AM
18127+ AuIOErr("%pd reverting dir failed(%d, %d)\n",
18128+ dentry, err, rerr);
4a4d8108
AM
18129+ err = -EIO;
18130+ }
4a4d8108 18131+ au_dtime_revert(&a->dt);
027c5e7a 18132+out_unpin:
4a4d8108
AM
18133+ au_unpin(&a->pin);
18134+ dput(wh_dentry);
027c5e7a
AM
18135+out_parent:
18136+ di_write_unlock(parent);
18137+out_unlock:
4a4d8108
AM
18138+ if (unlikely(err)) {
18139+ au_update_dbstart(dentry);
18140+ d_drop(dentry);
18141+ }
4a4d8108 18142+ aufs_read_unlock(dentry, AuLock_DW);
027c5e7a 18143+out_free:
4a4d8108 18144+ kfree(a);
4f0767ce 18145+out:
4a4d8108
AM
18146+ return err;
18147+}
7f207e10
AM
18148diff -urN /usr/share/empty/fs/aufs/i_op.c linux/fs/aufs/i_op.c
18149--- /usr/share/empty/fs/aufs/i_op.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd
AM
18150+++ linux/fs/aufs/i_op.c 2015-12-10 18:46:31.223310574 +0100
18151@@ -0,0 +1,1449 @@
4a4d8108 18152+/*
2000de60 18153+ * Copyright (C) 2005-2015 Junjiro R. Okajima
4a4d8108
AM
18154+ *
18155+ * This program, aufs is free software; you can redistribute it and/or modify
18156+ * it under the terms of the GNU General Public License as published by
18157+ * the Free Software Foundation; either version 2 of the License, or
18158+ * (at your option) any later version.
18159+ *
18160+ * This program is distributed in the hope that it will be useful,
18161+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18162+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18163+ * GNU General Public License for more details.
18164+ *
18165+ * You should have received a copy of the GNU General Public License
523b37e3 18166+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 18167+ */
1facf9fc 18168+
1308ab2a 18169+/*
4a4d8108 18170+ * inode operations (except add/del/rename)
1308ab2a 18171+ */
4a4d8108
AM
18172+
18173+#include <linux/device_cgroup.h>
18174+#include <linux/fs_stack.h>
92d182d2 18175+#include <linux/mm.h>
4a4d8108
AM
18176+#include <linux/namei.h>
18177+#include <linux/security.h>
4a4d8108
AM
18178+#include "aufs.h"
18179+
1e00d052 18180+static int h_permission(struct inode *h_inode, int mask,
4a4d8108 18181+ struct vfsmount *h_mnt, int brperm)
1facf9fc 18182+{
1308ab2a 18183+ int err;
4a4d8108 18184+ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
1facf9fc 18185+
4a4d8108
AM
18186+ err = -EACCES;
18187+ if ((write_mask && IS_IMMUTABLE(h_inode))
18188+ || ((mask & MAY_EXEC)
18189+ && S_ISREG(h_inode->i_mode)
18190+ && ((h_mnt->mnt_flags & MNT_NOEXEC)
18191+ || !(h_inode->i_mode & S_IXUGO))))
18192+ goto out;
18193+
18194+ /*
18195+ * - skip the lower fs test in the case of write to ro branch.
18196+ * - nfs dir permission write check is optimized, but a policy for
18197+ * link/rename requires a real check.
b912730e
AM
18198+ * - nfs always sets MS_POSIXACL regardless of its mount option 'noacl'.
18199+ * in this case, generic_permission() returns -EOPNOTSUPP.
4a4d8108
AM
18200+ */
18201+ if ((write_mask && !au_br_writable(brperm))
18202+ || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode)
18203+ && write_mask && !(mask & MAY_READ))
18204+ || !h_inode->i_op->permission) {
18205+ /* AuLabel(generic_permission); */
b912730e 18206+ /* AuDbg("get_acl %pf\n", h_inode->i_op->get_acl); */
1e00d052 18207+ err = generic_permission(h_inode, mask);
b912730e
AM
18208+ if (err == -EOPNOTSUPP && au_test_nfs_noacl(h_inode))
18209+ err = h_inode->i_op->permission(h_inode, mask);
18210+ AuTraceErr(err);
1308ab2a 18211+ } else {
4a4d8108 18212+ /* AuLabel(h_inode->permission); */
1e00d052 18213+ err = h_inode->i_op->permission(h_inode, mask);
4a4d8108
AM
18214+ AuTraceErr(err);
18215+ }
1facf9fc 18216+
4a4d8108
AM
18217+ if (!err)
18218+ err = devcgroup_inode_permission(h_inode, mask);
7f207e10 18219+ if (!err)
4a4d8108 18220+ err = security_inode_permission(h_inode, mask);
4a4d8108
AM
18221+
18222+#if 0
18223+ if (!err) {
18224+ /* todo: do we need to call ima_path_check()? */
18225+ struct path h_path = {
18226+ .dentry =
18227+ .mnt = h_mnt
18228+ };
18229+ err = ima_path_check(&h_path,
18230+ mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
18231+ IMA_COUNT_LEAVE);
1308ab2a 18232+ }
4a4d8108 18233+#endif
dece6358 18234+
4f0767ce 18235+out:
1308ab2a 18236+ return err;
18237+}
dece6358 18238+
1e00d052 18239+static int aufs_permission(struct inode *inode, int mask)
1308ab2a 18240+{
18241+ int err;
4a4d8108
AM
18242+ aufs_bindex_t bindex, bend;
18243+ const unsigned char isdir = !!S_ISDIR(inode->i_mode),
18244+ write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
18245+ struct inode *h_inode;
18246+ struct super_block *sb;
18247+ struct au_branch *br;
1facf9fc 18248+
027c5e7a 18249+ /* todo: support rcu-walk? */
1e00d052 18250+ if (mask & MAY_NOT_BLOCK)
027c5e7a
AM
18251+ return -ECHILD;
18252+
4a4d8108
AM
18253+ sb = inode->i_sb;
18254+ si_read_lock(sb, AuLock_FLUSH);
18255+ ii_read_lock_child(inode);
027c5e7a
AM
18256+#if 0
18257+ err = au_iigen_test(inode, au_sigen(sb));
18258+ if (unlikely(err))
18259+ goto out;
18260+#endif
dece6358 18261+
076b876e
AM
18262+ if (!isdir
18263+ || write_mask
18264+ || au_opt_test(au_mntflags(sb), DIRPERM1)) {
4a4d8108
AM
18265+ err = au_busy_or_stale();
18266+ h_inode = au_h_iptr(inode, au_ibstart(inode));
18267+ if (unlikely(!h_inode
18268+ || (h_inode->i_mode & S_IFMT)
18269+ != (inode->i_mode & S_IFMT)))
18270+ goto out;
1facf9fc 18271+
4a4d8108
AM
18272+ err = 0;
18273+ bindex = au_ibstart(inode);
18274+ br = au_sbr(sb, bindex);
86dc4139 18275+ err = h_permission(h_inode, mask, au_br_mnt(br), br->br_perm);
4a4d8108
AM
18276+ if (write_mask
18277+ && !err
18278+ && !special_file(h_inode->i_mode)) {
18279+ /* test whether the upper writable branch exists */
18280+ err = -EROFS;
18281+ for (; bindex >= 0; bindex--)
18282+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
18283+ err = 0;
18284+ break;
18285+ }
18286+ }
18287+ goto out;
18288+ }
dece6358 18289+
4a4d8108 18290+ /* non-write to dir */
1308ab2a 18291+ err = 0;
4a4d8108
AM
18292+ bend = au_ibend(inode);
18293+ for (bindex = au_ibstart(inode); !err && bindex <= bend; bindex++) {
18294+ h_inode = au_h_iptr(inode, bindex);
18295+ if (h_inode) {
18296+ err = au_busy_or_stale();
18297+ if (unlikely(!S_ISDIR(h_inode->i_mode)))
18298+ break;
18299+
18300+ br = au_sbr(sb, bindex);
86dc4139 18301+ err = h_permission(h_inode, mask, au_br_mnt(br),
4a4d8108
AM
18302+ br->br_perm);
18303+ }
18304+ }
1308ab2a 18305+
4f0767ce 18306+out:
4a4d8108
AM
18307+ ii_read_unlock(inode);
18308+ si_read_unlock(sb);
1308ab2a 18309+ return err;
18310+}
18311+
4a4d8108 18312+/* ---------------------------------------------------------------------- */
1facf9fc 18313+
4a4d8108 18314+static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
b4510431 18315+ unsigned int flags)
4a4d8108
AM
18316+{
18317+ struct dentry *ret, *parent;
b752ccd1 18318+ struct inode *inode;
4a4d8108 18319+ struct super_block *sb;
1716fcea 18320+ int err, npositive;
dece6358 18321+
4a4d8108 18322+ IMustLock(dir);
1308ab2a 18323+
537831f9
AM
18324+ /* todo: support rcu-walk? */
18325+ ret = ERR_PTR(-ECHILD);
18326+ if (flags & LOOKUP_RCU)
18327+ goto out;
18328+
18329+ ret = ERR_PTR(-ENAMETOOLONG);
18330+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
18331+ goto out;
18332+
4a4d8108 18333+ sb = dir->i_sb;
7f207e10
AM
18334+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
18335+ ret = ERR_PTR(err);
18336+ if (unlikely(err))
18337+ goto out;
18338+
4a4d8108
AM
18339+ err = au_di_init(dentry);
18340+ ret = ERR_PTR(err);
18341+ if (unlikely(err))
7f207e10 18342+ goto out_si;
1308ab2a 18343+
9dbd164d 18344+ inode = NULL;
027c5e7a 18345+ npositive = 0; /* suppress a warning */
4a4d8108
AM
18346+ parent = dentry->d_parent; /* dir inode is locked */
18347+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a
AM
18348+ err = au_alive_dir(parent);
18349+ if (!err)
18350+ err = au_digen_test(parent, au_sigen(sb));
18351+ if (!err) {
18352+ npositive = au_lkup_dentry(dentry, au_dbstart(parent),
537831f9 18353+ /*type*/0);
027c5e7a
AM
18354+ err = npositive;
18355+ }
4a4d8108 18356+ di_read_unlock(parent, AuLock_IR);
4a4d8108
AM
18357+ ret = ERR_PTR(err);
18358+ if (unlikely(err < 0))
18359+ goto out_unlock;
1308ab2a 18360+
4a4d8108 18361+ if (npositive) {
b752ccd1 18362+ inode = au_new_inode(dentry, /*must_new*/0);
c1595e42
JR
18363+ if (IS_ERR(inode)) {
18364+ ret = (void *)inode;
18365+ inode = NULL;
18366+ goto out_unlock;
18367+ }
9dbd164d 18368+ }
4a4d8108 18369+
c1595e42
JR
18370+ if (inode)
18371+ atomic_inc(&inode->i_count);
4a4d8108 18372+ ret = d_splice_alias(inode, dentry);
537831f9
AM
18373+#if 0
18374+ if (unlikely(d_need_lookup(dentry))) {
18375+ spin_lock(&dentry->d_lock);
18376+ dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
18377+ spin_unlock(&dentry->d_lock);
18378+ } else
18379+#endif
c1595e42 18380+ if (inode) {
2000de60 18381+ if (!IS_ERR(ret)) {
c1595e42 18382+ iput(inode);
2000de60
JR
18383+ if (ret && ret != dentry)
18384+ ii_write_unlock(inode);
18385+ } else {
c1595e42
JR
18386+ ii_write_unlock(inode);
18387+ iput(inode);
18388+ inode = NULL;
18389+ }
7f207e10 18390+ }
1facf9fc 18391+
4f0767ce 18392+out_unlock:
4a4d8108 18393+ di_write_unlock(dentry);
2dfbb274 18394+ if (inode) {
1716fcea
AM
18395+ /* verbose coding for lock class name */
18396+ if (unlikely(S_ISLNK(inode->i_mode)))
18397+ au_rw_class(&au_di(dentry)->di_rwsem,
18398+ au_lc_key + AuLcSymlink_DIINFO);
18399+ else if (unlikely(S_ISDIR(inode->i_mode)))
18400+ au_rw_class(&au_di(dentry)->di_rwsem,
18401+ au_lc_key + AuLcDir_DIINFO);
18402+ else /* likely */
18403+ au_rw_class(&au_di(dentry)->di_rwsem,
18404+ au_lc_key + AuLcNonDir_DIINFO);
9dbd164d 18405+ }
7f207e10 18406+out_si:
4a4d8108 18407+ si_read_unlock(sb);
7f207e10 18408+out:
4a4d8108
AM
18409+ return ret;
18410+}
1facf9fc 18411+
4a4d8108 18412+/* ---------------------------------------------------------------------- */
1facf9fc 18413+
b912730e
AM
18414+struct aopen_node {
18415+ struct hlist_node hlist;
18416+ struct file *file, *h_file;
18417+};
18418+
18419+static int au_do_aopen(struct inode *inode, struct file *file)
18420+{
18421+ struct au_sphlhead *aopen;
18422+ struct aopen_node *node;
18423+ struct au_do_open_args args = {
18424+ .no_lock = 1,
18425+ .open = au_do_open_nondir
18426+ };
18427+
18428+ aopen = &au_sbi(inode->i_sb)->si_aopen;
18429+ spin_lock(&aopen->spin);
18430+ hlist_for_each_entry(node, &aopen->head, hlist)
18431+ if (node->file == file) {
18432+ args.h_file = node->h_file;
18433+ break;
18434+ }
18435+ spin_unlock(&aopen->spin);
18436+ /* AuDebugOn(!args.h_file); */
18437+
18438+ return au_do_open(file, &args);
18439+}
18440+
18441+static int aufs_atomic_open(struct inode *dir, struct dentry *dentry,
18442+ struct file *file, unsigned int open_flag,
18443+ umode_t create_mode, int *opened)
18444+{
18445+ int err, h_opened = *opened;
18446+ struct dentry *parent;
18447+ struct dentry *d;
18448+ struct au_sphlhead *aopen;
18449+ struct vfsub_aopen_args args = {
18450+ .open_flag = open_flag,
18451+ .create_mode = create_mode,
18452+ .opened = &h_opened
18453+ };
18454+ struct aopen_node aopen_node = {
18455+ .file = file
18456+ };
18457+
18458+ IMustLock(dir);
18459+ AuDbg("open_flag 0x%x\n", open_flag);
18460+ AuDbgDentry(dentry);
18461+
18462+ err = 0;
18463+ if (!au_di(dentry)) {
18464+ d = aufs_lookup(dir, dentry, /*flags*/0);
18465+ if (IS_ERR(d)) {
18466+ err = PTR_ERR(d);
18467+ goto out;
18468+ } else if (d) {
18469+ /*
18470+ * obsoleted dentry found.
18471+ * another error will be returned later.
18472+ */
18473+ d_drop(d);
18474+ dput(d);
18475+ AuDbgDentry(d);
18476+ }
18477+ AuDbgDentry(dentry);
18478+ }
18479+
18480+ if (d_is_positive(dentry)
18481+ || d_unhashed(dentry)
18482+ || d_unlinked(dentry)
18483+ || !(open_flag & O_CREAT))
18484+ goto out_no_open;
18485+
18486+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
18487+ if (unlikely(err))
18488+ goto out;
18489+
18490+ parent = dentry->d_parent; /* dir is locked */
18491+ di_write_lock_parent(parent);
18492+ err = au_lkup_dentry(dentry, /*bstart*/0, /*type*/0);
18493+ if (unlikely(err))
18494+ goto out_unlock;
18495+
18496+ AuDbgDentry(dentry);
18497+ if (d_is_positive(dentry))
18498+ goto out_unlock;
18499+
18500+ args.file = get_empty_filp();
18501+ err = PTR_ERR(args.file);
18502+ if (IS_ERR(args.file))
18503+ goto out_unlock;
18504+
18505+ args.file->f_flags = file->f_flags;
18506+ err = au_aopen_or_create(dir, dentry, &args);
18507+ AuTraceErr(err);
18508+ AuDbgFile(args.file);
18509+ if (unlikely(err < 0)) {
18510+ if (h_opened & FILE_OPENED)
18511+ fput(args.file);
18512+ else
18513+ put_filp(args.file);
18514+ goto out_unlock;
18515+ }
18516+
18517+ /* some filesystems don't set FILE_CREATED even on success? */
18518+ *opened |= FILE_CREATED;
18519+ if (h_opened & FILE_OPENED)
18520+ aopen_node.h_file = args.file;
18521+ else {
18522+ put_filp(args.file);
18523+ args.file = NULL;
18524+ }
18525+ aopen = &au_sbi(dir->i_sb)->si_aopen;
18526+ au_sphl_add(&aopen_node.hlist, aopen);
18527+ err = finish_open(file, dentry, au_do_aopen, opened);
18528+ au_sphl_del(&aopen_node.hlist, aopen);
18529+ AuTraceErr(err);
18530+ AuDbgFile(file);
18531+ if (aopen_node.h_file)
18532+ fput(aopen_node.h_file);
18533+
18534+out_unlock:
18535+ di_write_unlock(parent);
18536+ aufs_read_unlock(dentry, AuLock_DW);
18537+ AuDbgDentry(dentry);
18538+ if (unlikely(err))
18539+ goto out;
18540+out_no_open:
18541+ if (!err && !(*opened & FILE_CREATED)) {
18542+ AuLabel(out_no_open);
18543+ dget(dentry);
18544+ err = finish_no_open(file, dentry);
18545+ }
18546+out:
18547+ AuDbg("%pd%s%s\n", dentry,
18548+ (*opened & FILE_CREATED) ? " created" : "",
18549+ (*opened & FILE_OPENED) ? " opened" : "");
18550+ AuTraceErr(err);
18551+ return err;
18552+}
18553+
18554+
18555+/* ---------------------------------------------------------------------- */
18556+
4a4d8108
AM
18557+static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
18558+ const unsigned char add_entry, aufs_bindex_t bcpup,
18559+ aufs_bindex_t bstart)
18560+{
18561+ int err;
18562+ struct dentry *h_parent;
18563+ struct inode *h_dir;
1facf9fc 18564+
027c5e7a 18565+ if (add_entry)
5527c038 18566+ IMustLock(d_inode(parent));
027c5e7a 18567+ else
4a4d8108
AM
18568+ di_write_lock_parent(parent);
18569+
18570+ err = 0;
18571+ if (!au_h_dptr(parent, bcpup)) {
c2b27bf2
AM
18572+ if (bstart > bcpup)
18573+ err = au_cpup_dirs(dentry, bcpup);
18574+ else if (bstart < bcpup)
4a4d8108
AM
18575+ err = au_cpdown_dirs(dentry, bcpup);
18576+ else
c2b27bf2 18577+ BUG();
4a4d8108 18578+ }
38d290e6 18579+ if (!err && add_entry && !au_ftest_wrdir(add_entry, TMPFILE)) {
4a4d8108 18580+ h_parent = au_h_dptr(parent, bcpup);
5527c038 18581+ h_dir = d_inode(h_parent);
4a4d8108 18582+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
7e9cd9fe 18583+ err = au_lkup_neg(dentry, bcpup, /*wh*/0);
4a4d8108
AM
18584+ /* todo: no unlock here */
18585+ mutex_unlock(&h_dir->i_mutex);
027c5e7a
AM
18586+
18587+ AuDbg("bcpup %d\n", bcpup);
18588+ if (!err) {
5527c038 18589+ if (d_really_is_negative(dentry))
027c5e7a 18590+ au_set_h_dptr(dentry, bstart, NULL);
4a4d8108
AM
18591+ au_update_dbrange(dentry, /*do_put_zero*/0);
18592+ }
1308ab2a 18593+ }
1facf9fc 18594+
4a4d8108
AM
18595+ if (!add_entry)
18596+ di_write_unlock(parent);
18597+ if (!err)
18598+ err = bcpup; /* success */
1308ab2a 18599+
027c5e7a 18600+ AuTraceErr(err);
4a4d8108
AM
18601+ return err;
18602+}
1facf9fc 18603+
4a4d8108
AM
18604+/*
18605+ * decide the branch and the parent dir where we will create a new entry.
18606+ * returns new bindex or an error.
18607+ * copyup the parent dir if needed.
18608+ */
18609+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
18610+ struct au_wr_dir_args *args)
18611+{
18612+ int err;
392086de 18613+ unsigned int flags;
4a4d8108 18614+ aufs_bindex_t bcpup, bstart, src_bstart;
86dc4139
AM
18615+ const unsigned char add_entry
18616+ = au_ftest_wrdir(args->flags, ADD_ENTRY)
38d290e6 18617+ | au_ftest_wrdir(args->flags, TMPFILE);
4a4d8108
AM
18618+ struct super_block *sb;
18619+ struct dentry *parent;
18620+ struct au_sbinfo *sbinfo;
1facf9fc 18621+
4a4d8108
AM
18622+ sb = dentry->d_sb;
18623+ sbinfo = au_sbi(sb);
18624+ parent = dget_parent(dentry);
18625+ bstart = au_dbstart(dentry);
18626+ bcpup = bstart;
18627+ if (args->force_btgt < 0) {
18628+ if (src_dentry) {
18629+ src_bstart = au_dbstart(src_dentry);
18630+ if (src_bstart < bstart)
18631+ bcpup = src_bstart;
18632+ } else if (add_entry) {
392086de
AM
18633+ flags = 0;
18634+ if (au_ftest_wrdir(args->flags, ISDIR))
18635+ au_fset_wbr(flags, DIR);
18636+ err = AuWbrCreate(sbinfo, dentry, flags);
4a4d8108
AM
18637+ bcpup = err;
18638+ }
1facf9fc 18639+
5527c038 18640+ if (bcpup < 0 || au_test_ro(sb, bcpup, d_inode(dentry))) {
4a4d8108
AM
18641+ if (add_entry)
18642+ err = AuWbrCopyup(sbinfo, dentry);
18643+ else {
18644+ if (!IS_ROOT(dentry)) {
18645+ di_read_lock_parent(parent, !AuLock_IR);
18646+ err = AuWbrCopyup(sbinfo, dentry);
18647+ di_read_unlock(parent, !AuLock_IR);
18648+ } else
18649+ err = AuWbrCopyup(sbinfo, dentry);
18650+ }
18651+ bcpup = err;
18652+ if (unlikely(err < 0))
18653+ goto out;
18654+ }
18655+ } else {
18656+ bcpup = args->force_btgt;
5527c038 18657+ AuDebugOn(au_test_ro(sb, bcpup, d_inode(dentry)));
1308ab2a 18658+ }
027c5e7a 18659+
4a4d8108
AM
18660+ AuDbg("bstart %d, bcpup %d\n", bstart, bcpup);
18661+ err = bcpup;
18662+ if (bcpup == bstart)
18663+ goto out; /* success */
4a4d8108
AM
18664+
18665+ /* copyup the new parent into the branch we process */
18666+ err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, bstart);
027c5e7a 18667+ if (err >= 0) {
5527c038 18668+ if (d_really_is_negative(dentry)) {
027c5e7a
AM
18669+ au_set_h_dptr(dentry, bstart, NULL);
18670+ au_set_dbstart(dentry, bcpup);
18671+ au_set_dbend(dentry, bcpup);
18672+ }
38d290e6
JR
18673+ AuDebugOn(add_entry
18674+ && !au_ftest_wrdir(args->flags, TMPFILE)
18675+ && !au_h_dptr(dentry, bcpup));
027c5e7a 18676+ }
86dc4139
AM
18677+
18678+out:
18679+ dput(parent);
18680+ return err;
18681+}
18682+
18683+/* ---------------------------------------------------------------------- */
18684+
18685+void au_pin_hdir_unlock(struct au_pin *p)
18686+{
18687+ if (p->hdir)
18688+ au_hn_imtx_unlock(p->hdir);
18689+}
18690+
c1595e42 18691+int au_pin_hdir_lock(struct au_pin *p)
86dc4139
AM
18692+{
18693+ int err;
18694+
18695+ err = 0;
18696+ if (!p->hdir)
18697+ goto out;
18698+
18699+ /* even if an error happens later, keep this lock */
18700+ au_hn_imtx_lock_nested(p->hdir, p->lsc_hi);
18701+
18702+ err = -EBUSY;
5527c038 18703+ if (unlikely(p->hdir->hi_inode != d_inode(p->h_parent)))
86dc4139
AM
18704+ goto out;
18705+
18706+ err = 0;
18707+ if (p->h_dentry)
18708+ err = au_h_verify(p->h_dentry, p->udba, p->hdir->hi_inode,
18709+ p->h_parent, p->br);
18710+
18711+out:
18712+ return err;
18713+}
18714+
18715+int au_pin_hdir_relock(struct au_pin *p)
18716+{
18717+ int err, i;
18718+ struct inode *h_i;
18719+ struct dentry *h_d[] = {
18720+ p->h_dentry,
18721+ p->h_parent
18722+ };
18723+
18724+ err = au_pin_hdir_lock(p);
18725+ if (unlikely(err))
18726+ goto out;
18727+
18728+ for (i = 0; !err && i < sizeof(h_d)/sizeof(*h_d); i++) {
18729+ if (!h_d[i])
18730+ continue;
5527c038
JR
18731+ if (d_is_positive(h_d[i])) {
18732+ h_i = d_inode(h_d[i]);
86dc4139 18733+ err = !h_i->i_nlink;
5527c038 18734+ }
86dc4139
AM
18735+ }
18736+
18737+out:
18738+ return err;
18739+}
18740+
/*
 * overwrite the owner of i_mutex of the pinned lower dir by @task.
 * it does nothing unless CONFIG_DEBUG_MUTEXES or CONFIG_SMP is defined.
 * p->hdir is dereferenced without a test here.
 */
void au_pin_hdir_set_owner(struct au_pin *p, struct task_struct *task)
{
#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
	struct mutex *h_mtx = &p->hdir->hi_inode->i_mutex;

	h_mtx->owner = task;
#endif
}
18747+
18748+void au_pin_hdir_acquire_nest(struct au_pin *p)
18749+{
18750+ if (p->hdir) {
18751+ mutex_acquire_nest(&p->hdir->hi_inode->i_mutex.dep_map,
18752+ p->lsc_hi, 0, NULL, _RET_IP_);
18753+ au_pin_hdir_set_owner(p, current);
18754+ }
dece6358 18755+}
1facf9fc 18756+
86dc4139
AM
18757+void au_pin_hdir_release(struct au_pin *p)
18758+{
18759+ if (p->hdir) {
18760+ au_pin_hdir_set_owner(p, p->task);
18761+ mutex_release(&p->hdir->hi_inode->i_mutex.dep_map, 1, _RET_IP_);
18762+ }
18763+}
1308ab2a 18764+
4a4d8108 18765+struct dentry *au_pinned_h_parent(struct au_pin *pin)
1308ab2a 18766+{
4a4d8108
AM
18767+ if (pin && pin->parent)
18768+ return au_h_dptr(pin->parent, pin->bindex);
18769+ return NULL;
dece6358 18770+}
1facf9fc 18771+
4a4d8108 18772+void au_unpin(struct au_pin *p)
dece6358 18773+{
86dc4139
AM
18774+ if (p->hdir)
18775+ au_pin_hdir_unlock(p);
e49829fe 18776+ if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE))
b4510431 18777+ vfsub_mnt_drop_write(p->h_mnt);
4a4d8108
AM
18778+ if (!p->hdir)
18779+ return;
1facf9fc 18780+
4a4d8108
AM
18781+ if (!au_ftest_pin(p->flags, DI_LOCKED))
18782+ di_read_unlock(p->parent, AuLock_IR);
18783+ iput(p->hdir->hi_inode);
18784+ dput(p->parent);
18785+ p->parent = NULL;
18786+ p->hdir = NULL;
18787+ p->h_mnt = NULL;
86dc4139 18788+ /* do not clear p->task */
4a4d8108 18789+}
1308ab2a 18790+
4a4d8108
AM
18791+int au_do_pin(struct au_pin *p)
18792+{
18793+ int err;
18794+ struct super_block *sb;
4a4d8108
AM
18795+ struct inode *h_dir;
18796+
18797+ err = 0;
18798+ sb = p->dentry->d_sb;
86dc4139 18799+ p->br = au_sbr(sb, p->bindex);
4a4d8108
AM
18800+ if (IS_ROOT(p->dentry)) {
18801+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
86dc4139 18802+ p->h_mnt = au_br_mnt(p->br);
b4510431 18803+ err = vfsub_mnt_want_write(p->h_mnt);
4a4d8108
AM
18804+ if (unlikely(err)) {
18805+ au_fclr_pin(p->flags, MNT_WRITE);
18806+ goto out_err;
18807+ }
18808+ }
dece6358 18809+ goto out;
1facf9fc 18810+ }
18811+
86dc4139 18812+ p->h_dentry = NULL;
4a4d8108 18813+ if (p->bindex <= au_dbend(p->dentry))
86dc4139 18814+ p->h_dentry = au_h_dptr(p->dentry, p->bindex);
dece6358 18815+
4a4d8108
AM
18816+ p->parent = dget_parent(p->dentry);
18817+ if (!au_ftest_pin(p->flags, DI_LOCKED))
18818+ di_read_lock(p->parent, AuLock_IR, p->lsc_di);
dece6358 18819+
4a4d8108 18820+ h_dir = NULL;
86dc4139 18821+ p->h_parent = au_h_dptr(p->parent, p->bindex);
5527c038 18822+ p->hdir = au_hi(d_inode(p->parent), p->bindex);
4a4d8108
AM
18823+ if (p->hdir)
18824+ h_dir = p->hdir->hi_inode;
dece6358 18825+
b752ccd1
AM
18826+ /*
18827+ * udba case, or
18828+ * if DI_LOCKED is not set, then p->parent may be different
18829+ * and h_parent can be NULL.
18830+ */
86dc4139 18831+ if (unlikely(!p->hdir || !h_dir || !p->h_parent)) {
e49829fe 18832+ err = -EBUSY;
4a4d8108
AM
18833+ if (!au_ftest_pin(p->flags, DI_LOCKED))
18834+ di_read_unlock(p->parent, AuLock_IR);
18835+ dput(p->parent);
18836+ p->parent = NULL;
18837+ goto out_err;
18838+ }
1308ab2a 18839+
4a4d8108 18840+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
86dc4139 18841+ p->h_mnt = au_br_mnt(p->br);
b4510431 18842+ err = vfsub_mnt_want_write(p->h_mnt);
dece6358 18843+ if (unlikely(err)) {
4a4d8108 18844+ au_fclr_pin(p->flags, MNT_WRITE);
86dc4139
AM
18845+ if (!au_ftest_pin(p->flags, DI_LOCKED))
18846+ di_read_unlock(p->parent, AuLock_IR);
18847+ dput(p->parent);
18848+ p->parent = NULL;
18849+ goto out_err;
dece6358
AM
18850+ }
18851+ }
4a4d8108 18852+
86dc4139
AM
18853+ au_igrab(h_dir);
18854+ err = au_pin_hdir_lock(p);
18855+ if (!err)
18856+ goto out; /* success */
18857+
076b876e
AM
18858+ au_unpin(p);
18859+
4f0767ce 18860+out_err:
4a4d8108
AM
18861+ pr_err("err %d\n", err);
18862+ err = au_busy_or_stale();
4f0767ce 18863+out:
1facf9fc 18864+ return err;
18865+}
18866+
4a4d8108
AM
18867+void au_pin_init(struct au_pin *p, struct dentry *dentry,
18868+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
18869+ unsigned int udba, unsigned char flags)
18870+{
18871+ p->dentry = dentry;
18872+ p->udba = udba;
18873+ p->lsc_di = lsc_di;
18874+ p->lsc_hi = lsc_hi;
18875+ p->flags = flags;
18876+ p->bindex = bindex;
18877+
18878+ p->parent = NULL;
18879+ p->hdir = NULL;
18880+ p->h_mnt = NULL;
86dc4139
AM
18881+
18882+ p->h_dentry = NULL;
18883+ p->h_parent = NULL;
18884+ p->br = NULL;
18885+ p->task = current;
4a4d8108
AM
18886+}
18887+
18888+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
18889+ unsigned int udba, unsigned char flags)
18890+{
18891+ au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2,
18892+ udba, flags);
18893+ return au_do_pin(pin);
18894+}
18895+
dece6358
AM
18896+/* ---------------------------------------------------------------------- */
18897+
1308ab2a 18898+/*
4a4d8108
AM
18899+ * ->setattr() and ->getattr() are called in various cases.
18900+ * chmod, stat: dentry is revalidated.
18901+ * fchmod, fstat: file and dentry are not revalidated, additionally they may be
18902+ * unhashed.
18903+ * for ->setattr(), ia->ia_file is passed from ftruncate only.
1308ab2a 18904+ */
027c5e7a 18905+/* todo: consolidate with do_refresh() and simple_reval_dpath() */
c1595e42 18906+int au_reval_for_attr(struct dentry *dentry, unsigned int sigen)
1facf9fc 18907+{
4a4d8108 18908+ int err;
4a4d8108 18909+ struct dentry *parent;
1facf9fc 18910+
1308ab2a 18911+ err = 0;
027c5e7a 18912+ if (au_digen_test(dentry, sigen)) {
4a4d8108
AM
18913+ parent = dget_parent(dentry);
18914+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 18915+ err = au_refresh_dentry(dentry, parent);
4a4d8108
AM
18916+ di_read_unlock(parent, AuLock_IR);
18917+ dput(parent);
dece6358 18918+ }
1facf9fc 18919+
4a4d8108 18920+ AuTraceErr(err);
1308ab2a 18921+ return err;
18922+}
dece6358 18923+
c1595e42
JR
18924+int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
18925+ struct au_icpup_args *a)
1308ab2a 18926+{
18927+ int err;
4a4d8108 18928+ loff_t sz;
e49829fe 18929+ aufs_bindex_t bstart, ibstart;
4a4d8108
AM
18930+ struct dentry *hi_wh, *parent;
18931+ struct inode *inode;
4a4d8108
AM
18932+ struct au_wr_dir_args wr_dir_args = {
18933+ .force_btgt = -1,
18934+ .flags = 0
18935+ };
18936+
2000de60 18937+ if (d_is_dir(dentry))
4a4d8108
AM
18938+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
18939+ /* plink or hi_wh() case */
2000de60 18940+ bstart = au_dbstart(dentry);
5527c038 18941+ inode = d_inode(dentry);
e49829fe 18942+ ibstart = au_ibstart(inode);
027c5e7a 18943+ if (bstart != ibstart && !au_test_ro(inode->i_sb, ibstart, inode))
e49829fe 18944+ wr_dir_args.force_btgt = ibstart;
4a4d8108
AM
18945+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
18946+ if (unlikely(err < 0))
18947+ goto out;
18948+ a->btgt = err;
18949+ if (err != bstart)
18950+ au_fset_icpup(a->flags, DID_CPUP);
18951+
18952+ err = 0;
18953+ a->pin_flags = AuPin_MNT_WRITE;
18954+ parent = NULL;
18955+ if (!IS_ROOT(dentry)) {
18956+ au_fset_pin(a->pin_flags, DI_LOCKED);
18957+ parent = dget_parent(dentry);
18958+ di_write_lock_parent(parent);
18959+ }
18960+
18961+ err = au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags);
18962+ if (unlikely(err))
18963+ goto out_parent;
18964+
18965+ a->h_path.dentry = au_h_dptr(dentry, bstart);
4a4d8108 18966+ sz = -1;
5527c038 18967+ a->h_inode = d_inode(a->h_path.dentry);
c1595e42
JR
18968+ if (ia && (ia->ia_valid & ATTR_SIZE)) {
18969+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
18970+ if (ia->ia_size < i_size_read(a->h_inode))
18971+ sz = ia->ia_size;
18972+ mutex_unlock(&a->h_inode->i_mutex);
18973+ }
4a4d8108 18974+
4a4d8108 18975+ hi_wh = NULL;
027c5e7a 18976+ if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) {
4a4d8108
AM
18977+ hi_wh = au_hi_wh(inode, a->btgt);
18978+ if (!hi_wh) {
c2b27bf2
AM
18979+ struct au_cp_generic cpg = {
18980+ .dentry = dentry,
18981+ .bdst = a->btgt,
18982+ .bsrc = -1,
18983+ .len = sz,
18984+ .pin = &a->pin
18985+ };
18986+ err = au_sio_cpup_wh(&cpg, /*file*/NULL);
4a4d8108
AM
18987+ if (unlikely(err))
18988+ goto out_unlock;
18989+ hi_wh = au_hi_wh(inode, a->btgt);
18990+ /* todo: revalidate hi_wh? */
18991+ }
18992+ }
18993+
18994+ if (parent) {
18995+ au_pin_set_parent_lflag(&a->pin, /*lflag*/0);
18996+ di_downgrade_lock(parent, AuLock_IR);
18997+ dput(parent);
18998+ parent = NULL;
18999+ }
19000+ if (!au_ftest_icpup(a->flags, DID_CPUP))
19001+ goto out; /* success */
19002+
19003+ if (!d_unhashed(dentry)) {
c2b27bf2
AM
19004+ struct au_cp_generic cpg = {
19005+ .dentry = dentry,
19006+ .bdst = a->btgt,
19007+ .bsrc = bstart,
19008+ .len = sz,
19009+ .pin = &a->pin,
19010+ .flags = AuCpup_DTIME | AuCpup_HOPEN
19011+ };
19012+ err = au_sio_cpup_simple(&cpg);
4a4d8108
AM
19013+ if (!err)
19014+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
19015+ } else if (!hi_wh)
19016+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
19017+ else
19018+ a->h_path.dentry = hi_wh; /* do not dget here */
1308ab2a 19019+
4f0767ce 19020+out_unlock:
5527c038 19021+ a->h_inode = d_inode(a->h_path.dentry);
86dc4139 19022+ if (!err)
dece6358 19023+ goto out; /* success */
4a4d8108 19024+ au_unpin(&a->pin);
4f0767ce 19025+out_parent:
4a4d8108
AM
19026+ if (parent) {
19027+ di_write_unlock(parent);
19028+ dput(parent);
19029+ }
4f0767ce 19030+out:
86dc4139
AM
19031+ if (!err)
19032+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
1facf9fc 19033+ return err;
19034+}
19035+
4a4d8108 19036+static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
1facf9fc 19037+{
4a4d8108 19038+ int err;
523b37e3 19039+ struct inode *inode, *delegated;
4a4d8108
AM
19040+ struct super_block *sb;
19041+ struct file *file;
19042+ struct au_icpup_args *a;
1facf9fc 19043+
5527c038 19044+ inode = d_inode(dentry);
4a4d8108 19045+ IMustLock(inode);
dece6358 19046+
4a4d8108
AM
19047+ err = -ENOMEM;
19048+ a = kzalloc(sizeof(*a), GFP_NOFS);
19049+ if (unlikely(!a))
19050+ goto out;
1facf9fc 19051+
4a4d8108
AM
19052+ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
19053+ ia->ia_valid &= ~ATTR_MODE;
dece6358 19054+
4a4d8108
AM
19055+ file = NULL;
19056+ sb = dentry->d_sb;
e49829fe
JR
19057+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
19058+ if (unlikely(err))
19059+ goto out_kfree;
19060+
4a4d8108
AM
19061+ if (ia->ia_valid & ATTR_FILE) {
19062+ /* currently ftruncate(2) only */
7e9cd9fe 19063+ AuDebugOn(!d_is_reg(dentry));
4a4d8108
AM
19064+ file = ia->ia_file;
19065+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
19066+ if (unlikely(err))
19067+ goto out_si;
19068+ ia->ia_file = au_hf_top(file);
19069+ a->udba = AuOpt_UDBA_NONE;
19070+ } else {
19071+ /* fchmod() doesn't pass ia_file */
19072+ a->udba = au_opt_udba(sb);
027c5e7a
AM
19073+ di_write_lock_child(dentry);
19074+ /* no d_unlinked(), to set UDBA_NONE for root */
4a4d8108
AM
19075+ if (d_unhashed(dentry))
19076+ a->udba = AuOpt_UDBA_NONE;
4a4d8108
AM
19077+ if (a->udba != AuOpt_UDBA_NONE) {
19078+ AuDebugOn(IS_ROOT(dentry));
19079+ err = au_reval_for_attr(dentry, au_sigen(sb));
19080+ if (unlikely(err))
19081+ goto out_dentry;
19082+ }
dece6358 19083+ }
dece6358 19084+
4a4d8108
AM
19085+ err = au_pin_and_icpup(dentry, ia, a);
19086+ if (unlikely(err < 0))
19087+ goto out_dentry;
19088+ if (au_ftest_icpup(a->flags, DID_CPUP)) {
19089+ ia->ia_file = NULL;
19090+ ia->ia_valid &= ~ATTR_FILE;
1308ab2a 19091+ }
dece6358 19092+
4a4d8108
AM
19093+ a->h_path.mnt = au_sbr_mnt(sb, a->btgt);
19094+ if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME))
19095+ == (ATTR_MODE | ATTR_CTIME)) {
7eafdf33 19096+ err = security_path_chmod(&a->h_path, ia->ia_mode);
4a4d8108
AM
19097+ if (unlikely(err))
19098+ goto out_unlock;
19099+ } else if ((ia->ia_valid & (ATTR_UID | ATTR_GID))
19100+ && (ia->ia_valid & ATTR_CTIME)) {
86dc4139 19101+ err = security_path_chown(&a->h_path, ia->ia_uid, ia->ia_gid);
4a4d8108
AM
19102+ if (unlikely(err))
19103+ goto out_unlock;
19104+ }
dece6358 19105+
4a4d8108
AM
19106+ if (ia->ia_valid & ATTR_SIZE) {
19107+ struct file *f;
1308ab2a 19108+
953406b4 19109+ if (ia->ia_size < i_size_read(inode))
4a4d8108 19110+ /* unmap only */
953406b4 19111+ truncate_setsize(inode, ia->ia_size);
1308ab2a 19112+
4a4d8108
AM
19113+ f = NULL;
19114+ if (ia->ia_valid & ATTR_FILE)
19115+ f = ia->ia_file;
19116+ mutex_unlock(&a->h_inode->i_mutex);
19117+ err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
19118+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
523b37e3
AM
19119+ } else {
19120+ delegated = NULL;
19121+ while (1) {
19122+ err = vfsub_notify_change(&a->h_path, ia, &delegated);
19123+ if (delegated) {
19124+ err = break_deleg_wait(&delegated);
19125+ if (!err)
19126+ continue;
19127+ }
19128+ break;
19129+ }
19130+ }
4a4d8108
AM
19131+ if (!err)
19132+ au_cpup_attr_changeable(inode);
1308ab2a 19133+
4f0767ce 19134+out_unlock:
4a4d8108
AM
19135+ mutex_unlock(&a->h_inode->i_mutex);
19136+ au_unpin(&a->pin);
027c5e7a
AM
19137+ if (unlikely(err))
19138+ au_update_dbstart(dentry);
4f0767ce 19139+out_dentry:
4a4d8108
AM
19140+ di_write_unlock(dentry);
19141+ if (file) {
19142+ fi_write_unlock(file);
19143+ ia->ia_file = file;
19144+ ia->ia_valid |= ATTR_FILE;
19145+ }
4f0767ce 19146+out_si:
4a4d8108 19147+ si_read_unlock(sb);
e49829fe 19148+out_kfree:
4a4d8108 19149+ kfree(a);
4f0767ce 19150+out:
4a4d8108
AM
19151+ AuTraceErr(err);
19152+ return err;
1facf9fc 19153+}
19154+
c1595e42
JR
19155+#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
19156+static int au_h_path_to_set_attr(struct dentry *dentry,
19157+ struct au_icpup_args *a, struct path *h_path)
19158+{
19159+ int err;
19160+ struct super_block *sb;
19161+
19162+ sb = dentry->d_sb;
19163+ a->udba = au_opt_udba(sb);
19164+ /* no d_unlinked(), to set UDBA_NONE for root */
19165+ if (d_unhashed(dentry))
19166+ a->udba = AuOpt_UDBA_NONE;
19167+ if (a->udba != AuOpt_UDBA_NONE) {
19168+ AuDebugOn(IS_ROOT(dentry));
19169+ err = au_reval_for_attr(dentry, au_sigen(sb));
19170+ if (unlikely(err))
19171+ goto out;
19172+ }
19173+ err = au_pin_and_icpup(dentry, /*ia*/NULL, a);
19174+ if (unlikely(err < 0))
19175+ goto out;
19176+
19177+ h_path->dentry = a->h_path.dentry;
19178+ h_path->mnt = au_sbr_mnt(sb, a->btgt);
19179+
19180+out:
19181+ return err;
19182+}
19183+
19184+ssize_t au_srxattr(struct dentry *dentry, struct au_srxattr *arg)
19185+{
19186+ int err;
19187+ struct path h_path;
19188+ struct super_block *sb;
19189+ struct au_icpup_args *a;
19190+ struct inode *inode, *h_inode;
19191+
5527c038 19192+ inode = d_inode(dentry);
c1595e42
JR
19193+ IMustLock(inode);
19194+
19195+ err = -ENOMEM;
19196+ a = kzalloc(sizeof(*a), GFP_NOFS);
19197+ if (unlikely(!a))
19198+ goto out;
19199+
19200+ sb = dentry->d_sb;
19201+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
19202+ if (unlikely(err))
19203+ goto out_kfree;
19204+
19205+ h_path.dentry = NULL; /* silence gcc */
19206+ di_write_lock_child(dentry);
19207+ err = au_h_path_to_set_attr(dentry, a, &h_path);
19208+ if (unlikely(err))
19209+ goto out_di;
19210+
19211+ mutex_unlock(&a->h_inode->i_mutex);
19212+ switch (arg->type) {
19213+ case AU_XATTR_SET:
19214+ err = vfsub_setxattr(h_path.dentry,
19215+ arg->u.set.name, arg->u.set.value,
19216+ arg->u.set.size, arg->u.set.flags);
19217+ break;
19218+ case AU_XATTR_REMOVE:
19219+ err = vfsub_removexattr(h_path.dentry, arg->u.remove.name);
19220+ break;
19221+ case AU_ACL_SET:
19222+ err = -EOPNOTSUPP;
5527c038 19223+ h_inode = d_inode(h_path.dentry);
c1595e42
JR
19224+ if (h_inode->i_op->set_acl)
19225+ err = h_inode->i_op->set_acl(h_inode,
19226+ arg->u.acl_set.acl,
19227+ arg->u.acl_set.type);
19228+ break;
19229+ }
19230+ if (!err)
19231+ au_cpup_attr_timesizes(inode);
19232+
19233+ au_unpin(&a->pin);
19234+ if (unlikely(err))
19235+ au_update_dbstart(dentry);
19236+
19237+out_di:
19238+ di_write_unlock(dentry);
19239+ si_read_unlock(sb);
19240+out_kfree:
19241+ kfree(a);
19242+out:
19243+ AuTraceErr(err);
19244+ return err;
19245+}
19246+#endif
19247+
4a4d8108
AM
19248+static void au_refresh_iattr(struct inode *inode, struct kstat *st,
19249+ unsigned int nlink)
1facf9fc 19250+{
9dbd164d
AM
19251+ unsigned int n;
19252+
4a4d8108 19253+ inode->i_mode = st->mode;
86dc4139
AM
19254+ /* don't i_[ug]id_write() here */
19255+ inode->i_uid = st->uid;
19256+ inode->i_gid = st->gid;
4a4d8108
AM
19257+ inode->i_atime = st->atime;
19258+ inode->i_mtime = st->mtime;
19259+ inode->i_ctime = st->ctime;
1facf9fc 19260+
4a4d8108
AM
19261+ au_cpup_attr_nlink(inode, /*force*/0);
19262+ if (S_ISDIR(inode->i_mode)) {
9dbd164d
AM
19263+ n = inode->i_nlink;
19264+ n -= nlink;
19265+ n += st->nlink;
f6b6e03d 19266+ smp_mb(); /* for i_nlink */
7eafdf33 19267+ /* 0 can happen */
92d182d2 19268+ set_nlink(inode, n);
4a4d8108 19269+ }
1facf9fc 19270+
4a4d8108
AM
19271+ spin_lock(&inode->i_lock);
19272+ inode->i_blocks = st->blocks;
19273+ i_size_write(inode, st->size);
19274+ spin_unlock(&inode->i_lock);
1facf9fc 19275+}
19276+
c1595e42
JR
19277+/*
19278+ * common routine for aufs_getattr() and aufs_getxattr().
19279+ * returns zero or negative (an error).
19280+ * @dentry will be read-locked in success.
19281+ */
19282+int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path)
1facf9fc 19283+{
4a4d8108 19284+ int err;
076b876e 19285+ unsigned int mnt_flags, sigen;
c1595e42 19286+ unsigned char udba_none;
4a4d8108 19287+ aufs_bindex_t bindex;
4a4d8108
AM
19288+ struct super_block *sb, *h_sb;
19289+ struct inode *inode;
1facf9fc 19290+
c1595e42
JR
19291+ h_path->mnt = NULL;
19292+ h_path->dentry = NULL;
19293+
19294+ err = 0;
4a4d8108 19295+ sb = dentry->d_sb;
4a4d8108
AM
19296+ mnt_flags = au_mntflags(sb);
19297+ udba_none = !!au_opt_test(mnt_flags, UDBA_NONE);
1facf9fc 19298+
4a4d8108 19299+ /* support fstat(2) */
027c5e7a 19300+ if (!d_unlinked(dentry) && !udba_none) {
076b876e 19301+ sigen = au_sigen(sb);
027c5e7a
AM
19302+ err = au_digen_test(dentry, sigen);
19303+ if (!err) {
4a4d8108 19304+ di_read_lock_child(dentry, AuLock_IR);
027c5e7a 19305+ err = au_dbrange_test(dentry);
c1595e42
JR
19306+ if (unlikely(err)) {
19307+ di_read_unlock(dentry, AuLock_IR);
19308+ goto out;
19309+ }
027c5e7a 19310+ } else {
4a4d8108
AM
19311+ AuDebugOn(IS_ROOT(dentry));
19312+ di_write_lock_child(dentry);
027c5e7a
AM
19313+ err = au_dbrange_test(dentry);
19314+ if (!err)
19315+ err = au_reval_for_attr(dentry, sigen);
c1595e42
JR
19316+ if (!err)
19317+ di_downgrade_lock(dentry, AuLock_IR);
19318+ else {
19319+ di_write_unlock(dentry);
19320+ goto out;
19321+ }
4a4d8108
AM
19322+ }
19323+ } else
19324+ di_read_lock_child(dentry, AuLock_IR);
1facf9fc 19325+
5527c038 19326+ inode = d_inode(dentry);
4a4d8108 19327+ bindex = au_ibstart(inode);
c1595e42
JR
19328+ h_path->mnt = au_sbr_mnt(sb, bindex);
19329+ h_sb = h_path->mnt->mnt_sb;
19330+ if (!force
19331+ && !au_test_fs_bad_iattr(h_sb)
19332+ && udba_none)
19333+ goto out; /* success */
1facf9fc 19334+
4a4d8108 19335+ if (au_dbstart(dentry) == bindex)
c1595e42 19336+ h_path->dentry = au_h_dptr(dentry, bindex);
4a4d8108 19337+ else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
c1595e42
JR
19338+ h_path->dentry = au_plink_lkup(inode, bindex);
19339+ if (IS_ERR(h_path->dentry))
19340+ /* pretending success */
19341+ h_path->dentry = NULL;
19342+ else
19343+ dput(h_path->dentry);
4a4d8108 19344+ }
c1595e42
JR
19345+
19346+out:
19347+ return err;
19348+}
19349+
19350+static int aufs_getattr(struct vfsmount *mnt __maybe_unused,
19351+ struct dentry *dentry, struct kstat *st)
19352+{
19353+ int err;
19354+ unsigned char positive;
19355+ struct path h_path;
19356+ struct inode *inode;
19357+ struct super_block *sb;
19358+
5527c038 19359+ inode = d_inode(dentry);
c1595e42
JR
19360+ sb = dentry->d_sb;
19361+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
19362+ if (unlikely(err))
19363+ goto out;
19364+ err = au_h_path_getattr(dentry, /*force*/0, &h_path);
19365+ if (unlikely(err))
19366+ goto out_si;
c06a8ce3 19367+ if (unlikely(!h_path.dentry))
c1595e42 19368+ /* illegally overlapped or something */
4a4d8108
AM
19369+ goto out_fill; /* pretending success */
19370+
5527c038 19371+ positive = d_is_positive(h_path.dentry);
4a4d8108 19372+ if (positive)
c06a8ce3 19373+ err = vfs_getattr(&h_path, st);
4a4d8108
AM
19374+ if (!err) {
19375+ if (positive)
c06a8ce3 19376+ au_refresh_iattr(inode, st,
5527c038 19377+ d_inode(h_path.dentry)->i_nlink);
4a4d8108 19378+ goto out_fill; /* success */
1facf9fc 19379+ }
7f207e10 19380+ AuTraceErr(err);
c1595e42 19381+ goto out_di;
4a4d8108 19382+
4f0767ce 19383+out_fill:
4a4d8108 19384+ generic_fillattr(inode, st);
c1595e42 19385+out_di:
4a4d8108 19386+ di_read_unlock(dentry, AuLock_IR);
c1595e42 19387+out_si:
4a4d8108 19388+ si_read_unlock(sb);
7f207e10
AM
19389+out:
19390+ AuTraceErr(err);
4a4d8108 19391+ return err;
1facf9fc 19392+}
19393+
19394+/* ---------------------------------------------------------------------- */
19395+
4a4d8108
AM
19396+static int h_readlink(struct dentry *dentry, int bindex, char __user *buf,
19397+ int bufsiz)
1facf9fc 19398+{
19399+ int err;
4a4d8108
AM
19400+ struct super_block *sb;
19401+ struct dentry *h_dentry;
5527c038 19402+ struct inode *inode, *h_inode;
1facf9fc 19403+
4a4d8108
AM
19404+ err = -EINVAL;
19405+ h_dentry = au_h_dptr(dentry, bindex);
5527c038
JR
19406+ h_inode = d_inode(h_dentry);
19407+ if (unlikely(!h_inode->i_op->readlink))
4a4d8108 19408+ goto out;
1facf9fc 19409+
4a4d8108
AM
19410+ err = security_inode_readlink(h_dentry);
19411+ if (unlikely(err))
dece6358 19412+ goto out;
1facf9fc 19413+
4a4d8108 19414+ sb = dentry->d_sb;
5527c038
JR
19415+ inode = d_inode(dentry);
19416+ if (!au_test_ro(sb, bindex, inode)) {
4a4d8108 19417+ vfsub_touch_atime(au_sbr_mnt(sb, bindex), h_dentry);
5527c038 19418+ fsstack_copy_attr_atime(inode, h_inode);
1facf9fc 19419+ }
5527c038 19420+ err = h_inode->i_op->readlink(h_dentry, buf, bufsiz);
1facf9fc 19421+
4f0767ce 19422+out:
4a4d8108
AM
19423+ return err;
19424+}
1facf9fc 19425+
4a4d8108
AM
19426+static int aufs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
19427+{
19428+ int err;
1facf9fc 19429+
027c5e7a
AM
19430+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
19431+ if (unlikely(err))
19432+ goto out;
19433+ err = au_d_hashed_positive(dentry);
19434+ if (!err)
19435+ err = h_readlink(dentry, au_dbstart(dentry), buf, bufsiz);
4a4d8108 19436+ aufs_read_unlock(dentry, AuLock_IR);
1facf9fc 19437+
027c5e7a 19438+out:
4a4d8108
AM
19439+ return err;
19440+}
1facf9fc 19441+
4a4d8108
AM
19442+static void *aufs_follow_link(struct dentry *dentry, struct nameidata *nd)
19443+{
19444+ int err;
4a4d8108 19445+ mm_segment_t old_fs;
b752ccd1
AM
19446+ union {
19447+ char *k;
19448+ char __user *u;
19449+ } buf;
1facf9fc 19450+
4a4d8108 19451+ err = -ENOMEM;
537831f9 19452+ buf.k = (void *)__get_free_page(GFP_NOFS);
b752ccd1 19453+ if (unlikely(!buf.k))
4a4d8108 19454+ goto out;
1facf9fc 19455+
027c5e7a
AM
19456+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
19457+ if (unlikely(err))
19458+ goto out_name;
19459+
19460+ err = au_d_hashed_positive(dentry);
19461+ if (!err) {
19462+ old_fs = get_fs();
19463+ set_fs(KERNEL_DS);
19464+ err = h_readlink(dentry, au_dbstart(dentry), buf.u, PATH_MAX);
19465+ set_fs(old_fs);
19466+ }
4a4d8108 19467+ aufs_read_unlock(dentry, AuLock_IR);
1facf9fc 19468+
4a4d8108 19469+ if (err >= 0) {
b752ccd1 19470+ buf.k[err] = 0;
4a4d8108 19471+ /* will be freed by put_link */
b752ccd1 19472+ nd_set_link(nd, buf.k);
4a4d8108 19473+ return NULL; /* success */
1308ab2a 19474+ }
1facf9fc 19475+
027c5e7a 19476+out_name:
537831f9 19477+ free_page((unsigned long)buf.k);
4f0767ce 19478+out:
4a4d8108
AM
19479+ AuTraceErr(err);
19480+ return ERR_PTR(err);
19481+}
1facf9fc 19482+
4a4d8108
AM
19483+static void aufs_put_link(struct dentry *dentry __maybe_unused,
19484+ struct nameidata *nd, void *cookie __maybe_unused)
19485+{
537831f9
AM
19486+ char *p;
19487+
19488+ p = nd_get_link(nd);
19489+ if (!IS_ERR_OR_NULL(p))
19490+ free_page((unsigned long)p);
4a4d8108 19491+}
1facf9fc 19492+
4a4d8108 19493+/* ---------------------------------------------------------------------- */
1facf9fc 19494+
0c3ec466 19495+static int aufs_update_time(struct inode *inode, struct timespec *ts, int flags)
4a4d8108 19496+{
0c3ec466
AM
19497+ int err;
19498+ struct super_block *sb;
19499+ struct inode *h_inode;
19500+
19501+ sb = inode->i_sb;
19502+ /* mmap_sem might be acquired already, cf. aufs_mmap() */
19503+ lockdep_off();
19504+ si_read_lock(sb, AuLock_FLUSH);
19505+ ii_write_lock_child(inode);
19506+ lockdep_on();
19507+ h_inode = au_h_iptr(inode, au_ibstart(inode));
19508+ err = vfsub_update_time(h_inode, ts, flags);
19509+ lockdep_off();
38d290e6
JR
19510+ if (!err)
19511+ au_cpup_attr_timesizes(inode);
0c3ec466
AM
19512+ ii_write_unlock(inode);
19513+ si_read_unlock(sb);
19514+ lockdep_on();
38d290e6
JR
19515+
19516+ if (!err && (flags & S_VERSION))
19517+ inode_inc_iversion(inode);
19518+
0c3ec466 19519+ return err;
4a4d8108 19520+}
1facf9fc 19521+
4a4d8108 19522+/* ---------------------------------------------------------------------- */
1308ab2a 19523+
ab036dbd
AM
19524+/* no getattr version will be set by module.c:aufs_init() */
19525+struct inode_operations aufs_iop_nogetattr[AuIop_Last],
19526+ aufs_iop[] = {
19527+ [AuIop_SYMLINK] = {
19528+ .permission = aufs_permission,
c1595e42 19529+#ifdef CONFIG_FS_POSIX_ACL
ab036dbd
AM
19530+ .get_acl = aufs_get_acl,
19531+ .set_acl = aufs_set_acl, /* unsupport for symlink? */
c1595e42
JR
19532+#endif
19533+
ab036dbd
AM
19534+ .setattr = aufs_setattr,
19535+ .getattr = aufs_getattr,
0c3ec466 19536+
c1595e42 19537+#ifdef CONFIG_AUFS_XATTR
ab036dbd
AM
19538+ .setxattr = aufs_setxattr,
19539+ .getxattr = aufs_getxattr,
19540+ .listxattr = aufs_listxattr,
19541+ .removexattr = aufs_removexattr,
c1595e42
JR
19542+#endif
19543+
ab036dbd
AM
19544+ .readlink = aufs_readlink,
19545+ .follow_link = aufs_follow_link,
19546+ .put_link = aufs_put_link,
4a4d8108 19547+
ab036dbd
AM
19548+ /* .update_time = aufs_update_time */
19549+ },
19550+ [AuIop_DIR] = {
19551+ .create = aufs_create,
19552+ .lookup = aufs_lookup,
19553+ .link = aufs_link,
19554+ .unlink = aufs_unlink,
19555+ .symlink = aufs_symlink,
19556+ .mkdir = aufs_mkdir,
19557+ .rmdir = aufs_rmdir,
19558+ .mknod = aufs_mknod,
19559+ .rename = aufs_rename,
19560+
19561+ .permission = aufs_permission,
c1595e42 19562+#ifdef CONFIG_FS_POSIX_ACL
ab036dbd
AM
19563+ .get_acl = aufs_get_acl,
19564+ .set_acl = aufs_set_acl,
c1595e42
JR
19565+#endif
19566+
ab036dbd
AM
19567+ .setattr = aufs_setattr,
19568+ .getattr = aufs_getattr,
0c3ec466 19569+
c1595e42 19570+#ifdef CONFIG_AUFS_XATTR
ab036dbd
AM
19571+ .setxattr = aufs_setxattr,
19572+ .getxattr = aufs_getxattr,
19573+ .listxattr = aufs_listxattr,
19574+ .removexattr = aufs_removexattr,
c1595e42
JR
19575+#endif
19576+
ab036dbd
AM
19577+ .update_time = aufs_update_time,
19578+ .atomic_open = aufs_atomic_open,
19579+ .tmpfile = aufs_tmpfile
19580+ },
19581+ [AuIop_OTHER] = {
19582+ .permission = aufs_permission,
c1595e42 19583+#ifdef CONFIG_FS_POSIX_ACL
ab036dbd
AM
19584+ .get_acl = aufs_get_acl,
19585+ .set_acl = aufs_set_acl,
c1595e42
JR
19586+#endif
19587+
ab036dbd
AM
19588+ .setattr = aufs_setattr,
19589+ .getattr = aufs_getattr,
0c3ec466 19590+
c1595e42 19591+#ifdef CONFIG_AUFS_XATTR
ab036dbd
AM
19592+ .setxattr = aufs_setxattr,
19593+ .getxattr = aufs_getxattr,
19594+ .listxattr = aufs_listxattr,
19595+ .removexattr = aufs_removexattr,
c1595e42
JR
19596+#endif
19597+
ab036dbd
AM
19598+ .update_time = aufs_update_time
19599+ }
4a4d8108 19600+};
7f207e10
AM
19601diff -urN /usr/share/empty/fs/aufs/i_op_del.c linux/fs/aufs/i_op_del.c
19602--- /usr/share/empty/fs/aufs/i_op_del.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 19603+++ linux/fs/aufs/i_op_del.c 2015-09-24 10:47:58.254719746 +0200
5527c038 19604@@ -0,0 +1,510 @@
1facf9fc 19605+/*
2000de60 19606+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 19607+ *
19608+ * This program, aufs is free software; you can redistribute it and/or modify
19609+ * it under the terms of the GNU General Public License as published by
19610+ * the Free Software Foundation; either version 2 of the License, or
19611+ * (at your option) any later version.
dece6358
AM
19612+ *
19613+ * This program is distributed in the hope that it will be useful,
19614+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
19615+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19616+ * GNU General Public License for more details.
19617+ *
19618+ * You should have received a copy of the GNU General Public License
523b37e3 19619+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 19620+ */
19621+
19622+/*
4a4d8108 19623+ * inode operations (del entry)
1308ab2a 19624+ */
dece6358 19625+
1308ab2a 19626+#include "aufs.h"
dece6358 19627+
4a4d8108
AM
19628+/*
19629+ * decide if a new whiteout for @dentry is necessary or not.
19630+ * when it is necessary, prepare the parent dir for the upper branch whose
19631+ * branch index is @bcpup for creation. the actual creation of the whiteout will
19632+ * be done by caller.
19633+ * return value:
19634+ * 0: wh is unnecessary
19635+ * plus: wh is necessary
19636+ * minus: error
19637+ */
19638+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
1308ab2a 19639+{
4a4d8108
AM
19640+ int need_wh, err;
19641+ aufs_bindex_t bstart;
19642+ struct super_block *sb;
dece6358 19643+
4a4d8108
AM
19644+ sb = dentry->d_sb;
19645+ bstart = au_dbstart(dentry);
19646+ if (*bcpup < 0) {
19647+ *bcpup = bstart;
5527c038 19648+ if (au_test_ro(sb, bstart, d_inode(dentry))) {
4a4d8108
AM
19649+ err = AuWbrCopyup(au_sbi(sb), dentry);
19650+ *bcpup = err;
19651+ if (unlikely(err < 0))
19652+ goto out;
19653+ }
19654+ } else
19655+ AuDebugOn(bstart < *bcpup
5527c038 19656+ || au_test_ro(sb, *bcpup, d_inode(dentry)));
4a4d8108 19657+ AuDbg("bcpup %d, bstart %d\n", *bcpup, bstart);
1308ab2a 19658+
4a4d8108
AM
19659+ if (*bcpup != bstart) {
19660+ err = au_cpup_dirs(dentry, *bcpup);
19661+ if (unlikely(err))
19662+ goto out;
19663+ need_wh = 1;
19664+ } else {
027c5e7a 19665+ struct au_dinfo *dinfo, *tmp;
4a4d8108 19666+
027c5e7a
AM
19667+ need_wh = -ENOMEM;
19668+ dinfo = au_di(dentry);
19669+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
19670+ if (tmp) {
19671+ au_di_cp(tmp, dinfo);
19672+ au_di_swap(tmp, dinfo);
19673+ /* returns the number of positive dentries */
537831f9 19674+ need_wh = au_lkup_dentry(dentry, bstart + 1, /*type*/0);
027c5e7a
AM
19675+ au_di_swap(tmp, dinfo);
19676+ au_rw_write_unlock(&tmp->di_rwsem);
19677+ au_di_free(tmp);
4a4d8108
AM
19678+ }
19679+ }
19680+ AuDbg("need_wh %d\n", need_wh);
19681+ err = need_wh;
19682+
4f0767ce 19683+out:
4a4d8108 19684+ return err;
1facf9fc 19685+}
19686+
4a4d8108
AM
19687+/*
19688+ * simple tests for the del-entry operations.
19689+ * following the checks in vfs, plus the parent-child relationship.
19690+ */
19691+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
19692+ struct dentry *h_parent, int isdir)
1facf9fc 19693+{
4a4d8108
AM
19694+ int err;
19695+ umode_t h_mode;
19696+ struct dentry *h_dentry, *h_latest;
1308ab2a 19697+ struct inode *h_inode;
1facf9fc 19698+
4a4d8108 19699+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 19700+ if (d_really_is_positive(dentry)) {
4a4d8108 19701+ err = -ENOENT;
5527c038
JR
19702+ if (unlikely(d_is_negative(h_dentry)))
19703+ goto out;
19704+ h_inode = d_inode(h_dentry);
19705+ if (unlikely(!h_inode->i_nlink))
4a4d8108 19706+ goto out;
1facf9fc 19707+
4a4d8108
AM
19708+ h_mode = h_inode->i_mode;
19709+ if (!isdir) {
19710+ err = -EISDIR;
19711+ if (unlikely(S_ISDIR(h_mode)))
19712+ goto out;
19713+ } else if (unlikely(!S_ISDIR(h_mode))) {
19714+ err = -ENOTDIR;
19715+ goto out;
19716+ }
19717+ } else {
19718+ /* rename(2) case */
19719+ err = -EIO;
5527c038 19720+ if (unlikely(d_is_positive(h_dentry)))
4a4d8108
AM
19721+ goto out;
19722+ }
1facf9fc 19723+
4a4d8108
AM
19724+ err = -ENOENT;
19725+ /* expected parent dir is locked */
19726+ if (unlikely(h_parent != h_dentry->d_parent))
19727+ goto out;
19728+ err = 0;
19729+
19730+ /*
19731+ * rmdir a dir may break the consistency on some filesystem.
19732+ * let's try heavy test.
19733+ */
19734+ err = -EACCES;
076b876e 19735+ if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1)
5527c038 19736+ && au_test_h_perm(d_inode(h_parent),
076b876e 19737+ MAY_EXEC | MAY_WRITE)))
4a4d8108
AM
19738+ goto out;
19739+
076b876e 19740+ h_latest = au_sio_lkup_one(&dentry->d_name, h_parent);
4a4d8108
AM
19741+ err = -EIO;
19742+ if (IS_ERR(h_latest))
19743+ goto out;
19744+ if (h_latest == h_dentry)
19745+ err = 0;
19746+ dput(h_latest);
19747+
4f0767ce 19748+out:
4a4d8108 19749+ return err;
1308ab2a 19750+}
1facf9fc 19751+
4a4d8108
AM
19752+/*
19753+ * decide the branch where we operate for @dentry. the branch index will be set
19754+ * @rbcpup. after diciding it, 'pin' it and store the timestamps of the parent
19755+ * dir for reverting.
19756+ * when a new whiteout is necessary, create it.
19757+ */
19758+static struct dentry*
19759+lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup,
19760+ struct au_dtime *dt, struct au_pin *pin)
1308ab2a 19761+{
4a4d8108
AM
19762+ struct dentry *wh_dentry;
19763+ struct super_block *sb;
19764+ struct path h_path;
19765+ int err, need_wh;
19766+ unsigned int udba;
19767+ aufs_bindex_t bcpup;
dece6358 19768+
4a4d8108
AM
19769+ need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup);
19770+ wh_dentry = ERR_PTR(need_wh);
19771+ if (unlikely(need_wh < 0))
19772+ goto out;
19773+
19774+ sb = dentry->d_sb;
19775+ udba = au_opt_udba(sb);
19776+ bcpup = *rbcpup;
19777+ err = au_pin(pin, dentry, bcpup, udba,
19778+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
19779+ wh_dentry = ERR_PTR(err);
19780+ if (unlikely(err))
19781+ goto out;
19782+
19783+ h_path.dentry = au_pinned_h_parent(pin);
19784+ if (udba != AuOpt_UDBA_NONE
19785+ && au_dbstart(dentry) == bcpup) {
19786+ err = au_may_del(dentry, bcpup, h_path.dentry, isdir);
19787+ wh_dentry = ERR_PTR(err);
19788+ if (unlikely(err))
19789+ goto out_unpin;
19790+ }
19791+
19792+ h_path.mnt = au_sbr_mnt(sb, bcpup);
19793+ au_dtime_store(dt, au_pinned_parent(pin), &h_path);
19794+ wh_dentry = NULL;
19795+ if (!need_wh)
19796+ goto out; /* success, no need to create whiteout */
19797+
19798+ wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry);
19799+ if (IS_ERR(wh_dentry))
19800+ goto out_unpin;
19801+
19802+ /* returns with the parent is locked and wh_dentry is dget-ed */
19803+ goto out; /* success */
19804+
4f0767ce 19805+out_unpin:
4a4d8108 19806+ au_unpin(pin);
4f0767ce 19807+out:
4a4d8108 19808+ return wh_dentry;
1facf9fc 19809+}
19810+
4a4d8108
AM
19811+/*
19812+ * when removing a dir, rename it to a unique temporary whiteout-ed name first
19813+ * in order to be revertible and save time for removing many child whiteouts
19814+ * under the dir.
19815+ * returns 1 when there are too many child whiteout and caller should remove
19816+ * them asynchronously. returns 0 when the number of children is enough small to
19817+ * remove now or the branch fs is a remote fs.
19818+ * otherwise return an error.
19819+ */
19820+static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex,
19821+ struct au_nhash *whlist, struct inode *dir)
1facf9fc 19822+{
4a4d8108
AM
19823+ int rmdir_later, err, dirwh;
19824+ struct dentry *h_dentry;
19825+ struct super_block *sb;
5527c038 19826+ struct inode *inode;
4a4d8108
AM
19827+
19828+ sb = dentry->d_sb;
19829+ SiMustAnyLock(sb);
19830+ h_dentry = au_h_dptr(dentry, bindex);
19831+ err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex));
19832+ if (unlikely(err))
19833+ goto out;
19834+
19835+ /* stop monitoring */
5527c038
JR
19836+ inode = d_inode(dentry);
19837+ au_hn_free(au_hi(inode, bindex));
4a4d8108
AM
19838+
19839+ if (!au_test_fs_remote(h_dentry->d_sb)) {
19840+ dirwh = au_sbi(sb)->si_dirwh;
19841+ rmdir_later = (dirwh <= 1);
19842+ if (!rmdir_later)
19843+ rmdir_later = au_nhash_test_longer_wh(whlist, bindex,
19844+ dirwh);
19845+ if (rmdir_later)
19846+ return rmdir_later;
19847+ }
1facf9fc 19848+
4a4d8108
AM
19849+ err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist);
19850+ if (unlikely(err)) {
523b37e3
AM
19851+ AuIOErr("rmdir %pd, b%d failed, %d. ignored\n",
19852+ h_dentry, bindex, err);
4a4d8108
AM
19853+ err = 0;
19854+ }
dece6358 19855+
4f0767ce 19856+out:
4a4d8108
AM
19857+ AuTraceErr(err);
19858+ return err;
19859+}
1308ab2a 19860+
4a4d8108
AM
19861+/*
19862+ * final procedure for deleting a entry.
19863+ * maintain dentry and iattr.
19864+ */
19865+static void epilog(struct inode *dir, struct dentry *dentry,
19866+ aufs_bindex_t bindex)
19867+{
19868+ struct inode *inode;
1308ab2a 19869+
5527c038 19870+ inode = d_inode(dentry);
4a4d8108
AM
19871+ d_drop(dentry);
19872+ inode->i_ctime = dir->i_ctime;
1308ab2a 19873+
b912730e 19874+ au_dir_ts(dir, bindex);
4a4d8108 19875+ dir->i_version++;
1facf9fc 19876+}
19877+
4a4d8108
AM
19878+/*
19879+ * when an error happened, remove the created whiteout and revert everything.
19880+ */
7f207e10
AM
19881+static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex,
19882+ aufs_bindex_t bwh, struct dentry *wh_dentry,
19883+ struct dentry *dentry, struct au_dtime *dt)
1facf9fc 19884+{
4a4d8108
AM
19885+ int rerr;
19886+ struct path h_path = {
19887+ .dentry = wh_dentry,
7f207e10 19888+ .mnt = au_sbr_mnt(dir->i_sb, bindex)
4a4d8108 19889+ };
dece6358 19890+
7f207e10 19891+ rerr = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, dentry);
4a4d8108
AM
19892+ if (!rerr) {
19893+ au_set_dbwh(dentry, bwh);
19894+ au_dtime_revert(dt);
19895+ return 0;
19896+ }
dece6358 19897+
523b37e3 19898+ AuIOErr("%pd reverting whiteout failed(%d, %d)\n", dentry, err, rerr);
4a4d8108 19899+ return -EIO;
1facf9fc 19900+}
19901+
4a4d8108 19902+/* ---------------------------------------------------------------------- */
1facf9fc 19903+
4a4d8108 19904+int aufs_unlink(struct inode *dir, struct dentry *dentry)
1308ab2a 19905+{
4a4d8108
AM
19906+ int err;
19907+ aufs_bindex_t bwh, bindex, bstart;
523b37e3 19908+ struct inode *inode, *h_dir, *delegated;
4a4d8108 19909+ struct dentry *parent, *wh_dentry;
c2b27bf2
AM
19910+ /* to reuduce stack size */
19911+ struct {
19912+ struct au_dtime dt;
19913+ struct au_pin pin;
19914+ struct path h_path;
19915+ } *a;
1facf9fc 19916+
4a4d8108 19917+ IMustLock(dir);
027c5e7a 19918+
c2b27bf2
AM
19919+ err = -ENOMEM;
19920+ a = kmalloc(sizeof(*a), GFP_NOFS);
19921+ if (unlikely(!a))
19922+ goto out;
19923+
027c5e7a
AM
19924+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
19925+ if (unlikely(err))
c2b27bf2 19926+ goto out_free;
027c5e7a
AM
19927+ err = au_d_hashed_positive(dentry);
19928+ if (unlikely(err))
19929+ goto out_unlock;
5527c038 19930+ inode = d_inode(dentry);
4a4d8108 19931+ IMustLock(inode);
027c5e7a 19932+ err = -EISDIR;
2000de60 19933+ if (unlikely(d_is_dir(dentry)))
027c5e7a 19934+ goto out_unlock; /* possible? */
1facf9fc 19935+
4a4d8108
AM
19936+ bstart = au_dbstart(dentry);
19937+ bwh = au_dbwh(dentry);
19938+ bindex = -1;
027c5e7a
AM
19939+ parent = dentry->d_parent; /* dir inode is locked */
19940+ di_write_lock_parent(parent);
c2b27bf2
AM
19941+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &a->dt,
19942+ &a->pin);
4a4d8108
AM
19943+ err = PTR_ERR(wh_dentry);
19944+ if (IS_ERR(wh_dentry))
027c5e7a 19945+ goto out_parent;
1facf9fc 19946+
c2b27bf2
AM
19947+ a->h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
19948+ a->h_path.dentry = au_h_dptr(dentry, bstart);
19949+ dget(a->h_path.dentry);
4a4d8108 19950+ if (bindex == bstart) {
c2b27bf2 19951+ h_dir = au_pinned_h_dir(&a->pin);
523b37e3
AM
19952+ delegated = NULL;
19953+ err = vfsub_unlink(h_dir, &a->h_path, &delegated, /*force*/0);
19954+ if (unlikely(err == -EWOULDBLOCK)) {
19955+ pr_warn("cannot retry for NFSv4 delegation"
19956+ " for an internal unlink\n");
19957+ iput(delegated);
19958+ }
4a4d8108
AM
19959+ } else {
19960+ /* dir inode is locked */
5527c038 19961+ h_dir = d_inode(wh_dentry->d_parent);
4a4d8108
AM
19962+ IMustLock(h_dir);
19963+ err = 0;
19964+ }
dece6358 19965+
4a4d8108 19966+ if (!err) {
7f207e10 19967+ vfsub_drop_nlink(inode);
4a4d8108
AM
19968+ epilog(dir, dentry, bindex);
19969+
19970+ /* update target timestamps */
19971+ if (bindex == bstart) {
c2b27bf2
AM
19972+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL);
19973+ /*ignore*/
5527c038 19974+ inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
4a4d8108
AM
19975+ } else
19976+ /* todo: this timestamp may be reverted later */
19977+ inode->i_ctime = h_dir->i_ctime;
027c5e7a 19978+ goto out_unpin; /* success */
1facf9fc 19979+ }
19980+
4a4d8108
AM
19981+ /* revert */
19982+ if (wh_dentry) {
19983+ int rerr;
19984+
c2b27bf2
AM
19985+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
19986+ &a->dt);
4a4d8108
AM
19987+ if (rerr)
19988+ err = rerr;
dece6358 19989+ }
1facf9fc 19990+
027c5e7a 19991+out_unpin:
c2b27bf2 19992+ au_unpin(&a->pin);
4a4d8108 19993+ dput(wh_dentry);
c2b27bf2 19994+ dput(a->h_path.dentry);
027c5e7a 19995+out_parent:
4a4d8108 19996+ di_write_unlock(parent);
027c5e7a 19997+out_unlock:
4a4d8108 19998+ aufs_read_unlock(dentry, AuLock_DW);
c2b27bf2
AM
19999+out_free:
20000+ kfree(a);
027c5e7a 20001+out:
4a4d8108 20002+ return err;
dece6358
AM
20003+}
20004+
4a4d8108 20005+int aufs_rmdir(struct inode *dir, struct dentry *dentry)
1308ab2a 20006+{
4a4d8108
AM
20007+ int err, rmdir_later;
20008+ aufs_bindex_t bwh, bindex, bstart;
4a4d8108
AM
20009+ struct inode *inode;
20010+ struct dentry *parent, *wh_dentry, *h_dentry;
20011+ struct au_whtmp_rmdir *args;
c2b27bf2
AM
20012+ /* to reuduce stack size */
20013+ struct {
20014+ struct au_dtime dt;
20015+ struct au_pin pin;
20016+ } *a;
1facf9fc 20017+
4a4d8108 20018+ IMustLock(dir);
027c5e7a 20019+
c2b27bf2
AM
20020+ err = -ENOMEM;
20021+ a = kmalloc(sizeof(*a), GFP_NOFS);
20022+ if (unlikely(!a))
20023+ goto out;
20024+
027c5e7a
AM
20025+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
20026+ if (unlikely(err))
c2b27bf2 20027+ goto out_free;
53392da6
AM
20028+ err = au_alive_dir(dentry);
20029+ if (unlikely(err))
027c5e7a 20030+ goto out_unlock;
5527c038 20031+ inode = d_inode(dentry);
4a4d8108 20032+ IMustLock(inode);
027c5e7a 20033+ err = -ENOTDIR;
2000de60 20034+ if (unlikely(!d_is_dir(dentry)))
027c5e7a 20035+ goto out_unlock; /* possible? */
dece6358 20036+
4a4d8108
AM
20037+ err = -ENOMEM;
20038+ args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS);
20039+ if (unlikely(!args))
20040+ goto out_unlock;
dece6358 20041+
4a4d8108
AM
20042+ parent = dentry->d_parent; /* dir inode is locked */
20043+ di_write_lock_parent(parent);
20044+ err = au_test_empty(dentry, &args->whlist);
20045+ if (unlikely(err))
027c5e7a 20046+ goto out_parent;
1facf9fc 20047+
4a4d8108
AM
20048+ bstart = au_dbstart(dentry);
20049+ bwh = au_dbwh(dentry);
20050+ bindex = -1;
c2b27bf2
AM
20051+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &a->dt,
20052+ &a->pin);
4a4d8108
AM
20053+ err = PTR_ERR(wh_dentry);
20054+ if (IS_ERR(wh_dentry))
027c5e7a 20055+ goto out_parent;
1facf9fc 20056+
4a4d8108
AM
20057+ h_dentry = au_h_dptr(dentry, bstart);
20058+ dget(h_dentry);
20059+ rmdir_later = 0;
20060+ if (bindex == bstart) {
20061+ err = renwh_and_rmdir(dentry, bstart, &args->whlist, dir);
20062+ if (err > 0) {
20063+ rmdir_later = err;
20064+ err = 0;
20065+ }
20066+ } else {
20067+ /* stop monitoring */
20068+ au_hn_free(au_hi(inode, bstart));
20069+
20070+ /* dir inode is locked */
5527c038 20071+ IMustLock(d_inode(wh_dentry->d_parent));
1facf9fc 20072+ err = 0;
20073+ }
20074+
4a4d8108 20075+ if (!err) {
027c5e7a 20076+ vfsub_dead_dir(inode);
4a4d8108
AM
20077+ au_set_dbdiropq(dentry, -1);
20078+ epilog(dir, dentry, bindex);
1308ab2a 20079+
4a4d8108
AM
20080+ if (rmdir_later) {
20081+ au_whtmp_kick_rmdir(dir, bstart, h_dentry, args);
20082+ args = NULL;
20083+ }
1308ab2a 20084+
4a4d8108 20085+ goto out_unpin; /* success */
1facf9fc 20086+ }
20087+
4a4d8108
AM
20088+ /* revert */
20089+ AuLabel(revert);
20090+ if (wh_dentry) {
20091+ int rerr;
1308ab2a 20092+
c2b27bf2
AM
20093+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
20094+ &a->dt);
4a4d8108
AM
20095+ if (rerr)
20096+ err = rerr;
1facf9fc 20097+ }
20098+
4f0767ce 20099+out_unpin:
c2b27bf2 20100+ au_unpin(&a->pin);
4a4d8108
AM
20101+ dput(wh_dentry);
20102+ dput(h_dentry);
027c5e7a 20103+out_parent:
4a4d8108
AM
20104+ di_write_unlock(parent);
20105+ if (args)
20106+ au_whtmp_rmdir_free(args);
4f0767ce 20107+out_unlock:
4a4d8108 20108+ aufs_read_unlock(dentry, AuLock_DW);
c2b27bf2
AM
20109+out_free:
20110+ kfree(a);
4f0767ce 20111+out:
4a4d8108
AM
20112+ AuTraceErr(err);
20113+ return err;
dece6358 20114+}
7f207e10
AM
20115diff -urN /usr/share/empty/fs/aufs/i_op_ren.c linux/fs/aufs/i_op_ren.c
20116--- /usr/share/empty/fs/aufs/i_op_ren.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd
AM
20117+++ linux/fs/aufs/i_op_ren.c 2015-12-10 17:59:16.836166410 +0100
20118@@ -0,0 +1,1015 @@
1facf9fc 20119+/*
2000de60 20120+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 20121+ *
20122+ * This program, aufs is free software; you can redistribute it and/or modify
20123+ * it under the terms of the GNU General Public License as published by
20124+ * the Free Software Foundation; either version 2 of the License, or
20125+ * (at your option) any later version.
dece6358
AM
20126+ *
20127+ * This program is distributed in the hope that it will be useful,
20128+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20129+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20130+ * GNU General Public License for more details.
20131+ *
20132+ * You should have received a copy of the GNU General Public License
523b37e3 20133+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 20134+ */
20135+
20136+/*
4a4d8108
AM
20137+ * inode operation (rename entry)
20138+ * todo: this is crazy monster
1facf9fc 20139+ */
20140+
20141+#include "aufs.h"
20142+
4a4d8108
AM
20143+enum { AuSRC, AuDST, AuSrcDst };
20144+enum { AuPARENT, AuCHILD, AuParentChild };
1facf9fc 20145+
4a4d8108
AM
20146+#define AuRen_ISDIR 1
20147+#define AuRen_ISSAMEDIR (1 << 1)
20148+#define AuRen_WHSRC (1 << 2)
20149+#define AuRen_WHDST (1 << 3)
20150+#define AuRen_MNT_WRITE (1 << 4)
20151+#define AuRen_DT_DSTDIR (1 << 5)
20152+#define AuRen_DIROPQ (1 << 6)
4a4d8108 20153+#define au_ftest_ren(flags, name) ((flags) & AuRen_##name)
7f207e10
AM
20154+#define au_fset_ren(flags, name) \
20155+ do { (flags) |= AuRen_##name; } while (0)
20156+#define au_fclr_ren(flags, name) \
20157+ do { (flags) &= ~AuRen_##name; } while (0)
1facf9fc 20158+
4a4d8108
AM
20159+struct au_ren_args {
20160+ struct {
20161+ struct dentry *dentry, *h_dentry, *parent, *h_parent,
20162+ *wh_dentry;
20163+ struct inode *dir, *inode;
20164+ struct au_hinode *hdir;
20165+ struct au_dtime dt[AuParentChild];
20166+ aufs_bindex_t bstart;
20167+ } sd[AuSrcDst];
1facf9fc 20168+
4a4d8108
AM
20169+#define src_dentry sd[AuSRC].dentry
20170+#define src_dir sd[AuSRC].dir
20171+#define src_inode sd[AuSRC].inode
20172+#define src_h_dentry sd[AuSRC].h_dentry
20173+#define src_parent sd[AuSRC].parent
20174+#define src_h_parent sd[AuSRC].h_parent
20175+#define src_wh_dentry sd[AuSRC].wh_dentry
20176+#define src_hdir sd[AuSRC].hdir
20177+#define src_h_dir sd[AuSRC].hdir->hi_inode
20178+#define src_dt sd[AuSRC].dt
20179+#define src_bstart sd[AuSRC].bstart
1facf9fc 20180+
4a4d8108
AM
20181+#define dst_dentry sd[AuDST].dentry
20182+#define dst_dir sd[AuDST].dir
20183+#define dst_inode sd[AuDST].inode
20184+#define dst_h_dentry sd[AuDST].h_dentry
20185+#define dst_parent sd[AuDST].parent
20186+#define dst_h_parent sd[AuDST].h_parent
20187+#define dst_wh_dentry sd[AuDST].wh_dentry
20188+#define dst_hdir sd[AuDST].hdir
20189+#define dst_h_dir sd[AuDST].hdir->hi_inode
20190+#define dst_dt sd[AuDST].dt
20191+#define dst_bstart sd[AuDST].bstart
20192+
20193+ struct dentry *h_trap;
20194+ struct au_branch *br;
20195+ struct au_hinode *src_hinode;
20196+ struct path h_path;
20197+ struct au_nhash whlist;
027c5e7a 20198+ aufs_bindex_t btgt, src_bwh, src_bdiropq;
1facf9fc 20199+
1308ab2a 20200+ unsigned int flags;
1facf9fc 20201+
4a4d8108
AM
20202+ struct au_whtmp_rmdir *thargs;
20203+ struct dentry *h_dst;
20204+};
1308ab2a 20205+
4a4d8108 20206+/* ---------------------------------------------------------------------- */
1308ab2a 20207+
4a4d8108
AM
20208+/*
20209+ * functions for reverting.
20210+ * when an error happened in a single rename systemcall, we should revert
ab036dbd 20211+ * everything as if nothing happened.
4a4d8108
AM
20212+ * we don't need to revert the copied-up/down the parent dir since they are
20213+ * harmless.
20214+ */
1facf9fc 20215+
4a4d8108
AM
20216+#define RevertFailure(fmt, ...) do { \
20217+ AuIOErr("revert failure: " fmt " (%d, %d)\n", \
20218+ ##__VA_ARGS__, err, rerr); \
20219+ err = -EIO; \
20220+} while (0)
1facf9fc 20221+
4a4d8108 20222+static void au_ren_rev_diropq(int err, struct au_ren_args *a)
1facf9fc 20223+{
4a4d8108 20224+ int rerr;
1facf9fc 20225+
4a4d8108
AM
20226+ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
20227+ rerr = au_diropq_remove(a->src_dentry, a->btgt);
20228+ au_hn_imtx_unlock(a->src_hinode);
027c5e7a 20229+ au_set_dbdiropq(a->src_dentry, a->src_bdiropq);
4a4d8108 20230+ if (rerr)
523b37e3 20231+ RevertFailure("remove diropq %pd", a->src_dentry);
4a4d8108 20232+}
1facf9fc 20233+
4a4d8108
AM
20234+static void au_ren_rev_rename(int err, struct au_ren_args *a)
20235+{
20236+ int rerr;
523b37e3 20237+ struct inode *delegated;
1facf9fc 20238+
b4510431
AM
20239+ a->h_path.dentry = vfsub_lkup_one(&a->src_dentry->d_name,
20240+ a->src_h_parent);
4a4d8108
AM
20241+ rerr = PTR_ERR(a->h_path.dentry);
20242+ if (IS_ERR(a->h_path.dentry)) {
523b37e3 20243+ RevertFailure("lkup one %pd", a->src_dentry);
4a4d8108 20244+ return;
1facf9fc 20245+ }
20246+
523b37e3 20247+ delegated = NULL;
4a4d8108
AM
20248+ rerr = vfsub_rename(a->dst_h_dir,
20249+ au_h_dptr(a->src_dentry, a->btgt),
523b37e3
AM
20250+ a->src_h_dir, &a->h_path, &delegated);
20251+ if (unlikely(rerr == -EWOULDBLOCK)) {
20252+ pr_warn("cannot retry for NFSv4 delegation"
20253+ " for an internal rename\n");
20254+ iput(delegated);
20255+ }
4a4d8108
AM
20256+ d_drop(a->h_path.dentry);
20257+ dput(a->h_path.dentry);
20258+ /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */
20259+ if (rerr)
523b37e3 20260+ RevertFailure("rename %pd", a->src_dentry);
1facf9fc 20261+}
20262+
4a4d8108 20263+static void au_ren_rev_whtmp(int err, struct au_ren_args *a)
1facf9fc 20264+{
4a4d8108 20265+ int rerr;
523b37e3 20266+ struct inode *delegated;
dece6358 20267+
b4510431
AM
20268+ a->h_path.dentry = vfsub_lkup_one(&a->dst_dentry->d_name,
20269+ a->dst_h_parent);
4a4d8108
AM
20270+ rerr = PTR_ERR(a->h_path.dentry);
20271+ if (IS_ERR(a->h_path.dentry)) {
523b37e3 20272+ RevertFailure("lkup one %pd", a->dst_dentry);
4a4d8108
AM
20273+ return;
20274+ }
5527c038 20275+ if (d_is_positive(a->h_path.dentry)) {
4a4d8108
AM
20276+ d_drop(a->h_path.dentry);
20277+ dput(a->h_path.dentry);
20278+ return;
dece6358
AM
20279+ }
20280+
523b37e3
AM
20281+ delegated = NULL;
20282+ rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path,
20283+ &delegated);
20284+ if (unlikely(rerr == -EWOULDBLOCK)) {
20285+ pr_warn("cannot retry for NFSv4 delegation"
20286+ " for an internal rename\n");
20287+ iput(delegated);
20288+ }
4a4d8108
AM
20289+ d_drop(a->h_path.dentry);
20290+ dput(a->h_path.dentry);
20291+ if (!rerr)
20292+ au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst));
20293+ else
523b37e3 20294+ RevertFailure("rename %pd", a->h_dst);
4a4d8108 20295+}
1308ab2a 20296+
4a4d8108
AM
20297+static void au_ren_rev_whsrc(int err, struct au_ren_args *a)
20298+{
20299+ int rerr;
1308ab2a 20300+
4a4d8108
AM
20301+ a->h_path.dentry = a->src_wh_dentry;
20302+ rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry);
027c5e7a 20303+ au_set_dbwh(a->src_dentry, a->src_bwh);
4a4d8108 20304+ if (rerr)
523b37e3 20305+ RevertFailure("unlink %pd", a->src_wh_dentry);
4a4d8108 20306+}
4a4d8108 20307+#undef RevertFailure
1facf9fc 20308+
1308ab2a 20309+/* ---------------------------------------------------------------------- */
20310+
4a4d8108
AM
20311+/*
20312+ * when we have to copyup the renaming entry, do it with the rename-target name
20313+ * in order to minimize the cost (the later actual rename is unnecessary).
20314+ * otherwise rename it on the target branch.
20315+ */
20316+static int au_ren_or_cpup(struct au_ren_args *a)
1facf9fc 20317+{
dece6358 20318+ int err;
4a4d8108 20319+ struct dentry *d;
523b37e3 20320+ struct inode *delegated;
1facf9fc 20321+
4a4d8108
AM
20322+ d = a->src_dentry;
20323+ if (au_dbstart(d) == a->btgt) {
20324+ a->h_path.dentry = a->dst_h_dentry;
20325+ if (au_ftest_ren(a->flags, DIROPQ)
20326+ && au_dbdiropq(d) == a->btgt)
20327+ au_fclr_ren(a->flags, DIROPQ);
20328+ AuDebugOn(au_dbstart(d) != a->btgt);
523b37e3 20329+ delegated = NULL;
4a4d8108 20330+ err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt),
523b37e3
AM
20331+ a->dst_h_dir, &a->h_path, &delegated);
20332+ if (unlikely(err == -EWOULDBLOCK)) {
20333+ pr_warn("cannot retry for NFSv4 delegation"
20334+ " for an internal rename\n");
20335+ iput(delegated);
20336+ }
c2b27bf2 20337+ } else
86dc4139 20338+ BUG();
1308ab2a 20339+
027c5e7a
AM
20340+ if (!err && a->h_dst)
20341+ /* it will be set to dinfo later */
20342+ dget(a->h_dst);
1facf9fc 20343+
dece6358
AM
20344+ return err;
20345+}
1facf9fc 20346+
4a4d8108
AM
20347+/* cf. aufs_rmdir() */
20348+static int au_ren_del_whtmp(struct au_ren_args *a)
dece6358 20349+{
4a4d8108
AM
20350+ int err;
20351+ struct inode *dir;
1facf9fc 20352+
4a4d8108
AM
20353+ dir = a->dst_dir;
20354+ SiMustAnyLock(dir->i_sb);
20355+ if (!au_nhash_test_longer_wh(&a->whlist, a->btgt,
20356+ au_sbi(dir->i_sb)->si_dirwh)
20357+ || au_test_fs_remote(a->h_dst->d_sb)) {
20358+ err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist);
20359+ if (unlikely(err))
523b37e3
AM
20360+ pr_warn("failed removing whtmp dir %pd (%d), "
20361+ "ignored.\n", a->h_dst, err);
4a4d8108
AM
20362+ } else {
20363+ au_nhash_wh_free(&a->thargs->whlist);
20364+ a->thargs->whlist = a->whlist;
20365+ a->whlist.nh_num = 0;
20366+ au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs);
20367+ dput(a->h_dst);
20368+ a->thargs = NULL;
20369+ }
20370+
20371+ return 0;
1308ab2a 20372+}
1facf9fc 20373+
4a4d8108
AM
20374+/* make it 'opaque' dir. */
20375+static int au_ren_diropq(struct au_ren_args *a)
20376+{
20377+ int err;
20378+ struct dentry *diropq;
1facf9fc 20379+
4a4d8108 20380+ err = 0;
027c5e7a 20381+ a->src_bdiropq = au_dbdiropq(a->src_dentry);
4a4d8108
AM
20382+ a->src_hinode = au_hi(a->src_inode, a->btgt);
20383+ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
20384+ diropq = au_diropq_create(a->src_dentry, a->btgt);
20385+ au_hn_imtx_unlock(a->src_hinode);
20386+ if (IS_ERR(diropq))
20387+ err = PTR_ERR(diropq);
076b876e
AM
20388+ else
20389+ dput(diropq);
1facf9fc 20390+
4a4d8108
AM
20391+ return err;
20392+}
1facf9fc 20393+
4a4d8108
AM
20394+static int do_rename(struct au_ren_args *a)
20395+{
20396+ int err;
20397+ struct dentry *d, *h_d;
1facf9fc 20398+
4a4d8108
AM
20399+ /* prepare workqueue args for asynchronous rmdir */
20400+ h_d = a->dst_h_dentry;
5527c038 20401+ if (au_ftest_ren(a->flags, ISDIR) && d_is_positive(h_d)) {
4a4d8108
AM
20402+ err = -ENOMEM;
20403+ a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb, GFP_NOFS);
20404+ if (unlikely(!a->thargs))
20405+ goto out;
20406+ a->h_dst = dget(h_d);
20407+ }
1facf9fc 20408+
4a4d8108
AM
20409+ /* create whiteout for src_dentry */
20410+ if (au_ftest_ren(a->flags, WHSRC)) {
027c5e7a
AM
20411+ a->src_bwh = au_dbwh(a->src_dentry);
20412+ AuDebugOn(a->src_bwh >= 0);
4a4d8108
AM
20413+ a->src_wh_dentry
20414+ = au_wh_create(a->src_dentry, a->btgt, a->src_h_parent);
20415+ err = PTR_ERR(a->src_wh_dentry);
20416+ if (IS_ERR(a->src_wh_dentry))
20417+ goto out_thargs;
20418+ }
1facf9fc 20419+
4a4d8108
AM
20420+ /* lookup whiteout for dentry */
20421+ if (au_ftest_ren(a->flags, WHDST)) {
20422+ h_d = au_wh_lkup(a->dst_h_parent, &a->dst_dentry->d_name,
20423+ a->br);
20424+ err = PTR_ERR(h_d);
20425+ if (IS_ERR(h_d))
20426+ goto out_whsrc;
5527c038 20427+ if (d_is_negative(h_d))
4a4d8108
AM
20428+ dput(h_d);
20429+ else
20430+ a->dst_wh_dentry = h_d;
20431+ }
1facf9fc 20432+
4a4d8108
AM
20433+ /* rename dentry to tmpwh */
20434+ if (a->thargs) {
20435+ err = au_whtmp_ren(a->dst_h_dentry, a->br);
20436+ if (unlikely(err))
20437+ goto out_whdst;
dece6358 20438+
4a4d8108
AM
20439+ d = a->dst_dentry;
20440+ au_set_h_dptr(d, a->btgt, NULL);
86dc4139 20441+ err = au_lkup_neg(d, a->btgt, /*wh*/0);
4a4d8108
AM
20442+ if (unlikely(err))
20443+ goto out_whtmp;
20444+ a->dst_h_dentry = au_h_dptr(d, a->btgt);
20445+ }
1facf9fc 20446+
5527c038 20447+ BUG_ON(d_is_positive(a->dst_h_dentry) && a->src_bstart != a->btgt);
1facf9fc 20448+
4a4d8108
AM
20449+ /* rename by vfs_rename or cpup */
20450+ d = a->dst_dentry;
20451+ if (au_ftest_ren(a->flags, ISDIR)
20452+ && (a->dst_wh_dentry
20453+ || au_dbdiropq(d) == a->btgt
20454+ /* hide the lower to keep xino */
20455+ || a->btgt < au_dbend(d)
20456+ || au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ)))
20457+ au_fset_ren(a->flags, DIROPQ);
20458+ err = au_ren_or_cpup(a);
20459+ if (unlikely(err))
20460+ /* leave the copied-up one */
20461+ goto out_whtmp;
1308ab2a 20462+
4a4d8108
AM
20463+ /* make dir opaque */
20464+ if (au_ftest_ren(a->flags, DIROPQ)) {
20465+ err = au_ren_diropq(a);
20466+ if (unlikely(err))
20467+ goto out_rename;
20468+ }
1308ab2a 20469+
4a4d8108
AM
20470+ /* update target timestamps */
20471+ AuDebugOn(au_dbstart(a->src_dentry) != a->btgt);
20472+ a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt);
20473+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
5527c038 20474+ a->src_inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
1facf9fc 20475+
4a4d8108
AM
20476+ /* remove whiteout for dentry */
20477+ if (a->dst_wh_dentry) {
20478+ a->h_path.dentry = a->dst_wh_dentry;
20479+ err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path,
20480+ a->dst_dentry);
20481+ if (unlikely(err))
20482+ goto out_diropq;
20483+ }
1facf9fc 20484+
4a4d8108
AM
20485+ /* remove whtmp */
20486+ if (a->thargs)
20487+ au_ren_del_whtmp(a); /* ignore this error */
1308ab2a 20488+
076b876e 20489+ au_fhsm_wrote(a->src_dentry->d_sb, a->btgt, /*force*/0);
4a4d8108
AM
20490+ err = 0;
20491+ goto out_success;
20492+
4f0767ce 20493+out_diropq:
4a4d8108
AM
20494+ if (au_ftest_ren(a->flags, DIROPQ))
20495+ au_ren_rev_diropq(err, a);
4f0767ce 20496+out_rename:
7e9cd9fe 20497+ au_ren_rev_rename(err, a);
027c5e7a 20498+ dput(a->h_dst);
4f0767ce 20499+out_whtmp:
4a4d8108
AM
20500+ if (a->thargs)
20501+ au_ren_rev_whtmp(err, a);
4f0767ce 20502+out_whdst:
4a4d8108
AM
20503+ dput(a->dst_wh_dentry);
20504+ a->dst_wh_dentry = NULL;
4f0767ce 20505+out_whsrc:
4a4d8108
AM
20506+ if (a->src_wh_dentry)
20507+ au_ren_rev_whsrc(err, a);
4f0767ce 20508+out_success:
4a4d8108
AM
20509+ dput(a->src_wh_dentry);
20510+ dput(a->dst_wh_dentry);
4f0767ce 20511+out_thargs:
4a4d8108
AM
20512+ if (a->thargs) {
20513+ dput(a->h_dst);
20514+ au_whtmp_rmdir_free(a->thargs);
20515+ a->thargs = NULL;
20516+ }
4f0767ce 20517+out:
4a4d8108 20518+ return err;
dece6358 20519+}
1facf9fc 20520+
1308ab2a 20521+/* ---------------------------------------------------------------------- */
1facf9fc 20522+
4a4d8108
AM
20523+/*
20524+ * test if @dentry dir can be rename destination or not.
20525+ * success means, it is a logically empty dir.
20526+ */
20527+static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist)
1308ab2a 20528+{
4a4d8108 20529+ return au_test_empty(dentry, whlist);
1308ab2a 20530+}
1facf9fc 20531+
4a4d8108
AM
20532+/*
20533+ * test if @dentry dir can be rename source or not.
20534+ * if it can, return 0.
20535+ * success means,
20536+ * - it is a logically empty dir.
20537+ * - or, it exists on writable branch and has no children including whiteouts
20538+ * on the lower branch.
20539+ */
20540+static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt)
20541+{
20542+ int err;
20543+ unsigned int rdhash;
20544+ aufs_bindex_t bstart;
1facf9fc 20545+
4a4d8108
AM
20546+ bstart = au_dbstart(dentry);
20547+ if (bstart != btgt) {
20548+ struct au_nhash whlist;
dece6358 20549+
4a4d8108
AM
20550+ SiMustAnyLock(dentry->d_sb);
20551+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
20552+ if (!rdhash)
20553+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL,
20554+ dentry));
20555+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
20556+ if (unlikely(err))
20557+ goto out;
20558+ err = au_test_empty(dentry, &whlist);
20559+ au_nhash_wh_free(&whlist);
20560+ goto out;
20561+ }
dece6358 20562+
4a4d8108
AM
20563+ if (bstart == au_dbtaildir(dentry))
20564+ return 0; /* success */
dece6358 20565+
4a4d8108 20566+ err = au_test_empty_lower(dentry);
1facf9fc 20567+
4f0767ce 20568+out:
4a4d8108
AM
20569+ if (err == -ENOTEMPTY) {
20570+ AuWarn1("renaming dir who has child(ren) on multiple branches,"
20571+ " is not supported\n");
20572+ err = -EXDEV;
20573+ }
20574+ return err;
20575+}
1308ab2a 20576+
4a4d8108
AM
20577+/* side effect: sets whlist and h_dentry */
20578+static int au_ren_may_dir(struct au_ren_args *a)
1308ab2a 20579+{
4a4d8108
AM
20580+ int err;
20581+ unsigned int rdhash;
20582+ struct dentry *d;
1facf9fc 20583+
4a4d8108
AM
20584+ d = a->dst_dentry;
20585+ SiMustAnyLock(d->d_sb);
1facf9fc 20586+
4a4d8108
AM
20587+ err = 0;
20588+ if (au_ftest_ren(a->flags, ISDIR) && a->dst_inode) {
20589+ rdhash = au_sbi(d->d_sb)->si_rdhash;
20590+ if (!rdhash)
20591+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d));
20592+ err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS);
20593+ if (unlikely(err))
20594+ goto out;
1308ab2a 20595+
4a4d8108
AM
20596+ au_set_dbstart(d, a->dst_bstart);
20597+ err = may_rename_dstdir(d, &a->whlist);
20598+ au_set_dbstart(d, a->btgt);
20599+ }
20600+ a->dst_h_dentry = au_h_dptr(d, au_dbstart(d));
20601+ if (unlikely(err))
20602+ goto out;
20603+
20604+ d = a->src_dentry;
20605+ a->src_h_dentry = au_h_dptr(d, au_dbstart(d));
20606+ if (au_ftest_ren(a->flags, ISDIR)) {
20607+ err = may_rename_srcdir(d, a->btgt);
20608+ if (unlikely(err)) {
20609+ au_nhash_wh_free(&a->whlist);
20610+ a->whlist.nh_num = 0;
20611+ }
20612+ }
4f0767ce 20613+out:
4a4d8108 20614+ return err;
1facf9fc 20615+}
20616+
4a4d8108 20617+/* ---------------------------------------------------------------------- */
1facf9fc 20618+
4a4d8108
AM
20619+/*
20620+ * simple tests for rename.
20621+ * following the checks in vfs, plus the parent-child relationship.
20622+ */
20623+static int au_may_ren(struct au_ren_args *a)
20624+{
20625+ int err, isdir;
20626+ struct inode *h_inode;
1facf9fc 20627+
4a4d8108
AM
20628+ if (a->src_bstart == a->btgt) {
20629+ err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent,
20630+ au_ftest_ren(a->flags, ISDIR));
20631+ if (unlikely(err))
20632+ goto out;
20633+ err = -EINVAL;
20634+ if (unlikely(a->src_h_dentry == a->h_trap))
20635+ goto out;
20636+ }
1facf9fc 20637+
4a4d8108
AM
20638+ err = 0;
20639+ if (a->dst_bstart != a->btgt)
20640+ goto out;
1facf9fc 20641+
027c5e7a
AM
20642+ err = -ENOTEMPTY;
20643+ if (unlikely(a->dst_h_dentry == a->h_trap))
20644+ goto out;
20645+
4a4d8108 20646+ err = -EIO;
4a4d8108 20647+ isdir = !!au_ftest_ren(a->flags, ISDIR);
5527c038
JR
20648+ if (d_really_is_negative(a->dst_dentry)) {
20649+ if (d_is_negative(a->dst_h_dentry))
20650+ err = au_may_add(a->dst_dentry, a->btgt,
20651+ a->dst_h_parent, isdir);
4a4d8108 20652+ } else {
5527c038 20653+ if (unlikely(d_is_negative(a->dst_h_dentry)))
4a4d8108 20654+ goto out;
5527c038
JR
20655+ h_inode = d_inode(a->dst_h_dentry);
20656+ if (h_inode->i_nlink)
20657+ err = au_may_del(a->dst_dentry, a->btgt,
20658+ a->dst_h_parent, isdir);
4a4d8108 20659+ }
1facf9fc 20660+
4f0767ce 20661+out:
4a4d8108
AM
20662+ if (unlikely(err == -ENOENT || err == -EEXIST))
20663+ err = -EIO;
20664+ AuTraceErr(err);
20665+ return err;
20666+}
1facf9fc 20667+
1308ab2a 20668+/* ---------------------------------------------------------------------- */
1facf9fc 20669+
4a4d8108
AM
20670+/*
20671+ * locking order
20672+ * (VFS)
20673+ * - src_dir and dir by lock_rename()
20674+ * - inode if exists
20675+ * (aufs)
20676+ * - lock all
20677+ * + src_dentry and dentry by aufs_read_and_write_lock2() which calls,
20678+ * + si_read_lock
20679+ * + di_write_lock2_child()
20680+ * + di_write_lock_child()
20681+ * + ii_write_lock_child()
20682+ * + di_write_lock_child2()
20683+ * + ii_write_lock_child2()
20684+ * + src_parent and parent
20685+ * + di_write_lock_parent()
20686+ * + ii_write_lock_parent()
20687+ * + di_write_lock_parent2()
20688+ * + ii_write_lock_parent2()
20689+ * + lower src_dir and dir by vfsub_lock_rename()
20690+ * + verify every relationship between child and parent. if any
20691+ * of them failed, unlock all and return -EBUSY.
20692+ */
20693+static void au_ren_unlock(struct au_ren_args *a)
1308ab2a 20694+{
4a4d8108
AM
20695+ vfsub_unlock_rename(a->src_h_parent, a->src_hdir,
20696+ a->dst_h_parent, a->dst_hdir);
86dc4139
AM
20697+ if (au_ftest_ren(a->flags, MNT_WRITE))
20698+ vfsub_mnt_drop_write(au_br_mnt(a->br));
1308ab2a 20699+}
20700+
4a4d8108 20701+static int au_ren_lock(struct au_ren_args *a)
1308ab2a 20702+{
4a4d8108
AM
20703+ int err;
20704+ unsigned int udba;
1308ab2a 20705+
4a4d8108
AM
20706+ err = 0;
20707+ a->src_h_parent = au_h_dptr(a->src_parent, a->btgt);
20708+ a->src_hdir = au_hi(a->src_dir, a->btgt);
20709+ a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt);
20710+ a->dst_hdir = au_hi(a->dst_dir, a->btgt);
86dc4139
AM
20711+
20712+ err = vfsub_mnt_want_write(au_br_mnt(a->br));
20713+ if (unlikely(err))
20714+ goto out;
20715+ au_fset_ren(a->flags, MNT_WRITE);
4a4d8108
AM
20716+ a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir,
20717+ a->dst_h_parent, a->dst_hdir);
20718+ udba = au_opt_udba(a->src_dentry->d_sb);
5527c038
JR
20719+ if (unlikely(a->src_hdir->hi_inode != d_inode(a->src_h_parent)
20720+ || a->dst_hdir->hi_inode != d_inode(a->dst_h_parent)))
4a4d8108
AM
20721+ err = au_busy_or_stale();
20722+ if (!err && au_dbstart(a->src_dentry) == a->btgt)
20723+ err = au_h_verify(a->src_h_dentry, udba,
5527c038 20724+ d_inode(a->src_h_parent), a->src_h_parent,
4a4d8108
AM
20725+ a->br);
20726+ if (!err && au_dbstart(a->dst_dentry) == a->btgt)
20727+ err = au_h_verify(a->dst_h_dentry, udba,
5527c038 20728+ d_inode(a->dst_h_parent), a->dst_h_parent,
4a4d8108 20729+ a->br);
86dc4139 20730+ if (!err)
4a4d8108 20731+ goto out; /* success */
4a4d8108
AM
20732+
20733+ err = au_busy_or_stale();
4a4d8108 20734+ au_ren_unlock(a);
86dc4139 20735+
4f0767ce 20736+out:
4a4d8108 20737+ return err;
1facf9fc 20738+}
20739+
20740+/* ---------------------------------------------------------------------- */
20741+
4a4d8108 20742+static void au_ren_refresh_dir(struct au_ren_args *a)
1facf9fc 20743+{
4a4d8108 20744+ struct inode *dir;
dece6358 20745+
4a4d8108
AM
20746+ dir = a->dst_dir;
20747+ dir->i_version++;
20748+ if (au_ftest_ren(a->flags, ISDIR)) {
20749+ /* is this updating defined in POSIX? */
20750+ au_cpup_attr_timesizes(a->src_inode);
20751+ au_cpup_attr_nlink(dir, /*force*/1);
4a4d8108 20752+ }
027c5e7a 20753+
b912730e 20754+ au_dir_ts(dir, a->btgt);
dece6358 20755+
4a4d8108
AM
20756+ if (au_ftest_ren(a->flags, ISSAMEDIR))
20757+ return;
dece6358 20758+
4a4d8108
AM
20759+ dir = a->src_dir;
20760+ dir->i_version++;
20761+ if (au_ftest_ren(a->flags, ISDIR))
20762+ au_cpup_attr_nlink(dir, /*force*/1);
b912730e 20763+ au_dir_ts(dir, a->btgt);
1facf9fc 20764+}
20765+
4a4d8108 20766+static void au_ren_refresh(struct au_ren_args *a)
1facf9fc 20767+{
4a4d8108
AM
20768+ aufs_bindex_t bend, bindex;
20769+ struct dentry *d, *h_d;
20770+ struct inode *i, *h_i;
20771+ struct super_block *sb;
dece6358 20772+
027c5e7a
AM
20773+ d = a->dst_dentry;
20774+ d_drop(d);
20775+ if (a->h_dst)
20776+ /* already dget-ed by au_ren_or_cpup() */
20777+ au_set_h_dptr(d, a->btgt, a->h_dst);
20778+
20779+ i = a->dst_inode;
20780+ if (i) {
20781+ if (!au_ftest_ren(a->flags, ISDIR))
20782+ vfsub_drop_nlink(i);
20783+ else {
20784+ vfsub_dead_dir(i);
20785+ au_cpup_attr_timesizes(i);
20786+ }
20787+ au_update_dbrange(d, /*do_put_zero*/1);
20788+ } else {
20789+ bend = a->btgt;
20790+ for (bindex = au_dbstart(d); bindex < bend; bindex++)
20791+ au_set_h_dptr(d, bindex, NULL);
20792+ bend = au_dbend(d);
20793+ for (bindex = a->btgt + 1; bindex <= bend; bindex++)
20794+ au_set_h_dptr(d, bindex, NULL);
20795+ au_update_dbrange(d, /*do_put_zero*/0);
20796+ }
20797+
4a4d8108
AM
20798+ d = a->src_dentry;
20799+ au_set_dbwh(d, -1);
20800+ bend = au_dbend(d);
20801+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
20802+ h_d = au_h_dptr(d, bindex);
20803+ if (h_d)
20804+ au_set_h_dptr(d, bindex, NULL);
20805+ }
20806+ au_set_dbend(d, a->btgt);
20807+
20808+ sb = d->d_sb;
20809+ i = a->src_inode;
20810+ if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i))
20811+ return; /* success */
20812+
20813+ bend = au_ibend(i);
20814+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
20815+ h_i = au_h_iptr(i, bindex);
20816+ if (h_i) {
20817+ au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0);
20818+ /* ignore this error */
20819+ au_set_h_iptr(i, bindex, NULL, 0);
20820+ }
20821+ }
20822+ au_set_ibend(i, a->btgt);
1308ab2a 20823+}
dece6358 20824+
4a4d8108
AM
20825+/* ---------------------------------------------------------------------- */
20826+
20827+/* mainly for link(2) and rename(2) */
20828+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt)
1308ab2a 20829+{
4a4d8108
AM
20830+ aufs_bindex_t bdiropq, bwh;
20831+ struct dentry *parent;
20832+ struct au_branch *br;
20833+
20834+ parent = dentry->d_parent;
5527c038 20835+ IMustLock(d_inode(parent)); /* dir is locked */
4a4d8108
AM
20836+
20837+ bdiropq = au_dbdiropq(parent);
20838+ bwh = au_dbwh(dentry);
20839+ br = au_sbr(dentry->d_sb, btgt);
20840+ if (au_br_rdonly(br)
20841+ || (0 <= bdiropq && bdiropq < btgt)
20842+ || (0 <= bwh && bwh < btgt))
20843+ btgt = -1;
20844+
20845+ AuDbg("btgt %d\n", btgt);
20846+ return btgt;
1facf9fc 20847+}
20848+
4a4d8108
AM
20849+/* sets src_bstart, dst_bstart and btgt */
20850+static int au_ren_wbr(struct au_ren_args *a)
1facf9fc 20851+{
4a4d8108
AM
20852+ int err;
20853+ struct au_wr_dir_args wr_dir_args = {
20854+ /* .force_btgt = -1, */
20855+ .flags = AuWrDir_ADD_ENTRY
20856+ };
dece6358 20857+
4a4d8108
AM
20858+ a->src_bstart = au_dbstart(a->src_dentry);
20859+ a->dst_bstart = au_dbstart(a->dst_dentry);
20860+ if (au_ftest_ren(a->flags, ISDIR))
20861+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
20862+ wr_dir_args.force_btgt = a->src_bstart;
20863+ if (a->dst_inode && a->dst_bstart < a->src_bstart)
20864+ wr_dir_args.force_btgt = a->dst_bstart;
20865+ wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt);
20866+ err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args);
20867+ a->btgt = err;
dece6358 20868+
4a4d8108 20869+ return err;
1facf9fc 20870+}
20871+
4a4d8108 20872+static void au_ren_dt(struct au_ren_args *a)
1facf9fc 20873+{
4a4d8108
AM
20874+ a->h_path.dentry = a->src_h_parent;
20875+ au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path);
20876+ if (!au_ftest_ren(a->flags, ISSAMEDIR)) {
20877+ a->h_path.dentry = a->dst_h_parent;
20878+ au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path);
20879+ }
1facf9fc 20880+
4a4d8108
AM
20881+ au_fclr_ren(a->flags, DT_DSTDIR);
20882+ if (!au_ftest_ren(a->flags, ISDIR))
20883+ return;
dece6358 20884+
4a4d8108
AM
20885+ a->h_path.dentry = a->src_h_dentry;
20886+ au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path);
5527c038 20887+ if (d_is_positive(a->dst_h_dentry)) {
4a4d8108
AM
20888+ au_fset_ren(a->flags, DT_DSTDIR);
20889+ a->h_path.dentry = a->dst_h_dentry;
20890+ au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path);
20891+ }
1308ab2a 20892+}
dece6358 20893+
4a4d8108 20894+static void au_ren_rev_dt(int err, struct au_ren_args *a)
1308ab2a 20895+{
4a4d8108
AM
20896+ struct dentry *h_d;
20897+ struct mutex *h_mtx;
20898+
20899+ au_dtime_revert(a->src_dt + AuPARENT);
20900+ if (!au_ftest_ren(a->flags, ISSAMEDIR))
20901+ au_dtime_revert(a->dst_dt + AuPARENT);
20902+
20903+ if (au_ftest_ren(a->flags, ISDIR) && err != -EIO) {
20904+ h_d = a->src_dt[AuCHILD].dt_h_path.dentry;
5527c038 20905+ h_mtx = &d_inode(h_d)->i_mutex;
4a4d8108
AM
20906+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
20907+ au_dtime_revert(a->src_dt + AuCHILD);
20908+ mutex_unlock(h_mtx);
20909+
20910+ if (au_ftest_ren(a->flags, DT_DSTDIR)) {
20911+ h_d = a->dst_dt[AuCHILD].dt_h_path.dentry;
5527c038 20912+ h_mtx = &d_inode(h_d)->i_mutex;
4a4d8108
AM
20913+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
20914+ au_dtime_revert(a->dst_dt + AuCHILD);
20915+ mutex_unlock(h_mtx);
1facf9fc 20916+ }
20917+ }
20918+}
20919+
4a4d8108
AM
20920+/* ---------------------------------------------------------------------- */
20921+
20922+int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry,
20923+ struct inode *_dst_dir, struct dentry *_dst_dentry)
1facf9fc 20924+{
e49829fe 20925+ int err, flags;
4a4d8108
AM
20926+ /* reduce stack space */
20927+ struct au_ren_args *a;
20928+
523b37e3 20929+ AuDbg("%pd, %pd\n", _src_dentry, _dst_dentry);
4a4d8108
AM
20930+ IMustLock(_src_dir);
20931+ IMustLock(_dst_dir);
20932+
20933+ err = -ENOMEM;
20934+ BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE);
20935+ a = kzalloc(sizeof(*a), GFP_NOFS);
20936+ if (unlikely(!a))
20937+ goto out;
20938+
20939+ a->src_dir = _src_dir;
20940+ a->src_dentry = _src_dentry;
5527c038
JR
20941+ a->src_inode = NULL;
20942+ if (d_really_is_positive(a->src_dentry))
20943+ a->src_inode = d_inode(a->src_dentry);
4a4d8108
AM
20944+ a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */
20945+ a->dst_dir = _dst_dir;
20946+ a->dst_dentry = _dst_dentry;
5527c038
JR
20947+ a->dst_inode = NULL;
20948+ if (d_really_is_positive(a->dst_dentry))
20949+ a->dst_inode = d_inode(a->dst_dentry);
4a4d8108
AM
20950+ a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */
20951+ if (a->dst_inode) {
20952+ IMustLock(a->dst_inode);
20953+ au_igrab(a->dst_inode);
1facf9fc 20954+ }
1facf9fc 20955+
4a4d8108 20956+ err = -ENOTDIR;
027c5e7a 20957+ flags = AuLock_FLUSH | AuLock_NOPLM | AuLock_GEN;
2000de60 20958+ if (d_is_dir(a->src_dentry)) {
4a4d8108 20959+ au_fset_ren(a->flags, ISDIR);
5527c038 20960+ if (unlikely(d_really_is_positive(a->dst_dentry)
2000de60 20961+ && !d_is_dir(a->dst_dentry)))
4a4d8108 20962+ goto out_free;
ab036dbd
AM
20963+ flags |= AuLock_DIRS;
20964+ }
20965+ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry, flags);
e49829fe
JR
20966+ if (unlikely(err))
20967+ goto out_free;
1facf9fc 20968+
027c5e7a
AM
20969+ err = au_d_hashed_positive(a->src_dentry);
20970+ if (unlikely(err))
20971+ goto out_unlock;
20972+ err = -ENOENT;
20973+ if (a->dst_inode) {
20974+ /*
20975+ * If it is a dir, VFS unhash dst_dentry before this
20976+ * function. It means we cannot rely upon d_unhashed().
20977+ */
20978+ if (unlikely(!a->dst_inode->i_nlink))
20979+ goto out_unlock;
20980+ if (!S_ISDIR(a->dst_inode->i_mode)) {
20981+ err = au_d_hashed_positive(a->dst_dentry);
20982+ if (unlikely(err))
20983+ goto out_unlock;
20984+ } else if (unlikely(IS_DEADDIR(a->dst_inode)))
20985+ goto out_unlock;
20986+ } else if (unlikely(d_unhashed(a->dst_dentry)))
20987+ goto out_unlock;
20988+
7eafdf33
AM
20989+ /*
20990+ * is it possible?
ab036dbd 20991+ * yes, it happened (in linux-3.3-rcN) but I don't know why.
7eafdf33
AM
20992+ * there may exist a problem somewhere else.
20993+ */
20994+ err = -EINVAL;
5527c038 20995+ if (unlikely(d_inode(a->dst_parent) == d_inode(a->src_dentry)))
7eafdf33
AM
20996+ goto out_unlock;
20997+
4a4d8108
AM
20998+ au_fset_ren(a->flags, ISSAMEDIR); /* temporary */
20999+ di_write_lock_parent(a->dst_parent);
1facf9fc 21000+
4a4d8108
AM
21001+ /* which branch we process */
21002+ err = au_ren_wbr(a);
21003+ if (unlikely(err < 0))
027c5e7a 21004+ goto out_parent;
4a4d8108 21005+ a->br = au_sbr(a->dst_dentry->d_sb, a->btgt);
86dc4139 21006+ a->h_path.mnt = au_br_mnt(a->br);
1facf9fc 21007+
4a4d8108
AM
21008+ /* are they available to be renamed */
21009+ err = au_ren_may_dir(a);
21010+ if (unlikely(err))
21011+ goto out_children;
1facf9fc 21012+
4a4d8108
AM
21013+ /* prepare the writable parent dir on the same branch */
21014+ if (a->dst_bstart == a->btgt) {
21015+ au_fset_ren(a->flags, WHDST);
21016+ } else {
21017+ err = au_cpup_dirs(a->dst_dentry, a->btgt);
21018+ if (unlikely(err))
21019+ goto out_children;
21020+ }
1facf9fc 21021+
4a4d8108
AM
21022+ if (a->src_dir != a->dst_dir) {
21023+ /*
21024+ * this temporary unlock is safe,
21025+ * because both dir->i_mutex are locked.
21026+ */
21027+ di_write_unlock(a->dst_parent);
21028+ di_write_lock_parent(a->src_parent);
21029+ err = au_wr_dir_need_wh(a->src_dentry,
21030+ au_ftest_ren(a->flags, ISDIR),
21031+ &a->btgt);
21032+ di_write_unlock(a->src_parent);
21033+ di_write_lock2_parent(a->src_parent, a->dst_parent, /*isdir*/1);
21034+ au_fclr_ren(a->flags, ISSAMEDIR);
21035+ } else
21036+ err = au_wr_dir_need_wh(a->src_dentry,
21037+ au_ftest_ren(a->flags, ISDIR),
21038+ &a->btgt);
21039+ if (unlikely(err < 0))
21040+ goto out_children;
21041+ if (err)
21042+ au_fset_ren(a->flags, WHSRC);
1facf9fc 21043+
86dc4139
AM
21044+ /* cpup src */
21045+ if (a->src_bstart != a->btgt) {
86dc4139
AM
21046+ struct au_pin pin;
21047+
21048+ err = au_pin(&pin, a->src_dentry, a->btgt,
21049+ au_opt_udba(a->src_dentry->d_sb),
21050+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
367653fa 21051+ if (!err) {
c2b27bf2
AM
21052+ struct au_cp_generic cpg = {
21053+ .dentry = a->src_dentry,
21054+ .bdst = a->btgt,
21055+ .bsrc = a->src_bstart,
21056+ .len = -1,
21057+ .pin = &pin,
21058+ .flags = AuCpup_DTIME | AuCpup_HOPEN
21059+ };
367653fa 21060+ AuDebugOn(au_dbstart(a->src_dentry) != a->src_bstart);
c2b27bf2 21061+ err = au_sio_cpup_simple(&cpg);
367653fa 21062+ au_unpin(&pin);
86dc4139 21063+ }
86dc4139
AM
21064+ if (unlikely(err))
21065+ goto out_children;
21066+ a->src_bstart = a->btgt;
21067+ a->src_h_dentry = au_h_dptr(a->src_dentry, a->btgt);
21068+ au_fset_ren(a->flags, WHSRC);
21069+ }
21070+
4a4d8108
AM
21071+ /* lock them all */
21072+ err = au_ren_lock(a);
21073+ if (unlikely(err))
86dc4139 21074+ /* leave the copied-up one */
4a4d8108 21075+ goto out_children;
1facf9fc 21076+
4a4d8108
AM
21077+ if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE))
21078+ err = au_may_ren(a);
21079+ else if (unlikely(a->dst_dentry->d_name.len > AUFS_MAX_NAMELEN))
21080+ err = -ENAMETOOLONG;
21081+ if (unlikely(err))
21082+ goto out_hdir;
1facf9fc 21083+
4a4d8108
AM
21084+ /* store timestamps to be revertible */
21085+ au_ren_dt(a);
1facf9fc 21086+
4a4d8108
AM
21087+ /* here we go */
21088+ err = do_rename(a);
21089+ if (unlikely(err))
21090+ goto out_dt;
21091+
21092+ /* update dir attributes */
21093+ au_ren_refresh_dir(a);
21094+
21095+ /* dput/iput all lower dentries */
21096+ au_ren_refresh(a);
21097+
21098+ goto out_hdir; /* success */
21099+
4f0767ce 21100+out_dt:
4a4d8108 21101+ au_ren_rev_dt(err, a);
4f0767ce 21102+out_hdir:
4a4d8108 21103+ au_ren_unlock(a);
4f0767ce 21104+out_children:
4a4d8108 21105+ au_nhash_wh_free(&a->whlist);
027c5e7a
AM
21106+ if (err && a->dst_inode && a->dst_bstart != a->btgt) {
21107+ AuDbg("bstart %d, btgt %d\n", a->dst_bstart, a->btgt);
21108+ au_set_h_dptr(a->dst_dentry, a->btgt, NULL);
21109+ au_set_dbstart(a->dst_dentry, a->dst_bstart);
4a4d8108 21110+ }
027c5e7a 21111+out_parent:
4a4d8108
AM
21112+ if (!err)
21113+ d_move(a->src_dentry, a->dst_dentry);
027c5e7a
AM
21114+ else {
21115+ au_update_dbstart(a->dst_dentry);
21116+ if (!a->dst_inode)
21117+ d_drop(a->dst_dentry);
21118+ }
4a4d8108
AM
21119+ if (au_ftest_ren(a->flags, ISSAMEDIR))
21120+ di_write_unlock(a->dst_parent);
21121+ else
21122+ di_write_unlock2(a->src_parent, a->dst_parent);
027c5e7a 21123+out_unlock:
4a4d8108 21124+ aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry);
4f0767ce 21125+out_free:
4a4d8108
AM
21126+ iput(a->dst_inode);
21127+ if (a->thargs)
21128+ au_whtmp_rmdir_free(a->thargs);
21129+ kfree(a);
4f0767ce 21130+out:
4a4d8108
AM
21131+ AuTraceErr(err);
21132+ return err;
1308ab2a 21133+}
7f207e10
AM
21134diff -urN /usr/share/empty/fs/aufs/Kconfig linux/fs/aufs/Kconfig
21135--- /usr/share/empty/fs/aufs/Kconfig 1970-01-01 01:00:00.000000000 +0100
ab036dbd 21136+++ linux/fs/aufs/Kconfig 2015-09-24 10:47:58.248052907 +0200
c1595e42 21137@@ -0,0 +1,185 @@
4a4d8108
AM
21138+config AUFS_FS
21139+ tristate "Aufs (Advanced multi layered unification filesystem) support"
4a4d8108
AM
21140+ help
21141+ Aufs is a stackable unification filesystem such as Unionfs,
21142+ which unifies several directories and provides a merged single
21143+ directory.
21144+ In the early days, aufs was entirely re-designed and
21145+ re-implemented Unionfs Version 1.x series. Introducing many
21146+ original ideas, approaches and improvements, it becomes totally
21147+ different from Unionfs while keeping the basic features.
1facf9fc 21148+
4a4d8108
AM
21149+if AUFS_FS
21150+choice
21151+ prompt "Maximum number of branches"
21152+ default AUFS_BRANCH_MAX_127
21153+ help
21154+ Specifies the maximum number of branches (or member directories)
21155+ in a single aufs. The larger value consumes more system
21156+ resources and has a minor impact to performance.
21157+config AUFS_BRANCH_MAX_127
21158+ bool "127"
21159+ help
21160+ Specifies the maximum number of branches (or member directories)
21161+ in a single aufs. The larger value consumes more system
21162+ resources and has a minor impact to performance.
21163+config AUFS_BRANCH_MAX_511
21164+ bool "511"
21165+ help
21166+ Specifies the maximum number of branches (or member directories)
21167+ in a single aufs. The larger value consumes more system
21168+ resources and has a minor impact to performance.
21169+config AUFS_BRANCH_MAX_1023
21170+ bool "1023"
21171+ help
21172+ Specifies the maximum number of branches (or member directories)
21173+ in a single aufs. The larger value consumes more system
21174+ resources and has a minor impact to performance.
21175+config AUFS_BRANCH_MAX_32767
21176+ bool "32767"
21177+ help
21178+ Specifies the maximum number of branches (or member directories)
21179+ in a single aufs. The larger value consumes more system
21180+ resources and has a minor impact to performance.
21181+endchoice
1facf9fc 21182+
e49829fe
JR
21183+config AUFS_SBILIST
21184+ bool
21185+ depends on AUFS_MAGIC_SYSRQ || PROC_FS
21186+ default y
21187+ help
21188+ Automatic configuration for internal use.
21189+ When aufs supports Magic SysRq or /proc, enabled automatically.
21190+
4a4d8108
AM
21191+config AUFS_HNOTIFY
21192+ bool "Detect direct branch access (bypassing aufs)"
21193+ help
21194+ If you want to modify files on branches directly, eg. bypassing aufs,
21195+ and want aufs to detect the changes of them fully, then enable this
21196+ option and use 'udba=notify' mount option.
7f207e10 21197+ Currently there is only one available configuration, "fsnotify".
4a4d8108
AM
21198+ It will have a negative impact to the performance.
21199+ See detail in aufs.5.
dece6358 21200+
4a4d8108
AM
21201+choice
21202+ prompt "method" if AUFS_HNOTIFY
21203+ default AUFS_HFSNOTIFY
21204+config AUFS_HFSNOTIFY
21205+ bool "fsnotify"
21206+ select FSNOTIFY
4a4d8108 21207+endchoice
1facf9fc 21208+
4a4d8108
AM
21209+config AUFS_EXPORT
21210+ bool "NFS-exportable aufs"
2cbb1c4b 21211+ depends on EXPORTFS
4a4d8108
AM
21212+ help
21213+ If you want to export your mounted aufs via NFS, then enable this
21214+ option. There are several requirements for this configuration.
21215+ See detail in aufs.5.
1facf9fc 21216+
4a4d8108
AM
21217+config AUFS_INO_T_64
21218+ bool
21219+ depends on AUFS_EXPORT
21220+ depends on 64BIT && !(ALPHA || S390)
21221+ default y
21222+ help
21223+ Automatic configuration for internal use.
21224+ /* typedef unsigned long/int __kernel_ino_t */
21225+ /* alpha and s390x are int */
1facf9fc 21226+
c1595e42
JR
21227+config AUFS_XATTR
21228+ bool "support for XATTR/EA (including Security Labels)"
21229+ help
21230+ If your branch fs supports XATTR/EA and you want to make them
21231+ available in aufs too, then enable this option and specify the
21232+ branch attributes for EA.
21233+ See detail in aufs.5.
21234+
076b876e
AM
21235+config AUFS_FHSM
21236+ bool "File-based Hierarchical Storage Management"
21237+ help
21238+ Hierarchical Storage Management (or HSM) is a well-known feature
21239+ in the storage world. Aufs provides this feature as file-based
21240+ with multiple branches.
21241+ These multiple branches are prioritized, ie. the topmost one
21242+ should be the fastest drive and be used heavily.
21243+
4a4d8108
AM
21244+config AUFS_RDU
21245+ bool "Readdir in userspace"
21246+ help
21247+ Aufs has two methods to provide a merged view for a directory,
21248+ by a user-space library and by kernel-space natively. The latter
21249+ is always enabled but sometimes large and slow.
21250+ If you enable this option, install the library in aufs2-util
21251+ package, and set some environment variables for your readdir(3),
21252+ then the work will be handled in user-space which generally
21253+ shows better performance in most cases.
21254+ See detail in aufs.5.
1facf9fc 21255+
4a4d8108
AM
21256+config AUFS_SHWH
21257+ bool "Show whiteouts"
21258+ help
21259+ If you want to make the whiteouts in aufs visible, then enable
21260+ this option and specify 'shwh' mount option. Although it may
21261+ sound like philosophy or something, technically it
21262+ simply shows the name of whiteout while keeping its behaviour.
1facf9fc 21263+
4a4d8108
AM
21264+config AUFS_BR_RAMFS
21265+ bool "Ramfs (initramfs/rootfs) as an aufs branch"
21266+ help
21267+ If you want to use ramfs as an aufs branch fs, then enable this
21268+ option. Generally tmpfs is recommended.
21269+ Aufs prohibited them to be a branch fs by default, because
21270+ initramfs becomes unusable after switch_root or something
21271+ generally. If you set initramfs as an aufs branch and boot your
21272+ system by switch_root, you will meet a problem easily since the
21273+ files in initramfs may be inaccessible.
21274+ Unless you are going to use ramfs as an aufs branch fs without
21275+ switch_root or something, leave it N.
1facf9fc 21276+
4a4d8108
AM
21277+config AUFS_BR_FUSE
21278+ bool "Fuse fs as an aufs branch"
21279+ depends on FUSE_FS
21280+ select AUFS_POLL
21281+ help
21282+ If you want to use fuse-based userspace filesystem as an aufs
21283+ branch fs, then enable this option.
21284+ It implements the internal poll(2) operation which is
21285+ implemented by fuse only (currently).
1facf9fc 21286+
4a4d8108
AM
21287+config AUFS_POLL
21288+ bool
21289+ help
21290+ Automatic configuration for internal use.
1facf9fc 21291+
4a4d8108
AM
21292+config AUFS_BR_HFSPLUS
21293+ bool "Hfsplus as an aufs branch"
21294+ depends on HFSPLUS_FS
21295+ default y
21296+ help
21297+ If you want to use hfsplus fs as an aufs branch fs, then enable
21298+ this option. This option introduces a small overhead at
21299+ copying-up a file on hfsplus.
1facf9fc 21300+
4a4d8108
AM
21301+config AUFS_BDEV_LOOP
21302+ bool
21303+ depends on BLK_DEV_LOOP
21304+ default y
21305+ help
21306+ Automatic configuration for internal use.
21307+ Convert =[ym] into =y.
1308ab2a 21308+
4a4d8108
AM
21309+config AUFS_DEBUG
21310+ bool "Debug aufs"
21311+ help
21312+ Enable this to compile aufs internal debug code.
21313+ It will have a negative impact to the performance.
21314+
21315+config AUFS_MAGIC_SYSRQ
21316+ bool
21317+ depends on AUFS_DEBUG && MAGIC_SYSRQ
21318+ default y
21319+ help
21320+ Automatic configuration for internal use.
21321+ When aufs supports Magic SysRq, enabled automatically.
21322+endif
7f207e10
AM
21323diff -urN /usr/share/empty/fs/aufs/loop.c linux/fs/aufs/loop.c
21324--- /usr/share/empty/fs/aufs/loop.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd
AM
21325+++ linux/fs/aufs/loop.c 2015-11-11 17:21:46.918863802 +0100
21326@@ -0,0 +1,146 @@
1facf9fc 21327+/*
2000de60 21328+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 21329+ *
21330+ * This program, aufs is free software; you can redistribute it and/or modify
21331+ * it under the terms of the GNU General Public License as published by
21332+ * the Free Software Foundation; either version 2 of the License, or
21333+ * (at your option) any later version.
dece6358
AM
21334+ *
21335+ * This program is distributed in the hope that it will be useful,
21336+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21337+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21338+ * GNU General Public License for more details.
21339+ *
21340+ * You should have received a copy of the GNU General Public License
523b37e3 21341+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 21342+ */
21343+
21344+/*
21345+ * support for loopback block device as a branch
21346+ */
21347+
1facf9fc 21348+#include "aufs.h"
21349+
392086de
AM
21350+/* added into drivers/block/loop.c */
21351+static struct file *(*backing_file_func)(struct super_block *sb);
21352+
1facf9fc 21353+/*
21354+ * test if two lower dentries have overlapping branches.
21355+ */
b752ccd1 21356+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding)
1facf9fc 21357+{
b752ccd1 21358+ struct super_block *h_sb;
392086de
AM
21359+ struct file *backing_file;
21360+
21361+ if (unlikely(!backing_file_func)) {
21362+ /* don't load "loop" module here */
21363+ backing_file_func = symbol_get(loop_backing_file);
21364+ if (unlikely(!backing_file_func))
21365+ /* "loop" module is not loaded */
21366+ return 0;
21367+ }
1facf9fc 21368+
b752ccd1 21369+ h_sb = h_adding->d_sb;
392086de
AM
21370+ backing_file = backing_file_func(h_sb);
21371+ if (!backing_file)
1facf9fc 21372+ return 0;
21373+
2000de60 21374+ h_adding = backing_file->f_path.dentry;
b752ccd1
AM
21375+ /*
21376+ * h_adding can be local NFS.
21377+ * in this case aufs cannot detect the loop.
21378+ */
21379+ if (unlikely(h_adding->d_sb == sb))
1facf9fc 21380+ return 1;
b752ccd1 21381+ return !!au_test_subdir(h_adding, sb->s_root);
1facf9fc 21382+}
21383+
21384+/* true if a kernel thread named 'loop[0-9].*' accesses a file */
21385+int au_test_loopback_kthread(void)
21386+{
b752ccd1
AM
21387+ int ret;
21388+ struct task_struct *tsk = current;
a2a7ad62 21389+ char c, comm[sizeof(tsk->comm)];
b752ccd1
AM
21390+
21391+ ret = 0;
21392+ if (tsk->flags & PF_KTHREAD) {
a2a7ad62
AM
21393+ get_task_comm(comm, tsk);
21394+ c = comm[4];
b752ccd1 21395+ ret = ('0' <= c && c <= '9'
a2a7ad62 21396+ && !strncmp(comm, "loop", 4));
b752ccd1 21397+ }
1facf9fc 21398+
b752ccd1 21399+ return ret;
1facf9fc 21400+}
87a755f4
AM
21401+
21402+/* ---------------------------------------------------------------------- */
21403+
21404+#define au_warn_loopback_step 16
21405+static int au_warn_loopback_nelem = au_warn_loopback_step;
21406+static unsigned long *au_warn_loopback_array;
21407+
21408+void au_warn_loopback(struct super_block *h_sb)
21409+{
21410+ int i, new_nelem;
21411+ unsigned long *a, magic;
21412+ static DEFINE_SPINLOCK(spin);
21413+
21414+ magic = h_sb->s_magic;
21415+ spin_lock(&spin);
21416+ a = au_warn_loopback_array;
21417+ for (i = 0; i < au_warn_loopback_nelem && *a; i++)
21418+ if (a[i] == magic) {
21419+ spin_unlock(&spin);
21420+ return;
21421+ }
21422+
21423+ /* h_sb is new to us, print it */
21424+ if (i < au_warn_loopback_nelem) {
21425+ a[i] = magic;
21426+ goto pr;
21427+ }
21428+
21429+ /* expand the array */
21430+ new_nelem = au_warn_loopback_nelem + au_warn_loopback_step;
21431+ a = au_kzrealloc(au_warn_loopback_array,
21432+ au_warn_loopback_nelem * sizeof(unsigned long),
21433+ new_nelem * sizeof(unsigned long), GFP_ATOMIC);
21434+ if (a) {
21435+ au_warn_loopback_nelem = new_nelem;
21436+ au_warn_loopback_array = a;
21437+ a[i] = magic;
21438+ goto pr;
21439+ }
21440+
21441+ spin_unlock(&spin);
21442+ AuWarn1("realloc failed, ignored\n");
21443+ return;
21444+
21445+pr:
21446+ spin_unlock(&spin);
0c3ec466
AM
21447+ pr_warn("you may want to try another patch for loopback file "
21448+ "on %s(0x%lx) branch\n", au_sbtype(h_sb), magic);
87a755f4
AM
21449+}
21450+
21451+int au_loopback_init(void)
21452+{
21453+ int err;
21454+ struct super_block *sb __maybe_unused;
21455+
ab036dbd 21456+ BUILD_BUG_ON(sizeof(sb->s_magic) != sizeof(unsigned long));
87a755f4
AM
21457+
21458+ err = 0;
21459+ au_warn_loopback_array = kcalloc(au_warn_loopback_step,
21460+ sizeof(unsigned long), GFP_NOFS);
21461+ if (unlikely(!au_warn_loopback_array))
21462+ err = -ENOMEM;
21463+
21464+ return err;
21465+}
21466+
21467+void au_loopback_fin(void)
21468+{
ab036dbd
AM
21469+ if (backing_file_func)
21470+ symbol_put(loop_backing_file);
87a755f4
AM
21471+ kfree(au_warn_loopback_array);
21472+}
7f207e10
AM
21473diff -urN /usr/share/empty/fs/aufs/loop.h linux/fs/aufs/loop.h
21474--- /usr/share/empty/fs/aufs/loop.h 1970-01-01 01:00:00.000000000 +0100
ab036dbd 21475+++ linux/fs/aufs/loop.h 2015-09-24 10:47:58.254719746 +0200
523b37e3 21476@@ -0,0 +1,52 @@
1facf9fc 21477+/*
2000de60 21478+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 21479+ *
21480+ * This program, aufs is free software; you can redistribute it and/or modify
21481+ * it under the terms of the GNU General Public License as published by
21482+ * the Free Software Foundation; either version 2 of the License, or
21483+ * (at your option) any later version.
dece6358
AM
21484+ *
21485+ * This program is distributed in the hope that it will be useful,
21486+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21487+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21488+ * GNU General Public License for more details.
21489+ *
21490+ * You should have received a copy of the GNU General Public License
523b37e3 21491+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 21492+ */
21493+
21494+/*
21495+ * support for loopback mount as a branch
21496+ */
21497+
21498+#ifndef __AUFS_LOOP_H__
21499+#define __AUFS_LOOP_H__
21500+
21501+#ifdef __KERNEL__
21502+
dece6358
AM
21503+struct dentry;
21504+struct super_block;
1facf9fc 21505+
21506+#ifdef CONFIG_AUFS_BDEV_LOOP
392086de
AM
21507+/* drivers/block/loop.c */
21508+struct file *loop_backing_file(struct super_block *sb);
21509+
1facf9fc 21510+/* loop.c */
b752ccd1 21511+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding);
1facf9fc 21512+int au_test_loopback_kthread(void);
87a755f4
AM
21513+void au_warn_loopback(struct super_block *h_sb);
21514+
21515+int au_loopback_init(void);
21516+void au_loopback_fin(void);
1facf9fc 21517+#else
4a4d8108 21518+AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
b752ccd1 21519+ struct dentry *h_adding)
4a4d8108 21520+AuStubInt0(au_test_loopback_kthread, void)
87a755f4
AM
21521+AuStubVoid(au_warn_loopback, struct super_block *h_sb)
21522+
21523+AuStubInt0(au_loopback_init, void)
21524+AuStubVoid(au_loopback_fin, void)
1facf9fc 21525+#endif /* CONFIG_AUFS_BDEV_LOOP */
21526+
21527+#endif /* __KERNEL__ */
21528+#endif /* __AUFS_LOOP_H__ */
7f207e10
AM
21529diff -urN /usr/share/empty/fs/aufs/magic.mk linux/fs/aufs/magic.mk
21530--- /usr/share/empty/fs/aufs/magic.mk 1970-01-01 01:00:00.000000000 +0100
ab036dbd 21531+++ linux/fs/aufs/magic.mk 2015-09-24 10:47:58.254719746 +0200
7e9cd9fe 21532@@ -0,0 +1,30 @@
1facf9fc 21533+
21534+# defined in ${srctree}/fs/fuse/inode.c
21535+# tristate
21536+ifdef CONFIG_FUSE_FS
21537+ccflags-y += -DFUSE_SUPER_MAGIC=0x65735546
21538+endif
21539+
1facf9fc 21540+# defined in ${srctree}/fs/xfs/xfs_sb.h
21541+# tristate
21542+ifdef CONFIG_XFS_FS
21543+ccflags-y += -DXFS_SB_MAGIC=0x58465342
21544+endif
21545+
21546+# defined in ${srctree}/fs/configfs/mount.c
21547+# tristate
21548+ifdef CONFIG_CONFIGFS_FS
21549+ccflags-y += -DCONFIGFS_MAGIC=0x62656570
21550+endif
21551+
1facf9fc 21552+# defined in ${srctree}/fs/ubifs/ubifs.h
21553+# tristate
21554+ifdef CONFIG_UBIFS_FS
21555+ccflags-y += -DUBIFS_SUPER_MAGIC=0x24051905
21556+endif
4a4d8108
AM
21557+
21558+# defined in ${srctree}/fs/hfsplus/hfsplus_raw.h
21559+# tristate
21560+ifdef CONFIG_HFSPLUS_FS
21561+ccflags-y += -DHFSPLUS_SUPER_MAGIC=0x482b
21562+endif
7f207e10
AM
21563diff -urN /usr/share/empty/fs/aufs/Makefile linux/fs/aufs/Makefile
21564--- /usr/share/empty/fs/aufs/Makefile 1970-01-01 01:00:00.000000000 +0100
ab036dbd 21565+++ linux/fs/aufs/Makefile 2015-09-24 10:47:58.248052907 +0200
c1595e42 21566@@ -0,0 +1,44 @@
4a4d8108
AM
21567+
21568+include ${src}/magic.mk
21569+ifeq (${CONFIG_AUFS_FS},m)
21570+include ${src}/conf.mk
21571+endif
21572+-include ${src}/priv_def.mk
21573+
21574+# cf. include/linux/kernel.h
21575+# enable pr_debug
21576+ccflags-y += -DDEBUG
f6c5ef8b
AM
21577+# sparse requires the full pathname
21578+ifdef M
523b37e3 21579+ccflags-y += -include ${M}/../../include/uapi/linux/aufs_type.h
f6c5ef8b 21580+else
523b37e3 21581+ccflags-y += -include ${srctree}/include/uapi/linux/aufs_type.h
f6c5ef8b 21582+endif
4a4d8108
AM
21583+
21584+obj-$(CONFIG_AUFS_FS) += aufs.o
21585+aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \
21586+ wkq.o vfsub.o dcsub.o \
e49829fe 21587+ cpup.o whout.o wbr_policy.o \
4a4d8108
AM
21588+ dinfo.o dentry.o \
21589+ dynop.o \
21590+ finfo.o file.o f_op.o \
21591+ dir.o vdir.o \
21592+ iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \
c2b27bf2 21593+ mvdown.o ioctl.o
4a4d8108
AM
21594+
21595+# all are boolean
e49829fe 21596+aufs-$(CONFIG_PROC_FS) += procfs.o plink.o
4a4d8108
AM
21597+aufs-$(CONFIG_SYSFS) += sysfs.o
21598+aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o
21599+aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o
21600+aufs-$(CONFIG_AUFS_HNOTIFY) += hnotify.o
21601+aufs-$(CONFIG_AUFS_HFSNOTIFY) += hfsnotify.o
4a4d8108 21602+aufs-$(CONFIG_AUFS_EXPORT) += export.o
c1595e42
JR
21603+aufs-$(CONFIG_AUFS_XATTR) += xattr.o
21604+aufs-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
076b876e 21605+aufs-$(CONFIG_AUFS_FHSM) += fhsm.o
4a4d8108
AM
21606+aufs-$(CONFIG_AUFS_POLL) += poll.o
21607+aufs-$(CONFIG_AUFS_RDU) += rdu.o
4a4d8108
AM
21608+aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o
21609+aufs-$(CONFIG_AUFS_DEBUG) += debug.o
21610+aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o
7f207e10
AM
21611diff -urN /usr/share/empty/fs/aufs/module.c linux/fs/aufs/module.c
21612--- /usr/share/empty/fs/aufs/module.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd
AM
21613+++ linux/fs/aufs/module.c 2015-12-10 18:46:31.223310574 +0100
21614@@ -0,0 +1,222 @@
1facf9fc 21615+/*
2000de60 21616+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 21617+ *
21618+ * This program, aufs is free software; you can redistribute it and/or modify
21619+ * it under the terms of the GNU General Public License as published by
21620+ * the Free Software Foundation; either version 2 of the License, or
21621+ * (at your option) any later version.
dece6358
AM
21622+ *
21623+ * This program is distributed in the hope that it will be useful,
21624+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21625+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21626+ * GNU General Public License for more details.
21627+ *
21628+ * You should have received a copy of the GNU General Public License
523b37e3 21629+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 21630+ */
21631+
21632+/*
21633+ * module global variables and operations
21634+ */
21635+
21636+#include <linux/module.h>
21637+#include <linux/seq_file.h>
21638+#include "aufs.h"
21639+
21640+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp)
21641+{
21642+ if (new_sz <= nused)
21643+ return p;
21644+
21645+ p = krealloc(p, new_sz, gfp);
21646+ if (p)
21647+ memset(p + nused, 0, new_sz - nused);
21648+ return p;
21649+}
21650+
21651+/* ---------------------------------------------------------------------- */
21652+
21653+/*
21654+ * aufs caches
21655+ */
21656+struct kmem_cache *au_cachep[AuCache_Last];
21657+static int __init au_cache_init(void)
21658+{
4a4d8108 21659+ au_cachep[AuCache_DINFO] = AuCacheCtor(au_dinfo, au_di_init_once);
1facf9fc 21660+ if (au_cachep[AuCache_DINFO])
027c5e7a 21661+ /* SLAB_DESTROY_BY_RCU */
4a4d8108
AM
21662+ au_cachep[AuCache_ICNTNR] = AuCacheCtor(au_icntnr,
21663+ au_icntnr_init_once);
1facf9fc 21664+ if (au_cachep[AuCache_ICNTNR])
4a4d8108
AM
21665+ au_cachep[AuCache_FINFO] = AuCacheCtor(au_finfo,
21666+ au_fi_init_once);
1facf9fc 21667+ if (au_cachep[AuCache_FINFO])
21668+ au_cachep[AuCache_VDIR] = AuCache(au_vdir);
21669+ if (au_cachep[AuCache_VDIR])
21670+ au_cachep[AuCache_DEHSTR] = AuCache(au_vdir_dehstr);
21671+ if (au_cachep[AuCache_DEHSTR])
21672+ return 0;
21673+
21674+ return -ENOMEM;
21675+}
21676+
21677+static void au_cache_fin(void)
21678+{
21679+ int i;
4a4d8108 21680+
537831f9
AM
21681+ /*
21682+ * Make sure all delayed rcu free inodes are flushed before we
21683+ * destroy cache.
21684+ */
21685+ rcu_barrier();
21686+
7eafdf33
AM
21687+ /* excluding AuCache_HNOTIFY */
21688+ BUILD_BUG_ON(AuCache_HNOTIFY + 1 != AuCache_Last);
21689+ for (i = 0; i < AuCache_HNOTIFY; i++)
1facf9fc 21690+ if (au_cachep[i]) {
21691+ kmem_cache_destroy(au_cachep[i]);
21692+ au_cachep[i] = NULL;
21693+ }
21694+}
21695+
21696+/* ---------------------------------------------------------------------- */
21697+
21698+int au_dir_roflags;
21699+
e49829fe 21700+#ifdef CONFIG_AUFS_SBILIST
1e00d052
AM
21701+/*
21702+ * iterate_supers_type() doesn't protect us from
21703+ * remounting (branch management)
21704+ */
e49829fe
JR
21705+struct au_splhead au_sbilist;
21706+#endif
21707+
9dbd164d
AM
21708+struct lock_class_key au_lc_key[AuLcKey_Last];
21709+
1facf9fc 21710+/*
21711+ * functions for module interface.
21712+ */
21713+MODULE_LICENSE("GPL");
21714+/* MODULE_LICENSE("GPL v2"); */
dece6358 21715+MODULE_AUTHOR("Junjiro R. Okajima <aufs-users@lists.sourceforge.net>");
1facf9fc 21716+MODULE_DESCRIPTION(AUFS_NAME
21717+ " -- Advanced multi layered unification filesystem");
21718+MODULE_VERSION(AUFS_VERSION);
c06a8ce3 21719+MODULE_ALIAS_FS(AUFS_NAME);
1facf9fc 21720+
1facf9fc 21721+/* this module parameter has no meaning when SYSFS is disabled */
21722+int sysaufs_brs = 1;
21723+MODULE_PARM_DESC(brs, "use <sysfs>/fs/aufs/si_*/brN");
21724+module_param_named(brs, sysaufs_brs, int, S_IRUGO);
21725+
076b876e
AM
21726+/* this module parameter has no meaning when USER_NS is disabled */
21727+static bool au_userns;
21728+MODULE_PARM_DESC(allow_userns, "allow unprivileged to mount under userns");
21729+module_param_named(allow_userns, au_userns, bool, S_IRUGO);
21730+
1facf9fc 21731+/* ---------------------------------------------------------------------- */
21732+
21733+static char au_esc_chars[0x20 + 3]; /* 0x01-0x20, backslash, del, and NULL */
21734+
21735+int au_seq_path(struct seq_file *seq, struct path *path)
21736+{
ab036dbd
AM
21737+ int err;
21738+
21739+ err = seq_path(seq, path, au_esc_chars);
21740+ if (err > 0)
21741+ err = 0;
21742+ else if (err < 0)
21743+ err = -ENOMEM;
21744+
21745+ return err;
1facf9fc 21746+}
21747+
21748+/* ---------------------------------------------------------------------- */
21749+
21750+static int __init aufs_init(void)
21751+{
21752+ int err, i;
21753+ char *p;
21754+
21755+ p = au_esc_chars;
21756+ for (i = 1; i <= ' '; i++)
21757+ *p++ = i;
21758+ *p++ = '\\';
21759+ *p++ = '\x7f';
21760+ *p = 0;
21761+
21762+ au_dir_roflags = au_file_roflags(O_DIRECTORY | O_LARGEFILE);
21763+
ab036dbd
AM
21764+ memcpy(aufs_iop_nogetattr, aufs_iop, sizeof(aufs_iop));
21765+ for (i = 0; i < AuIop_Last; i++)
21766+ aufs_iop_nogetattr[i].getattr = NULL;
21767+
e49829fe 21768+ au_sbilist_init();
1facf9fc 21769+ sysaufs_brs_init();
21770+ au_debug_init();
4a4d8108 21771+ au_dy_init();
1facf9fc 21772+ err = sysaufs_init();
21773+ if (unlikely(err))
21774+ goto out;
e49829fe 21775+ err = au_procfs_init();
4f0767ce 21776+ if (unlikely(err))
953406b4 21777+ goto out_sysaufs;
e49829fe
JR
21778+ err = au_wkq_init();
21779+ if (unlikely(err))
21780+ goto out_procfs;
87a755f4 21781+ err = au_loopback_init();
1facf9fc 21782+ if (unlikely(err))
21783+ goto out_wkq;
87a755f4
AM
21784+ err = au_hnotify_init();
21785+ if (unlikely(err))
21786+ goto out_loopback;
1facf9fc 21787+ err = au_sysrq_init();
21788+ if (unlikely(err))
21789+ goto out_hin;
21790+ err = au_cache_init();
21791+ if (unlikely(err))
21792+ goto out_sysrq;
076b876e
AM
21793+
21794+ aufs_fs_type.fs_flags |= au_userns ? FS_USERNS_MOUNT : 0;
1facf9fc 21795+ err = register_filesystem(&aufs_fs_type);
21796+ if (unlikely(err))
21797+ goto out_cache;
076b876e 21798+
4a4d8108
AM
21799+ /* since we define pr_fmt, call printk directly */
21800+ printk(KERN_INFO AUFS_NAME " " AUFS_VERSION "\n");
1facf9fc 21801+ goto out; /* success */
21802+
4f0767ce 21803+out_cache:
1facf9fc 21804+ au_cache_fin();
4f0767ce 21805+out_sysrq:
1facf9fc 21806+ au_sysrq_fin();
4f0767ce 21807+out_hin:
4a4d8108 21808+ au_hnotify_fin();
87a755f4
AM
21809+out_loopback:
21810+ au_loopback_fin();
4f0767ce 21811+out_wkq:
1facf9fc 21812+ au_wkq_fin();
e49829fe
JR
21813+out_procfs:
21814+ au_procfs_fin();
4f0767ce 21815+out_sysaufs:
1facf9fc 21816+ sysaufs_fin();
4a4d8108 21817+ au_dy_fin();
4f0767ce 21818+out:
1facf9fc 21819+ return err;
21820+}
21821+
21822+static void __exit aufs_exit(void)
21823+{
21824+ unregister_filesystem(&aufs_fs_type);
21825+ au_cache_fin();
21826+ au_sysrq_fin();
4a4d8108 21827+ au_hnotify_fin();
87a755f4 21828+ au_loopback_fin();
1facf9fc 21829+ au_wkq_fin();
e49829fe 21830+ au_procfs_fin();
1facf9fc 21831+ sysaufs_fin();
4a4d8108 21832+ au_dy_fin();
1facf9fc 21833+}
21834+
21835+module_init(aufs_init);
21836+module_exit(aufs_exit);
7f207e10
AM
21837diff -urN /usr/share/empty/fs/aufs/module.h linux/fs/aufs/module.h
21838--- /usr/share/empty/fs/aufs/module.h 1970-01-01 01:00:00.000000000 +0100
ab036dbd 21839+++ linux/fs/aufs/module.h 2015-09-24 10:47:58.254719746 +0200
523b37e3 21840@@ -0,0 +1,104 @@
1facf9fc 21841+/*
2000de60 21842+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 21843+ *
21844+ * This program, aufs is free software; you can redistribute it and/or modify
21845+ * it under the terms of the GNU General Public License as published by
21846+ * the Free Software Foundation; either version 2 of the License, or
21847+ * (at your option) any later version.
dece6358
AM
21848+ *
21849+ * This program is distributed in the hope that it will be useful,
21850+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21851+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21852+ * GNU General Public License for more details.
21853+ *
21854+ * You should have received a copy of the GNU General Public License
523b37e3 21855+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 21856+ */
21857+
21858+/*
21859+ * module initialization and module-global
21860+ */
21861+
21862+#ifndef __AUFS_MODULE_H__
21863+#define __AUFS_MODULE_H__
21864+
21865+#ifdef __KERNEL__
21866+
21867+#include <linux/slab.h>
21868+
dece6358
AM
21869+struct path;
21870+struct seq_file;
21871+
1facf9fc 21872+/* module parameters */
1facf9fc 21873+extern int sysaufs_brs;
21874+
21875+/* ---------------------------------------------------------------------- */
21876+
21877+extern int au_dir_roflags;
21878+
9dbd164d
AM
21879+enum {
21880+ AuLcNonDir_FIINFO,
21881+ AuLcNonDir_DIINFO,
21882+ AuLcNonDir_IIINFO,
21883+
21884+ AuLcDir_FIINFO,
21885+ AuLcDir_DIINFO,
21886+ AuLcDir_IIINFO,
21887+
21888+ AuLcSymlink_DIINFO,
21889+ AuLcSymlink_IIINFO,
21890+
21891+ AuLcKey_Last
21892+};
21893+extern struct lock_class_key au_lc_key[AuLcKey_Last];
21894+
1facf9fc 21895+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp);
21896+int au_seq_path(struct seq_file *seq, struct path *path);
21897+
e49829fe
JR
21898+#ifdef CONFIG_PROC_FS
21899+/* procfs.c */
21900+int __init au_procfs_init(void);
21901+void au_procfs_fin(void);
21902+#else
21903+AuStubInt0(au_procfs_init, void);
21904+AuStubVoid(au_procfs_fin, void);
21905+#endif
21906+
4f0767ce
JR
21907+/* ---------------------------------------------------------------------- */
21908+
21909+/* kmem cache */
1facf9fc 21910+enum {
21911+ AuCache_DINFO,
21912+ AuCache_ICNTNR,
21913+ AuCache_FINFO,
21914+ AuCache_VDIR,
21915+ AuCache_DEHSTR,
7eafdf33 21916+ AuCache_HNOTIFY, /* must be last */
1facf9fc 21917+ AuCache_Last
21918+};
21919+
4a4d8108
AM
21920+#define AuCacheFlags (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD)
21921+#define AuCache(type) KMEM_CACHE(type, AuCacheFlags)
21922+#define AuCacheCtor(type, ctor) \
21923+ kmem_cache_create(#type, sizeof(struct type), \
21924+ __alignof__(struct type), AuCacheFlags, ctor)
1facf9fc 21925+
21926+extern struct kmem_cache *au_cachep[];
21927+
21928+#define AuCacheFuncs(name, index) \
4a4d8108 21929+static inline struct au_##name *au_cache_alloc_##name(void) \
1facf9fc 21930+{ return kmem_cache_alloc(au_cachep[AuCache_##index], GFP_NOFS); } \
4a4d8108 21931+static inline void au_cache_free_##name(struct au_##name *p) \
1facf9fc 21932+{ kmem_cache_free(au_cachep[AuCache_##index], p); }
21933+
21934+AuCacheFuncs(dinfo, DINFO);
21935+AuCacheFuncs(icntnr, ICNTNR);
21936+AuCacheFuncs(finfo, FINFO);
21937+AuCacheFuncs(vdir, VDIR);
4a4d8108
AM
21938+AuCacheFuncs(vdir_dehstr, DEHSTR);
21939+#ifdef CONFIG_AUFS_HNOTIFY
21940+AuCacheFuncs(hnotify, HNOTIFY);
21941+#endif
1facf9fc 21942+
4a4d8108
AM
21943+#endif /* __KERNEL__ */
21944+#endif /* __AUFS_MODULE_H__ */
c2b27bf2
AM
21945diff -urN /usr/share/empty/fs/aufs/mvdown.c linux/fs/aufs/mvdown.c
21946--- /usr/share/empty/fs/aufs/mvdown.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd
AM
21947+++ linux/fs/aufs/mvdown.c 2015-12-10 17:59:16.839499823 +0100
21948@@ -0,0 +1,703 @@
c2b27bf2 21949+/*
2000de60 21950+ * Copyright (C) 2011-2015 Junjiro R. Okajima
c2b27bf2
AM
21951+ *
21952+ * This program, aufs is free software; you can redistribute it and/or modify
21953+ * it under the terms of the GNU General Public License as published by
21954+ * the Free Software Foundation; either version 2 of the License, or
21955+ * (at your option) any later version.
21956+ *
21957+ * This program is distributed in the hope that it will be useful,
21958+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21959+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21960+ * GNU General Public License for more details.
21961+ *
21962+ * You should have received a copy of the GNU General Public License
523b37e3
AM
21963+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
21964+ */
21965+
21966+/*
21967+ * move-down, opposite of copy-up
c2b27bf2
AM
21968+ */
21969+
21970+#include "aufs.h"
21971+
c2b27bf2
AM
21972+struct au_mvd_args {
21973+ struct {
c2b27bf2
AM
21974+ struct super_block *h_sb;
21975+ struct dentry *h_parent;
21976+ struct au_hinode *hdir;
392086de 21977+ struct inode *h_dir, *h_inode;
c1595e42 21978+ struct au_pin pin;
c2b27bf2
AM
21979+ } info[AUFS_MVDOWN_NARRAY];
21980+
21981+ struct aufs_mvdown mvdown;
21982+ struct dentry *dentry, *parent;
21983+ struct inode *inode, *dir;
21984+ struct super_block *sb;
21985+ aufs_bindex_t bopq, bwh, bfound;
21986+ unsigned char rename_lock;
c2b27bf2
AM
21987+};
21988+
392086de 21989+#define mvd_errno mvdown.au_errno
076b876e
AM
21990+#define mvd_bsrc mvdown.stbr[AUFS_MVDOWN_UPPER].bindex
21991+#define mvd_src_brid mvdown.stbr[AUFS_MVDOWN_UPPER].brid
21992+#define mvd_bdst mvdown.stbr[AUFS_MVDOWN_LOWER].bindex
21993+#define mvd_dst_brid mvdown.stbr[AUFS_MVDOWN_LOWER].brid
c2b27bf2 21994+
392086de
AM
21995+#define mvd_h_src_sb info[AUFS_MVDOWN_UPPER].h_sb
21996+#define mvd_h_src_parent info[AUFS_MVDOWN_UPPER].h_parent
21997+#define mvd_hdir_src info[AUFS_MVDOWN_UPPER].hdir
21998+#define mvd_h_src_dir info[AUFS_MVDOWN_UPPER].h_dir
21999+#define mvd_h_src_inode info[AUFS_MVDOWN_UPPER].h_inode
c1595e42 22000+#define mvd_pin_src info[AUFS_MVDOWN_UPPER].pin
392086de
AM
22001+
22002+#define mvd_h_dst_sb info[AUFS_MVDOWN_LOWER].h_sb
22003+#define mvd_h_dst_parent info[AUFS_MVDOWN_LOWER].h_parent
22004+#define mvd_hdir_dst info[AUFS_MVDOWN_LOWER].hdir
22005+#define mvd_h_dst_dir info[AUFS_MVDOWN_LOWER].h_dir
22006+#define mvd_h_dst_inode info[AUFS_MVDOWN_LOWER].h_inode
c1595e42 22007+#define mvd_pin_dst info[AUFS_MVDOWN_LOWER].pin
c2b27bf2
AM
22008+
22009+#define AU_MVD_PR(flag, ...) do { \
22010+ if (flag) \
22011+ pr_err(__VA_ARGS__); \
22012+ } while (0)
22013+
076b876e
AM
22014+static int find_lower_writable(struct au_mvd_args *a)
22015+{
22016+ struct super_block *sb;
22017+ aufs_bindex_t bindex, bend;
22018+ struct au_branch *br;
22019+
22020+ sb = a->sb;
22021+ bindex = a->mvd_bsrc;
22022+ bend = au_sbend(sb);
22023+ if (a->mvdown.flags & AUFS_MVDOWN_FHSM_LOWER)
22024+ for (bindex++; bindex <= bend; bindex++) {
22025+ br = au_sbr(sb, bindex);
22026+ if (au_br_fhsm(br->br_perm)
22027+ && (!(au_br_sb(br)->s_flags & MS_RDONLY)))
22028+ return bindex;
22029+ }
22030+ else if (!(a->mvdown.flags & AUFS_MVDOWN_ROLOWER))
22031+ for (bindex++; bindex <= bend; bindex++) {
22032+ br = au_sbr(sb, bindex);
22033+ if (!au_br_rdonly(br))
22034+ return bindex;
22035+ }
22036+ else
22037+ for (bindex++; bindex <= bend; bindex++) {
22038+ br = au_sbr(sb, bindex);
22039+ if (!(au_br_sb(br)->s_flags & MS_RDONLY)) {
22040+ if (au_br_rdonly(br))
22041+ a->mvdown.flags
22042+ |= AUFS_MVDOWN_ROLOWER_R;
22043+ return bindex;
22044+ }
22045+ }
22046+
22047+ return -1;
22048+}
22049+
c2b27bf2 22050+/* make the parent dir on bdst */
392086de 22051+static int au_do_mkdir(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22052+{
22053+ int err;
22054+
22055+ err = 0;
22056+ a->mvd_hdir_src = au_hi(a->dir, a->mvd_bsrc);
22057+ a->mvd_hdir_dst = au_hi(a->dir, a->mvd_bdst);
22058+ a->mvd_h_src_parent = au_h_dptr(a->parent, a->mvd_bsrc);
22059+ a->mvd_h_dst_parent = NULL;
22060+ if (au_dbend(a->parent) >= a->mvd_bdst)
22061+ a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);
22062+ if (!a->mvd_h_dst_parent) {
22063+ err = au_cpdown_dirs(a->dentry, a->mvd_bdst);
22064+ if (unlikely(err)) {
392086de 22065+ AU_MVD_PR(dmsg, "cpdown_dirs failed\n");
c2b27bf2
AM
22066+ goto out;
22067+ }
22068+ a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);
22069+ }
22070+
22071+out:
22072+ AuTraceErr(err);
22073+ return err;
22074+}
22075+
22076+/* lock them all */
392086de 22077+static int au_do_lock(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22078+{
22079+ int err;
22080+ struct dentry *h_trap;
22081+
22082+ a->mvd_h_src_sb = au_sbr_sb(a->sb, a->mvd_bsrc);
22083+ a->mvd_h_dst_sb = au_sbr_sb(a->sb, a->mvd_bdst);
c1595e42
JR
22084+ err = au_pin(&a->mvd_pin_dst, a->dentry, a->mvd_bdst,
22085+ au_opt_udba(a->sb),
22086+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
22087+ AuTraceErr(err);
22088+ if (unlikely(err)) {
22089+ AU_MVD_PR(dmsg, "pin_dst failed\n");
22090+ goto out;
22091+ }
22092+
c2b27bf2
AM
22093+ if (a->mvd_h_src_sb != a->mvd_h_dst_sb) {
22094+ a->rename_lock = 0;
c1595e42
JR
22095+ au_pin_init(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
22096+ AuLsc_DI_PARENT, AuLsc_I_PARENT3,
22097+ au_opt_udba(a->sb),
22098+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
22099+ err = au_do_pin(&a->mvd_pin_src);
22100+ AuTraceErr(err);
5527c038 22101+ a->mvd_h_src_dir = d_inode(a->mvd_h_src_parent);
c1595e42
JR
22102+ if (unlikely(err)) {
22103+ AU_MVD_PR(dmsg, "pin_src failed\n");
22104+ goto out_dst;
22105+ }
22106+ goto out; /* success */
c2b27bf2
AM
22107+ }
22108+
c2b27bf2 22109+ a->rename_lock = 1;
c1595e42
JR
22110+ au_pin_hdir_unlock(&a->mvd_pin_dst);
22111+ err = au_pin(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
22112+ au_opt_udba(a->sb),
22113+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
22114+ AuTraceErr(err);
5527c038 22115+ a->mvd_h_src_dir = d_inode(a->mvd_h_src_parent);
c1595e42
JR
22116+ if (unlikely(err)) {
22117+ AU_MVD_PR(dmsg, "pin_src failed\n");
22118+ au_pin_hdir_lock(&a->mvd_pin_dst);
22119+ goto out_dst;
22120+ }
22121+ au_pin_hdir_unlock(&a->mvd_pin_src);
c2b27bf2
AM
22122+ h_trap = vfsub_lock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
22123+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
22124+ if (h_trap) {
22125+ err = (h_trap != a->mvd_h_src_parent);
22126+ if (err)
22127+ err = (h_trap != a->mvd_h_dst_parent);
22128+ }
22129+ BUG_ON(err); /* it should never happen */
c1595e42
JR
22130+ if (unlikely(a->mvd_h_src_dir != au_pinned_h_dir(&a->mvd_pin_src))) {
22131+ err = -EBUSY;
22132+ AuTraceErr(err);
22133+ vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
22134+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
22135+ au_pin_hdir_lock(&a->mvd_pin_src);
22136+ au_unpin(&a->mvd_pin_src);
22137+ au_pin_hdir_lock(&a->mvd_pin_dst);
22138+ goto out_dst;
22139+ }
22140+ goto out; /* success */
c2b27bf2 22141+
c1595e42
JR
22142+out_dst:
22143+ au_unpin(&a->mvd_pin_dst);
c2b27bf2
AM
22144+out:
22145+ AuTraceErr(err);
22146+ return err;
22147+}
22148+
392086de 22149+static void au_do_unlock(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2 22150+{
c1595e42
JR
22151+ if (!a->rename_lock)
22152+ au_unpin(&a->mvd_pin_src);
22153+ else {
c2b27bf2
AM
22154+ vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
22155+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
c1595e42
JR
22156+ au_pin_hdir_lock(&a->mvd_pin_src);
22157+ au_unpin(&a->mvd_pin_src);
22158+ au_pin_hdir_lock(&a->mvd_pin_dst);
22159+ }
22160+ au_unpin(&a->mvd_pin_dst);
c2b27bf2
AM
22161+}
22162+
22163+/* copy-down the file */
392086de 22164+static int au_do_cpdown(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22165+{
22166+ int err;
22167+ struct au_cp_generic cpg = {
22168+ .dentry = a->dentry,
22169+ .bdst = a->mvd_bdst,
22170+ .bsrc = a->mvd_bsrc,
22171+ .len = -1,
c1595e42 22172+ .pin = &a->mvd_pin_dst,
c2b27bf2
AM
22173+ .flags = AuCpup_DTIME | AuCpup_HOPEN
22174+ };
22175+
22176+ AuDbg("b%d, b%d\n", cpg.bsrc, cpg.bdst);
392086de
AM
22177+ if (a->mvdown.flags & AUFS_MVDOWN_OWLOWER)
22178+ au_fset_cpup(cpg.flags, OVERWRITE);
22179+ if (a->mvdown.flags & AUFS_MVDOWN_ROLOWER)
22180+ au_fset_cpup(cpg.flags, RWDST);
c2b27bf2
AM
22181+ err = au_sio_cpdown_simple(&cpg);
22182+ if (unlikely(err))
392086de 22183+ AU_MVD_PR(dmsg, "cpdown failed\n");
c2b27bf2
AM
22184+
22185+ AuTraceErr(err);
22186+ return err;
22187+}
22188+
22189+/*
22190+ * unlink the whiteout on bdst if exist which may be created by UDBA while we
22191+ * were sleeping
22192+ */
392086de 22193+static int au_do_unlink_wh(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22194+{
22195+ int err;
22196+ struct path h_path;
22197+ struct au_branch *br;
523b37e3 22198+ struct inode *delegated;
c2b27bf2
AM
22199+
22200+ br = au_sbr(a->sb, a->mvd_bdst);
22201+ h_path.dentry = au_wh_lkup(a->mvd_h_dst_parent, &a->dentry->d_name, br);
22202+ err = PTR_ERR(h_path.dentry);
22203+ if (IS_ERR(h_path.dentry)) {
392086de 22204+ AU_MVD_PR(dmsg, "wh_lkup failed\n");
c2b27bf2
AM
22205+ goto out;
22206+ }
22207+
22208+ err = 0;
5527c038 22209+ if (d_is_positive(h_path.dentry)) {
c2b27bf2 22210+ h_path.mnt = au_br_mnt(br);
523b37e3 22211+ delegated = NULL;
5527c038 22212+ err = vfsub_unlink(d_inode(a->mvd_h_dst_parent), &h_path,
523b37e3
AM
22213+ &delegated, /*force*/0);
22214+ if (unlikely(err == -EWOULDBLOCK)) {
22215+ pr_warn("cannot retry for NFSv4 delegation"
22216+ " for an internal unlink\n");
22217+ iput(delegated);
22218+ }
c2b27bf2 22219+ if (unlikely(err))
392086de 22220+ AU_MVD_PR(dmsg, "wh_unlink failed\n");
c2b27bf2
AM
22221+ }
22222+ dput(h_path.dentry);
22223+
22224+out:
22225+ AuTraceErr(err);
22226+ return err;
22227+}
22228+
22229+/*
22230+ * unlink the topmost h_dentry
c2b27bf2 22231+ */
392086de 22232+static int au_do_unlink(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22233+{
22234+ int err;
22235+ struct path h_path;
523b37e3 22236+ struct inode *delegated;
c2b27bf2
AM
22237+
22238+ h_path.mnt = au_sbr_mnt(a->sb, a->mvd_bsrc);
22239+ h_path.dentry = au_h_dptr(a->dentry, a->mvd_bsrc);
523b37e3
AM
22240+ delegated = NULL;
22241+ err = vfsub_unlink(a->mvd_h_src_dir, &h_path, &delegated, /*force*/0);
22242+ if (unlikely(err == -EWOULDBLOCK)) {
22243+ pr_warn("cannot retry for NFSv4 delegation"
22244+ " for an internal unlink\n");
22245+ iput(delegated);
22246+ }
c2b27bf2 22247+ if (unlikely(err))
392086de 22248+ AU_MVD_PR(dmsg, "unlink failed\n");
c2b27bf2
AM
22249+
22250+ AuTraceErr(err);
22251+ return err;
22252+}
22253+
076b876e
AM
22254+/* Since mvdown succeeded, we ignore an error of this function */
22255+static void au_do_stfs(const unsigned char dmsg, struct au_mvd_args *a)
22256+{
22257+ int err;
22258+ struct au_branch *br;
22259+
22260+ a->mvdown.flags |= AUFS_MVDOWN_STFS_FAILED;
22261+ br = au_sbr(a->sb, a->mvd_bsrc);
22262+ err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_UPPER].stfs);
22263+ if (!err) {
22264+ br = au_sbr(a->sb, a->mvd_bdst);
22265+ a->mvdown.stbr[AUFS_MVDOWN_LOWER].brid = br->br_id;
22266+ err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_LOWER].stfs);
22267+ }
22268+ if (!err)
22269+ a->mvdown.flags &= ~AUFS_MVDOWN_STFS_FAILED;
22270+ else
22271+ AU_MVD_PR(dmsg, "statfs failed (%d), ignored\n", err);
22272+}
22273+
c2b27bf2
AM
22274+/*
22275+ * copy-down the file and unlink the bsrc file.
22276+ * - unlink the bdst whout if exist
22277+ * - copy-down the file (with whtmp name and rename)
22278+ * - unlink the bsrc file
22279+ */
392086de 22280+static int au_do_mvdown(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22281+{
22282+ int err;
22283+
392086de 22284+ err = au_do_mkdir(dmsg, a);
c2b27bf2 22285+ if (!err)
392086de 22286+ err = au_do_lock(dmsg, a);
c2b27bf2
AM
22287+ if (unlikely(err))
22288+ goto out;
22289+
22290+ /*
22291+ * do not revert the activities we made on bdst since they should be
22292+ * harmless in aufs.
22293+ */
22294+
392086de 22295+ err = au_do_cpdown(dmsg, a);
c2b27bf2 22296+ if (!err)
392086de
AM
22297+ err = au_do_unlink_wh(dmsg, a);
22298+ if (!err && !(a->mvdown.flags & AUFS_MVDOWN_KUPPER))
22299+ err = au_do_unlink(dmsg, a);
c2b27bf2
AM
22300+ if (unlikely(err))
22301+ goto out_unlock;
22302+
c1595e42
JR
22303+ AuDbg("%pd2, 0x%x, %d --> %d\n",
22304+ a->dentry, a->mvdown.flags, a->mvd_bsrc, a->mvd_bdst);
076b876e
AM
22305+ if (find_lower_writable(a) < 0)
22306+ a->mvdown.flags |= AUFS_MVDOWN_BOTTOM;
22307+
22308+ if (a->mvdown.flags & AUFS_MVDOWN_STFS)
22309+ au_do_stfs(dmsg, a);
22310+
c2b27bf2 22311+ /* maintain internal array */
392086de
AM
22312+ if (!(a->mvdown.flags & AUFS_MVDOWN_KUPPER)) {
22313+ au_set_h_dptr(a->dentry, a->mvd_bsrc, NULL);
22314+ au_set_dbstart(a->dentry, a->mvd_bdst);
22315+ au_set_h_iptr(a->inode, a->mvd_bsrc, NULL, /*flags*/0);
22316+ au_set_ibstart(a->inode, a->mvd_bdst);
ab036dbd
AM
22317+ } else {
22318+ /* hide the lower */
22319+ au_set_h_dptr(a->dentry, a->mvd_bdst, NULL);
22320+ au_set_dbend(a->dentry, a->mvd_bsrc);
22321+ au_set_h_iptr(a->inode, a->mvd_bdst, NULL, /*flags*/0);
22322+ au_set_ibend(a->inode, a->mvd_bsrc);
392086de 22323+ }
c2b27bf2
AM
22324+ if (au_dbend(a->dentry) < a->mvd_bdst)
22325+ au_set_dbend(a->dentry, a->mvd_bdst);
c2b27bf2
AM
22326+ if (au_ibend(a->inode) < a->mvd_bdst)
22327+ au_set_ibend(a->inode, a->mvd_bdst);
22328+
22329+out_unlock:
392086de 22330+ au_do_unlock(dmsg, a);
c2b27bf2
AM
22331+out:
22332+ AuTraceErr(err);
22333+ return err;
22334+}
22335+
22336+/* ---------------------------------------------------------------------- */
22337+
c2b27bf2 22338+/* make sure the file is idle */
392086de 22339+static int au_mvd_args_busy(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22340+{
22341+ int err, plinked;
c2b27bf2
AM
22342+
22343+ err = 0;
c2b27bf2
AM
22344+ plinked = !!au_opt_test(au_mntflags(a->sb), PLINK);
22345+ if (au_dbstart(a->dentry) == a->mvd_bsrc
c1595e42 22346+ && au_dcount(a->dentry) == 1
c2b27bf2 22347+ && atomic_read(&a->inode->i_count) == 1
392086de 22348+ /* && a->mvd_h_src_inode->i_nlink == 1 */
c2b27bf2
AM
22349+ && (!plinked || !au_plink_test(a->inode))
22350+ && a->inode->i_nlink == 1)
22351+ goto out;
22352+
22353+ err = -EBUSY;
392086de 22354+ AU_MVD_PR(dmsg,
c1595e42
JR
22355+ "b%d, d{b%d, c%d?}, i{c%d?, l%u}, hi{l%u}, p{%d, %d}\n",
22356+ a->mvd_bsrc, au_dbstart(a->dentry), au_dcount(a->dentry),
c2b27bf2 22357+ atomic_read(&a->inode->i_count), a->inode->i_nlink,
392086de 22358+ a->mvd_h_src_inode->i_nlink,
c2b27bf2
AM
22359+ plinked, plinked ? au_plink_test(a->inode) : 0);
22360+
22361+out:
22362+ AuTraceErr(err);
22363+ return err;
22364+}
22365+
22366+/* make sure the parent dir is fine */
392086de 22367+static int au_mvd_args_parent(const unsigned char dmsg,
c2b27bf2
AM
22368+ struct au_mvd_args *a)
22369+{
22370+ int err;
22371+ aufs_bindex_t bindex;
22372+
22373+ err = 0;
22374+ if (unlikely(au_alive_dir(a->parent))) {
22375+ err = -ENOENT;
392086de 22376+ AU_MVD_PR(dmsg, "parent dir is dead\n");
c2b27bf2
AM
22377+ goto out;
22378+ }
22379+
22380+ a->bopq = au_dbdiropq(a->parent);
22381+ bindex = au_wbr_nonopq(a->dentry, a->mvd_bdst);
22382+ AuDbg("b%d\n", bindex);
22383+ if (unlikely((bindex >= 0 && bindex < a->mvd_bdst)
22384+ || (a->bopq != -1 && a->bopq < a->mvd_bdst))) {
22385+ err = -EINVAL;
392086de
AM
22386+ a->mvd_errno = EAU_MVDOWN_OPAQUE;
22387+ AU_MVD_PR(dmsg, "ancestor is opaque b%d, b%d\n",
c2b27bf2
AM
22388+ a->bopq, a->mvd_bdst);
22389+ }
22390+
22391+out:
22392+ AuTraceErr(err);
22393+ return err;
22394+}
22395+
392086de 22396+static int au_mvd_args_intermediate(const unsigned char dmsg,
c2b27bf2
AM
22397+ struct au_mvd_args *a)
22398+{
22399+ int err;
22400+ struct au_dinfo *dinfo, *tmp;
22401+
22402+ /* lookup the next lower positive entry */
22403+ err = -ENOMEM;
22404+ tmp = au_di_alloc(a->sb, AuLsc_DI_TMP);
22405+ if (unlikely(!tmp))
22406+ goto out;
22407+
22408+ a->bfound = -1;
22409+ a->bwh = -1;
22410+ dinfo = au_di(a->dentry);
22411+ au_di_cp(tmp, dinfo);
22412+ au_di_swap(tmp, dinfo);
22413+
22414+ /* returns the number of positive dentries */
22415+ err = au_lkup_dentry(a->dentry, a->mvd_bsrc + 1, /*type*/0);
22416+ if (!err)
22417+ a->bwh = au_dbwh(a->dentry);
22418+ else if (err > 0)
22419+ a->bfound = au_dbstart(a->dentry);
22420+
22421+ au_di_swap(tmp, dinfo);
22422+ au_rw_write_unlock(&tmp->di_rwsem);
22423+ au_di_free(tmp);
22424+ if (unlikely(err < 0))
392086de 22425+ AU_MVD_PR(dmsg, "failed look-up lower\n");
c2b27bf2
AM
22426+
22427+ /*
22428+ * here, we have these cases.
22429+ * bfound == -1
22430+ * no positive dentry under bsrc. there are more sub-cases.
22431+ * bwh < 0
22432+ * there is no whiteout, we can safely move-down.
22433+ * bwh <= bsrc
22434+ * impossible
22435+ * bsrc < bwh && bwh < bdst
22436+ * there is a whiteout on RO branch. cannot proceed.
22437+ * bwh == bdst
22438+ * there is a whiteout on the RW target branch. it should
22439+ * be removed.
22440+ * bdst < bwh
22441+ * there is a whiteout on some unrelated branch.
22442+ * -1 < bfound && bfound <= bsrc
22443+ * impossible.
22444+ * bfound < bdst
22445+ * found, but it is on RO branch between bsrc and bdst. cannot
22446+ * proceed.
22447+ * bfound == bdst
22448+ * found, replace it if AUFS_MVDOWN_FORCE is set. otherwise return
22449+ * error.
22450+ * bdst < bfound
22451+ * found, after we create the file on bdst, it will be hidden.
22452+ */
22453+
22454+ AuDebugOn(a->bfound == -1
22455+ && a->bwh != -1
22456+ && a->bwh <= a->mvd_bsrc);
22457+ AuDebugOn(-1 < a->bfound
22458+ && a->bfound <= a->mvd_bsrc);
22459+
22460+ err = -EINVAL;
22461+ if (a->bfound == -1
22462+ && a->mvd_bsrc < a->bwh
22463+ && a->bwh != -1
22464+ && a->bwh < a->mvd_bdst) {
392086de
AM
22465+ a->mvd_errno = EAU_MVDOWN_WHITEOUT;
22466+ AU_MVD_PR(dmsg, "bsrc %d, bdst %d, bfound %d, bwh %d\n",
c2b27bf2
AM
22467+ a->mvd_bsrc, a->mvd_bdst, a->bfound, a->bwh);
22468+ goto out;
22469+ } else if (a->bfound != -1 && a->bfound < a->mvd_bdst) {
392086de
AM
22470+ a->mvd_errno = EAU_MVDOWN_UPPER;
22471+ AU_MVD_PR(dmsg, "bdst %d, bfound %d\n",
c2b27bf2
AM
22472+ a->mvd_bdst, a->bfound);
22473+ goto out;
22474+ }
22475+
22476+ err = 0; /* success */
22477+
22478+out:
22479+ AuTraceErr(err);
22480+ return err;
22481+}
22482+
392086de 22483+static int au_mvd_args_exist(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22484+{
22485+ int err;
22486+
392086de
AM
22487+ err = 0;
22488+ if (!(a->mvdown.flags & AUFS_MVDOWN_OWLOWER)
22489+ && a->bfound == a->mvd_bdst)
22490+ err = -EEXIST;
c2b27bf2
AM
22491+ AuTraceErr(err);
22492+ return err;
22493+}
22494+
392086de 22495+static int au_mvd_args(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22496+{
22497+ int err;
22498+ struct au_branch *br;
22499+
22500+ err = -EISDIR;
22501+ if (unlikely(S_ISDIR(a->inode->i_mode)))
22502+ goto out;
22503+
22504+ err = -EINVAL;
392086de
AM
22505+ if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_UPPER))
22506+ a->mvd_bsrc = au_ibstart(a->inode);
22507+ else {
22508+ a->mvd_bsrc = au_br_index(a->sb, a->mvd_src_brid);
22509+ if (unlikely(a->mvd_bsrc < 0
22510+ || (a->mvd_bsrc < au_dbstart(a->dentry)
22511+ || au_dbend(a->dentry) < a->mvd_bsrc
22512+ || !au_h_dptr(a->dentry, a->mvd_bsrc))
22513+ || (a->mvd_bsrc < au_ibstart(a->inode)
22514+ || au_ibend(a->inode) < a->mvd_bsrc
22515+ || !au_h_iptr(a->inode, a->mvd_bsrc)))) {
22516+ a->mvd_errno = EAU_MVDOWN_NOUPPER;
22517+ AU_MVD_PR(dmsg, "no upper\n");
22518+ goto out;
22519+ }
22520+ }
c2b27bf2 22521+ if (unlikely(a->mvd_bsrc == au_sbend(a->sb))) {
392086de
AM
22522+ a->mvd_errno = EAU_MVDOWN_BOTTOM;
22523+ AU_MVD_PR(dmsg, "on the bottom\n");
c2b27bf2
AM
22524+ goto out;
22525+ }
392086de 22526+ a->mvd_h_src_inode = au_h_iptr(a->inode, a->mvd_bsrc);
c2b27bf2
AM
22527+ br = au_sbr(a->sb, a->mvd_bsrc);
22528+ err = au_br_rdonly(br);
392086de
AM
22529+ if (!(a->mvdown.flags & AUFS_MVDOWN_ROUPPER)) {
22530+ if (unlikely(err))
22531+ goto out;
22532+ } else if (!(vfsub_native_ro(a->mvd_h_src_inode)
22533+ || IS_APPEND(a->mvd_h_src_inode))) {
22534+ if (err)
22535+ a->mvdown.flags |= AUFS_MVDOWN_ROUPPER_R;
22536+ /* go on */
22537+ } else
c2b27bf2
AM
22538+ goto out;
22539+
22540+ err = -EINVAL;
392086de
AM
22541+ if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_LOWER)) {
22542+ a->mvd_bdst = find_lower_writable(a);
22543+ if (unlikely(a->mvd_bdst < 0)) {
22544+ a->mvd_errno = EAU_MVDOWN_BOTTOM;
22545+ AU_MVD_PR(dmsg, "no writable lower branch\n");
22546+ goto out;
22547+ }
22548+ } else {
22549+ a->mvd_bdst = au_br_index(a->sb, a->mvd_dst_brid);
22550+ if (unlikely(a->mvd_bdst < 0
22551+ || au_sbend(a->sb) < a->mvd_bdst)) {
22552+ a->mvd_errno = EAU_MVDOWN_NOLOWERBR;
22553+ AU_MVD_PR(dmsg, "no lower brid\n");
22554+ goto out;
22555+ }
c2b27bf2
AM
22556+ }
22557+
392086de 22558+ err = au_mvd_args_busy(dmsg, a);
c2b27bf2 22559+ if (!err)
392086de 22560+ err = au_mvd_args_parent(dmsg, a);
c2b27bf2 22561+ if (!err)
392086de 22562+ err = au_mvd_args_intermediate(dmsg, a);
c2b27bf2 22563+ if (!err)
392086de 22564+ err = au_mvd_args_exist(dmsg, a);
c2b27bf2
AM
22565+ if (!err)
22566+ AuDbg("b%d, b%d\n", a->mvd_bsrc, a->mvd_bdst);
22567+
22568+out:
22569+ AuTraceErr(err);
22570+ return err;
22571+}
22572+
22573+int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *uarg)
22574+{
392086de
AM
22575+ int err, e;
22576+ unsigned char dmsg;
22577+ struct au_mvd_args *args;
ab036dbd 22578+ struct inode *inode;
c2b27bf2 22579+
ab036dbd 22580+ inode = d_inode(dentry);
c2b27bf2
AM
22581+ err = -EPERM;
22582+ if (unlikely(!capable(CAP_SYS_ADMIN)))
22583+ goto out;
22584+
392086de
AM
22585+ err = -ENOMEM;
22586+ args = kmalloc(sizeof(*args), GFP_NOFS);
22587+ if (unlikely(!args))
22588+ goto out;
22589+
22590+ err = copy_from_user(&args->mvdown, uarg, sizeof(args->mvdown));
22591+ if (!err)
22592+ err = !access_ok(VERIFY_WRITE, uarg, sizeof(*uarg));
c2b27bf2
AM
22593+ if (unlikely(err)) {
22594+ err = -EFAULT;
392086de
AM
22595+ AuTraceErr(err);
22596+ goto out_free;
c2b27bf2 22597+ }
392086de
AM
22598+ AuDbg("flags 0x%x\n", args->mvdown.flags);
22599+ args->mvdown.flags &= ~(AUFS_MVDOWN_ROLOWER_R | AUFS_MVDOWN_ROUPPER_R);
22600+ args->mvdown.au_errno = 0;
22601+ args->dentry = dentry;
ab036dbd 22602+ args->inode = inode;
392086de 22603+ args->sb = dentry->d_sb;
c2b27bf2 22604+
392086de
AM
22605+ err = -ENOENT;
22606+ dmsg = !!(args->mvdown.flags & AUFS_MVDOWN_DMSG);
22607+ args->parent = dget_parent(dentry);
5527c038 22608+ args->dir = d_inode(args->parent);
392086de
AM
22609+ mutex_lock_nested(&args->dir->i_mutex, I_MUTEX_PARENT);
22610+ dput(args->parent);
22611+ if (unlikely(args->parent != dentry->d_parent)) {
22612+ AU_MVD_PR(dmsg, "parent dir is moved\n");
c2b27bf2
AM
22613+ goto out_dir;
22614+ }
22615+
ab036dbd
AM
22616+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
22617+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_NOPLMW);
c2b27bf2
AM
22618+ if (unlikely(err))
22619+ goto out_inode;
22620+
392086de
AM
22621+ di_write_lock_parent(args->parent);
22622+ err = au_mvd_args(dmsg, args);
c2b27bf2
AM
22623+ if (unlikely(err))
22624+ goto out_parent;
22625+
392086de 22626+ err = au_do_mvdown(dmsg, args);
c2b27bf2
AM
22627+ if (unlikely(err))
22628+ goto out_parent;
c2b27bf2 22629+
392086de 22630+ au_cpup_attr_timesizes(args->dir);
ab036dbd
AM
22631+ au_cpup_attr_timesizes(inode);
22632+ if (!(args->mvdown.flags & AUFS_MVDOWN_KUPPER))
22633+ au_cpup_igen(inode, au_h_iptr(inode, args->mvd_bdst));
c2b27bf2
AM
22634+ /* au_digen_dec(dentry); */
22635+
22636+out_parent:
392086de 22637+ di_write_unlock(args->parent);
c2b27bf2
AM
22638+ aufs_read_unlock(dentry, AuLock_DW);
22639+out_inode:
ab036dbd 22640+ mutex_unlock(&inode->i_mutex);
c2b27bf2 22641+out_dir:
392086de
AM
22642+ mutex_unlock(&args->dir->i_mutex);
22643+out_free:
22644+ e = copy_to_user(uarg, &args->mvdown, sizeof(args->mvdown));
22645+ if (unlikely(e))
22646+ err = -EFAULT;
22647+ kfree(args);
c2b27bf2
AM
22648+out:
22649+ AuTraceErr(err);
22650+ return err;
22651+}
22652diff -urN /usr/share/empty/fs/aufs/opts.c linux/fs/aufs/opts.c
22653--- /usr/share/empty/fs/aufs/opts.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd
AM
22654+++ linux/fs/aufs/opts.c 2015-12-10 17:59:16.839499823 +0100
22655@@ -0,0 +1,1859 @@
1facf9fc 22656+/*
2000de60 22657+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 22658+ *
22659+ * This program, aufs is free software; you can redistribute it and/or modify
22660+ * it under the terms of the GNU General Public License as published by
22661+ * the Free Software Foundation; either version 2 of the License, or
22662+ * (at your option) any later version.
dece6358
AM
22663+ *
22664+ * This program is distributed in the hope that it will be useful,
22665+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22666+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22667+ * GNU General Public License for more details.
22668+ *
22669+ * You should have received a copy of the GNU General Public License
523b37e3 22670+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 22671+ */
22672+
22673+/*
22674+ * mount options/flags
22675+ */
22676+
dece6358 22677+#include <linux/namei.h>
1facf9fc 22678+#include <linux/types.h> /* a distribution requires */
22679+#include <linux/parser.h>
22680+#include "aufs.h"
22681+
22682+/* ---------------------------------------------------------------------- */
22683+
22684+enum {
22685+ Opt_br,
7e9cd9fe
AM
22686+ Opt_add, Opt_del, Opt_mod, Opt_append, Opt_prepend,
22687+ Opt_idel, Opt_imod,
22688+ Opt_dirwh, Opt_rdcache, Opt_rdblk, Opt_rdhash,
dece6358 22689+ Opt_rdblk_def, Opt_rdhash_def,
7e9cd9fe 22690+ Opt_xino, Opt_noxino,
1facf9fc 22691+ Opt_trunc_xino, Opt_trunc_xino_v, Opt_notrunc_xino,
22692+ Opt_trunc_xino_path, Opt_itrunc_xino,
22693+ Opt_trunc_xib, Opt_notrunc_xib,
dece6358 22694+ Opt_shwh, Opt_noshwh,
1facf9fc 22695+ Opt_plink, Opt_noplink, Opt_list_plink,
22696+ Opt_udba,
4a4d8108 22697+ Opt_dio, Opt_nodio,
1facf9fc 22698+ Opt_diropq_a, Opt_diropq_w,
22699+ Opt_warn_perm, Opt_nowarn_perm,
22700+ Opt_wbr_copyup, Opt_wbr_create,
076b876e 22701+ Opt_fhsm_sec,
1facf9fc 22702+ Opt_verbose, Opt_noverbose,
22703+ Opt_sum, Opt_nosum, Opt_wsum,
076b876e 22704+ Opt_dirperm1, Opt_nodirperm1,
c1595e42 22705+ Opt_acl, Opt_noacl,
1facf9fc 22706+ Opt_tail, Opt_ignore, Opt_ignore_silent, Opt_err
22707+};
22708+
22709+static match_table_t options = {
22710+ {Opt_br, "br=%s"},
22711+ {Opt_br, "br:%s"},
22712+
22713+ {Opt_add, "add=%d:%s"},
22714+ {Opt_add, "add:%d:%s"},
22715+ {Opt_add, "ins=%d:%s"},
22716+ {Opt_add, "ins:%d:%s"},
22717+ {Opt_append, "append=%s"},
22718+ {Opt_append, "append:%s"},
22719+ {Opt_prepend, "prepend=%s"},
22720+ {Opt_prepend, "prepend:%s"},
22721+
22722+ {Opt_del, "del=%s"},
22723+ {Opt_del, "del:%s"},
22724+ /* {Opt_idel, "idel:%d"}, */
22725+ {Opt_mod, "mod=%s"},
22726+ {Opt_mod, "mod:%s"},
22727+ /* {Opt_imod, "imod:%d:%s"}, */
22728+
22729+ {Opt_dirwh, "dirwh=%d"},
22730+
22731+ {Opt_xino, "xino=%s"},
22732+ {Opt_noxino, "noxino"},
22733+ {Opt_trunc_xino, "trunc_xino"},
22734+ {Opt_trunc_xino_v, "trunc_xino_v=%d:%d"},
22735+ {Opt_notrunc_xino, "notrunc_xino"},
22736+ {Opt_trunc_xino_path, "trunc_xino=%s"},
22737+ {Opt_itrunc_xino, "itrunc_xino=%d"},
22738+ /* {Opt_zxino, "zxino=%s"}, */
22739+ {Opt_trunc_xib, "trunc_xib"},
22740+ {Opt_notrunc_xib, "notrunc_xib"},
22741+
e49829fe 22742+#ifdef CONFIG_PROC_FS
1facf9fc 22743+ {Opt_plink, "plink"},
e49829fe
JR
22744+#else
22745+ {Opt_ignore_silent, "plink"},
22746+#endif
22747+
1facf9fc 22748+ {Opt_noplink, "noplink"},
e49829fe 22749+
1facf9fc 22750+#ifdef CONFIG_AUFS_DEBUG
22751+ {Opt_list_plink, "list_plink"},
22752+#endif
22753+
22754+ {Opt_udba, "udba=%s"},
22755+
4a4d8108
AM
22756+ {Opt_dio, "dio"},
22757+ {Opt_nodio, "nodio"},
22758+
076b876e
AM
22759+#ifdef CONFIG_AUFS_FHSM
22760+ {Opt_fhsm_sec, "fhsm_sec=%d"},
22761+#else
22762+ {Opt_ignore_silent, "fhsm_sec=%d"},
22763+#endif
22764+
1facf9fc 22765+ {Opt_diropq_a, "diropq=always"},
22766+ {Opt_diropq_a, "diropq=a"},
22767+ {Opt_diropq_w, "diropq=whiteouted"},
22768+ {Opt_diropq_w, "diropq=w"},
22769+
22770+ {Opt_warn_perm, "warn_perm"},
22771+ {Opt_nowarn_perm, "nowarn_perm"},
22772+
22773+ /* keep them temporarily */
1facf9fc 22774+ {Opt_ignore_silent, "nodlgt"},
1facf9fc 22775+ {Opt_ignore_silent, "clean_plink"},
22776+
dece6358
AM
22777+#ifdef CONFIG_AUFS_SHWH
22778+ {Opt_shwh, "shwh"},
22779+#endif
22780+ {Opt_noshwh, "noshwh"},
22781+
076b876e
AM
22782+ {Opt_dirperm1, "dirperm1"},
22783+ {Opt_nodirperm1, "nodirperm1"},
22784+
1facf9fc 22785+ {Opt_verbose, "verbose"},
22786+ {Opt_verbose, "v"},
22787+ {Opt_noverbose, "noverbose"},
22788+ {Opt_noverbose, "quiet"},
22789+ {Opt_noverbose, "q"},
22790+ {Opt_noverbose, "silent"},
22791+
22792+ {Opt_sum, "sum"},
22793+ {Opt_nosum, "nosum"},
22794+ {Opt_wsum, "wsum"},
22795+
22796+ {Opt_rdcache, "rdcache=%d"},
22797+ {Opt_rdblk, "rdblk=%d"},
dece6358 22798+ {Opt_rdblk_def, "rdblk=def"},
1facf9fc 22799+ {Opt_rdhash, "rdhash=%d"},
dece6358 22800+ {Opt_rdhash_def, "rdhash=def"},
1facf9fc 22801+
22802+ {Opt_wbr_create, "create=%s"},
22803+ {Opt_wbr_create, "create_policy=%s"},
22804+ {Opt_wbr_copyup, "cpup=%s"},
22805+ {Opt_wbr_copyup, "copyup=%s"},
22806+ {Opt_wbr_copyup, "copyup_policy=%s"},
22807+
c1595e42
JR
22808+ /* generic VFS flag */
22809+#ifdef CONFIG_FS_POSIX_ACL
22810+ {Opt_acl, "acl"},
22811+ {Opt_noacl, "noacl"},
22812+#else
22813+ {Opt_ignore_silent, "acl"},
22814+ {Opt_ignore_silent, "noacl"},
22815+#endif
22816+
1facf9fc 22817+ /* internal use for the scripts */
22818+ {Opt_ignore_silent, "si=%s"},
22819+
22820+ {Opt_br, "dirs=%s"},
22821+ {Opt_ignore, "debug=%d"},
22822+ {Opt_ignore, "delete=whiteout"},
22823+ {Opt_ignore, "delete=all"},
22824+ {Opt_ignore, "imap=%s"},
22825+
1308ab2a 22826+ /* temporary workaround, due to old mount(8)? */
22827+ {Opt_ignore_silent, "relatime"},
22828+
1facf9fc 22829+ {Opt_err, NULL}
22830+};
22831+
22832+/* ---------------------------------------------------------------------- */
22833+
076b876e 22834+static const char *au_parser_pattern(int val, match_table_t tbl)
1facf9fc 22835+{
076b876e
AM
22836+ struct match_token *p;
22837+
22838+ p = tbl;
22839+ while (p->pattern) {
22840+ if (p->token == val)
22841+ return p->pattern;
22842+ p++;
1facf9fc 22843+ }
22844+ BUG();
22845+ return "??";
22846+}
22847+
076b876e
AM
22848+static const char *au_optstr(int *val, match_table_t tbl)
22849+{
22850+ struct match_token *p;
22851+ int v;
22852+
22853+ v = *val;
2000de60
JR
22854+ if (!v)
22855+ goto out;
076b876e 22856+ p = tbl;
2000de60
JR
22857+ while (p->pattern) {
22858+ if (p->token
22859+ && (v & p->token) == p->token) {
076b876e
AM
22860+ *val &= ~p->token;
22861+ return p->pattern;
22862+ }
22863+ p++;
22864+ }
2000de60
JR
22865+
22866+out:
076b876e
AM
22867+ return NULL;
22868+}
22869+
1facf9fc 22870+/* ---------------------------------------------------------------------- */
22871+
1e00d052 22872+static match_table_t brperm = {
1facf9fc 22873+ {AuBrPerm_RO, AUFS_BRPERM_RO},
22874+ {AuBrPerm_RR, AUFS_BRPERM_RR},
22875+ {AuBrPerm_RW, AUFS_BRPERM_RW},
1e00d052
AM
22876+ {0, NULL}
22877+};
1facf9fc 22878+
86dc4139 22879+static match_table_t brattr = {
076b876e
AM
22880+ /* general */
22881+ {AuBrAttr_COO_REG, AUFS_BRATTR_COO_REG},
22882+ {AuBrAttr_COO_ALL, AUFS_BRATTR_COO_ALL},
c1595e42 22883+ /* 'unpin' attrib is meaningless since linux-3.18-rc1 */
86dc4139 22884+ {AuBrAttr_UNPIN, AUFS_BRATTR_UNPIN},
2000de60 22885+#ifdef CONFIG_AUFS_FHSM
076b876e 22886+ {AuBrAttr_FHSM, AUFS_BRATTR_FHSM},
2000de60
JR
22887+#endif
22888+#ifdef CONFIG_AUFS_XATTR
c1595e42
JR
22889+ {AuBrAttr_ICEX, AUFS_BRATTR_ICEX},
22890+ {AuBrAttr_ICEX_SEC, AUFS_BRATTR_ICEX_SEC},
22891+ {AuBrAttr_ICEX_SYS, AUFS_BRATTR_ICEX_SYS},
22892+ {AuBrAttr_ICEX_TR, AUFS_BRATTR_ICEX_TR},
22893+ {AuBrAttr_ICEX_USR, AUFS_BRATTR_ICEX_USR},
22894+ {AuBrAttr_ICEX_OTH, AUFS_BRATTR_ICEX_OTH},
2000de60 22895+#endif
076b876e
AM
22896+
22897+ /* ro/rr branch */
1e00d052 22898+ {AuBrRAttr_WH, AUFS_BRRATTR_WH},
076b876e
AM
22899+
22900+ /* rw branch */
22901+ {AuBrWAttr_MOO, AUFS_BRWATTR_MOO},
1e00d052 22902+ {AuBrWAttr_NoLinkWH, AUFS_BRWATTR_NLWH},
076b876e 22903+
1e00d052 22904+ {0, NULL}
1facf9fc 22905+};
22906+
1e00d052
AM
22907+static int br_attr_val(char *str, match_table_t table, substring_t args[])
22908+{
22909+ int attr, v;
22910+ char *p;
22911+
22912+ attr = 0;
22913+ do {
22914+ p = strchr(str, '+');
22915+ if (p)
22916+ *p = 0;
22917+ v = match_token(str, table, args);
076b876e
AM
22918+ if (v) {
22919+ if (v & AuBrAttr_CMOO_Mask)
22920+ attr &= ~AuBrAttr_CMOO_Mask;
1e00d052 22921+ attr |= v;
076b876e 22922+ } else {
1e00d052
AM
22923+ if (p)
22924+ *p = '+';
0c3ec466 22925+ pr_warn("ignored branch attribute %s\n", str);
1e00d052
AM
22926+ break;
22927+ }
22928+ if (p)
22929+ str = p + 1;
22930+ } while (p);
22931+
22932+ return attr;
22933+}
22934+
076b876e
AM
22935+static int au_do_optstr_br_attr(au_br_perm_str_t *str, int perm)
22936+{
22937+ int sz;
22938+ const char *p;
22939+ char *q;
22940+
076b876e
AM
22941+ q = str->a;
22942+ *q = 0;
22943+ p = au_optstr(&perm, brattr);
22944+ if (p) {
22945+ sz = strlen(p);
22946+ memcpy(q, p, sz + 1);
22947+ q += sz;
22948+ } else
22949+ goto out;
22950+
22951+ do {
22952+ p = au_optstr(&perm, brattr);
22953+ if (p) {
22954+ *q++ = '+';
22955+ sz = strlen(p);
22956+ memcpy(q, p, sz + 1);
22957+ q += sz;
22958+ }
22959+ } while (p);
22960+
22961+out:
c1595e42 22962+ return q - str->a;
076b876e
AM
22963+}
22964+
4a4d8108 22965+static int noinline_for_stack br_perm_val(char *perm)
1facf9fc 22966+{
076b876e
AM
22967+ int val, bad, sz;
22968+ char *p;
1facf9fc 22969+ substring_t args[MAX_OPT_ARGS];
076b876e 22970+ au_br_perm_str_t attr;
1facf9fc 22971+
1e00d052
AM
22972+ p = strchr(perm, '+');
22973+ if (p)
22974+ *p = 0;
22975+ val = match_token(perm, brperm, args);
22976+ if (!val) {
22977+ if (p)
22978+ *p = '+';
0c3ec466 22979+ pr_warn("ignored branch permission %s\n", perm);
1e00d052
AM
22980+ val = AuBrPerm_RO;
22981+ goto out;
22982+ }
22983+ if (!p)
22984+ goto out;
22985+
076b876e
AM
22986+ val |= br_attr_val(p + 1, brattr, args);
22987+
22988+ bad = 0;
86dc4139 22989+ switch (val & AuBrPerm_Mask) {
1e00d052
AM
22990+ case AuBrPerm_RO:
22991+ case AuBrPerm_RR:
076b876e
AM
22992+ bad = val & AuBrWAttr_Mask;
22993+ val &= ~AuBrWAttr_Mask;
1e00d052
AM
22994+ break;
22995+ case AuBrPerm_RW:
076b876e
AM
22996+ bad = val & AuBrRAttr_Mask;
22997+ val &= ~AuBrRAttr_Mask;
1e00d052
AM
22998+ break;
22999+ }
c1595e42
JR
23000+
23001+ /*
23002+ * 'unpin' attrib becomes meaningless since linux-3.18-rc1, but aufs
23003+ * does not treat it as an error, just warning.
23004+ * this is a tiny guard for the user operation.
23005+ */
23006+ if (val & AuBrAttr_UNPIN) {
23007+ bad |= AuBrAttr_UNPIN;
23008+ val &= ~AuBrAttr_UNPIN;
23009+ }
23010+
076b876e
AM
23011+ if (unlikely(bad)) {
23012+ sz = au_do_optstr_br_attr(&attr, bad);
23013+ AuDebugOn(!sz);
23014+ pr_warn("ignored branch attribute %s\n", attr.a);
23015+ }
1e00d052
AM
23016+
23017+out:
1facf9fc 23018+ return val;
23019+}
23020+
076b876e 23021+void au_optstr_br_perm(au_br_perm_str_t *str, int perm)
1facf9fc 23022+{
076b876e
AM
23023+ au_br_perm_str_t attr;
23024+ const char *p;
23025+ char *q;
1e00d052
AM
23026+ int sz;
23027+
076b876e
AM
23028+ q = str->a;
23029+ p = au_optstr(&perm, brperm);
23030+ AuDebugOn(!p || !*p);
23031+ sz = strlen(p);
23032+ memcpy(q, p, sz + 1);
23033+ q += sz;
1e00d052 23034+
076b876e
AM
23035+ sz = au_do_optstr_br_attr(&attr, perm);
23036+ if (sz) {
23037+ *q++ = '+';
23038+ memcpy(q, attr.a, sz + 1);
1e00d052
AM
23039+ }
23040+
076b876e 23041+ AuDebugOn(strlen(str->a) >= sizeof(str->a));
1facf9fc 23042+}
23043+
23044+/* ---------------------------------------------------------------------- */
23045+
23046+static match_table_t udbalevel = {
23047+ {AuOpt_UDBA_REVAL, "reval"},
23048+ {AuOpt_UDBA_NONE, "none"},
4a4d8108
AM
23049+#ifdef CONFIG_AUFS_HNOTIFY
23050+ {AuOpt_UDBA_HNOTIFY, "notify"}, /* abstraction */
23051+#ifdef CONFIG_AUFS_HFSNOTIFY
23052+ {AuOpt_UDBA_HNOTIFY, "fsnotify"},
4a4d8108 23053+#endif
1facf9fc 23054+#endif
23055+ {-1, NULL}
23056+};
23057+
4a4d8108 23058+static int noinline_for_stack udba_val(char *str)
1facf9fc 23059+{
23060+ substring_t args[MAX_OPT_ARGS];
23061+
7f207e10 23062+ return match_token(str, udbalevel, args);
1facf9fc 23063+}
23064+
23065+const char *au_optstr_udba(int udba)
23066+{
076b876e 23067+ return au_parser_pattern(udba, udbalevel);
1facf9fc 23068+}
23069+
23070+/* ---------------------------------------------------------------------- */
23071+
23072+static match_table_t au_wbr_create_policy = {
23073+ {AuWbrCreate_TDP, "tdp"},
23074+ {AuWbrCreate_TDP, "top-down-parent"},
23075+ {AuWbrCreate_RR, "rr"},
23076+ {AuWbrCreate_RR, "round-robin"},
23077+ {AuWbrCreate_MFS, "mfs"},
23078+ {AuWbrCreate_MFS, "most-free-space"},
23079+ {AuWbrCreate_MFSV, "mfs:%d"},
23080+ {AuWbrCreate_MFSV, "most-free-space:%d"},
23081+
23082+ {AuWbrCreate_MFSRR, "mfsrr:%d"},
23083+ {AuWbrCreate_MFSRRV, "mfsrr:%d:%d"},
23084+ {AuWbrCreate_PMFS, "pmfs"},
23085+ {AuWbrCreate_PMFSV, "pmfs:%d"},
392086de
AM
23086+ {AuWbrCreate_PMFSRR, "pmfsrr:%d"},
23087+ {AuWbrCreate_PMFSRRV, "pmfsrr:%d:%d"},
1facf9fc 23088+
23089+ {-1, NULL}
23090+};
23091+
dece6358
AM
23092+/*
23093+ * cf. linux/lib/parser.c and cmdline.c
23094+ * gave up calling memparse() since it uses simple_strtoull() instead of
9dbd164d 23095+ * kstrto...().
dece6358 23096+ */
4a4d8108
AM
23097+static int noinline_for_stack
23098+au_match_ull(substring_t *s, unsigned long long *result)
1facf9fc 23099+{
23100+ int err;
23101+ unsigned int len;
23102+ char a[32];
23103+
23104+ err = -ERANGE;
23105+ len = s->to - s->from;
23106+ if (len + 1 <= sizeof(a)) {
23107+ memcpy(a, s->from, len);
23108+ a[len] = '\0';
9dbd164d 23109+ err = kstrtoull(a, 0, result);
1facf9fc 23110+ }
23111+ return err;
23112+}
23113+
23114+static int au_wbr_mfs_wmark(substring_t *arg, char *str,
23115+ struct au_opt_wbr_create *create)
23116+{
23117+ int err;
23118+ unsigned long long ull;
23119+
23120+ err = 0;
23121+ if (!au_match_ull(arg, &ull))
23122+ create->mfsrr_watermark = ull;
23123+ else {
4a4d8108 23124+ pr_err("bad integer in %s\n", str);
1facf9fc 23125+ err = -EINVAL;
23126+ }
23127+
23128+ return err;
23129+}
23130+
23131+static int au_wbr_mfs_sec(substring_t *arg, char *str,
23132+ struct au_opt_wbr_create *create)
23133+{
23134+ int n, err;
23135+
23136+ err = 0;
027c5e7a 23137+ if (!match_int(arg, &n) && 0 <= n && n <= AUFS_MFS_MAX_SEC)
1facf9fc 23138+ create->mfs_second = n;
23139+ else {
4a4d8108 23140+ pr_err("bad integer in %s\n", str);
1facf9fc 23141+ err = -EINVAL;
23142+ }
23143+
23144+ return err;
23145+}
23146+
4a4d8108
AM
23147+static int noinline_for_stack
23148+au_wbr_create_val(char *str, struct au_opt_wbr_create *create)
1facf9fc 23149+{
23150+ int err, e;
23151+ substring_t args[MAX_OPT_ARGS];
23152+
23153+ err = match_token(str, au_wbr_create_policy, args);
23154+ create->wbr_create = err;
23155+ switch (err) {
23156+ case AuWbrCreate_MFSRRV:
392086de 23157+ case AuWbrCreate_PMFSRRV:
1facf9fc 23158+ e = au_wbr_mfs_wmark(&args[0], str, create);
23159+ if (!e)
23160+ e = au_wbr_mfs_sec(&args[1], str, create);
23161+ if (unlikely(e))
23162+ err = e;
23163+ break;
23164+ case AuWbrCreate_MFSRR:
392086de 23165+ case AuWbrCreate_PMFSRR:
1facf9fc 23166+ e = au_wbr_mfs_wmark(&args[0], str, create);
23167+ if (unlikely(e)) {
23168+ err = e;
23169+ break;
23170+ }
23171+ /*FALLTHROUGH*/
23172+ case AuWbrCreate_MFS:
23173+ case AuWbrCreate_PMFS:
027c5e7a 23174+ create->mfs_second = AUFS_MFS_DEF_SEC;
1facf9fc 23175+ break;
23176+ case AuWbrCreate_MFSV:
23177+ case AuWbrCreate_PMFSV:
23178+ e = au_wbr_mfs_sec(&args[0], str, create);
23179+ if (unlikely(e))
23180+ err = e;
23181+ break;
23182+ }
23183+
23184+ return err;
23185+}
23186+
23187+const char *au_optstr_wbr_create(int wbr_create)
23188+{
076b876e 23189+ return au_parser_pattern(wbr_create, au_wbr_create_policy);
1facf9fc 23190+}
23191+
23192+static match_table_t au_wbr_copyup_policy = {
23193+ {AuWbrCopyup_TDP, "tdp"},
23194+ {AuWbrCopyup_TDP, "top-down-parent"},
23195+ {AuWbrCopyup_BUP, "bup"},
23196+ {AuWbrCopyup_BUP, "bottom-up-parent"},
23197+ {AuWbrCopyup_BU, "bu"},
23198+ {AuWbrCopyup_BU, "bottom-up"},
23199+ {-1, NULL}
23200+};
23201+
4a4d8108 23202+static int noinline_for_stack au_wbr_copyup_val(char *str)
1facf9fc 23203+{
23204+ substring_t args[MAX_OPT_ARGS];
23205+
23206+ return match_token(str, au_wbr_copyup_policy, args);
23207+}
23208+
23209+const char *au_optstr_wbr_copyup(int wbr_copyup)
23210+{
076b876e 23211+ return au_parser_pattern(wbr_copyup, au_wbr_copyup_policy);
1facf9fc 23212+}
23213+
23214+/* ---------------------------------------------------------------------- */
23215+
23216+static const int lkup_dirflags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
23217+
23218+static void dump_opts(struct au_opts *opts)
23219+{
23220+#ifdef CONFIG_AUFS_DEBUG
23221+ /* reduce stack space */
23222+ union {
23223+ struct au_opt_add *add;
23224+ struct au_opt_del *del;
23225+ struct au_opt_mod *mod;
23226+ struct au_opt_xino *xino;
23227+ struct au_opt_xino_itrunc *xino_itrunc;
23228+ struct au_opt_wbr_create *create;
23229+ } u;
23230+ struct au_opt *opt;
23231+
23232+ opt = opts->opt;
23233+ while (opt->type != Opt_tail) {
23234+ switch (opt->type) {
23235+ case Opt_add:
23236+ u.add = &opt->add;
23237+ AuDbg("add {b%d, %s, 0x%x, %p}\n",
23238+ u.add->bindex, u.add->pathname, u.add->perm,
23239+ u.add->path.dentry);
23240+ break;
23241+ case Opt_del:
23242+ case Opt_idel:
23243+ u.del = &opt->del;
23244+ AuDbg("del {%s, %p}\n",
23245+ u.del->pathname, u.del->h_path.dentry);
23246+ break;
23247+ case Opt_mod:
23248+ case Opt_imod:
23249+ u.mod = &opt->mod;
23250+ AuDbg("mod {%s, 0x%x, %p}\n",
23251+ u.mod->path, u.mod->perm, u.mod->h_root);
23252+ break;
23253+ case Opt_append:
23254+ u.add = &opt->add;
23255+ AuDbg("append {b%d, %s, 0x%x, %p}\n",
23256+ u.add->bindex, u.add->pathname, u.add->perm,
23257+ u.add->path.dentry);
23258+ break;
23259+ case Opt_prepend:
23260+ u.add = &opt->add;
23261+ AuDbg("prepend {b%d, %s, 0x%x, %p}\n",
23262+ u.add->bindex, u.add->pathname, u.add->perm,
23263+ u.add->path.dentry);
23264+ break;
23265+ case Opt_dirwh:
23266+ AuDbg("dirwh %d\n", opt->dirwh);
23267+ break;
23268+ case Opt_rdcache:
23269+ AuDbg("rdcache %d\n", opt->rdcache);
23270+ break;
23271+ case Opt_rdblk:
23272+ AuDbg("rdblk %u\n", opt->rdblk);
23273+ break;
dece6358
AM
23274+ case Opt_rdblk_def:
23275+ AuDbg("rdblk_def\n");
23276+ break;
1facf9fc 23277+ case Opt_rdhash:
23278+ AuDbg("rdhash %u\n", opt->rdhash);
23279+ break;
dece6358
AM
23280+ case Opt_rdhash_def:
23281+ AuDbg("rdhash_def\n");
23282+ break;
1facf9fc 23283+ case Opt_xino:
23284+ u.xino = &opt->xino;
523b37e3 23285+ AuDbg("xino {%s %pD}\n", u.xino->path, u.xino->file);
1facf9fc 23286+ break;
23287+ case Opt_trunc_xino:
23288+ AuLabel(trunc_xino);
23289+ break;
23290+ case Opt_notrunc_xino:
23291+ AuLabel(notrunc_xino);
23292+ break;
23293+ case Opt_trunc_xino_path:
23294+ case Opt_itrunc_xino:
23295+ u.xino_itrunc = &opt->xino_itrunc;
23296+ AuDbg("trunc_xino %d\n", u.xino_itrunc->bindex);
23297+ break;
1facf9fc 23298+ case Opt_noxino:
23299+ AuLabel(noxino);
23300+ break;
23301+ case Opt_trunc_xib:
23302+ AuLabel(trunc_xib);
23303+ break;
23304+ case Opt_notrunc_xib:
23305+ AuLabel(notrunc_xib);
23306+ break;
dece6358
AM
23307+ case Opt_shwh:
23308+ AuLabel(shwh);
23309+ break;
23310+ case Opt_noshwh:
23311+ AuLabel(noshwh);
23312+ break;
076b876e
AM
23313+ case Opt_dirperm1:
23314+ AuLabel(dirperm1);
23315+ break;
23316+ case Opt_nodirperm1:
23317+ AuLabel(nodirperm1);
23318+ break;
1facf9fc 23319+ case Opt_plink:
23320+ AuLabel(plink);
23321+ break;
23322+ case Opt_noplink:
23323+ AuLabel(noplink);
23324+ break;
23325+ case Opt_list_plink:
23326+ AuLabel(list_plink);
23327+ break;
23328+ case Opt_udba:
23329+ AuDbg("udba %d, %s\n",
23330+ opt->udba, au_optstr_udba(opt->udba));
23331+ break;
4a4d8108
AM
23332+ case Opt_dio:
23333+ AuLabel(dio);
23334+ break;
23335+ case Opt_nodio:
23336+ AuLabel(nodio);
23337+ break;
1facf9fc 23338+ case Opt_diropq_a:
23339+ AuLabel(diropq_a);
23340+ break;
23341+ case Opt_diropq_w:
23342+ AuLabel(diropq_w);
23343+ break;
23344+ case Opt_warn_perm:
23345+ AuLabel(warn_perm);
23346+ break;
23347+ case Opt_nowarn_perm:
23348+ AuLabel(nowarn_perm);
23349+ break;
1facf9fc 23350+ case Opt_verbose:
23351+ AuLabel(verbose);
23352+ break;
23353+ case Opt_noverbose:
23354+ AuLabel(noverbose);
23355+ break;
23356+ case Opt_sum:
23357+ AuLabel(sum);
23358+ break;
23359+ case Opt_nosum:
23360+ AuLabel(nosum);
23361+ break;
23362+ case Opt_wsum:
23363+ AuLabel(wsum);
23364+ break;
23365+ case Opt_wbr_create:
23366+ u.create = &opt->wbr_create;
23367+ AuDbg("create %d, %s\n", u.create->wbr_create,
23368+ au_optstr_wbr_create(u.create->wbr_create));
23369+ switch (u.create->wbr_create) {
23370+ case AuWbrCreate_MFSV:
23371+ case AuWbrCreate_PMFSV:
23372+ AuDbg("%d sec\n", u.create->mfs_second);
23373+ break;
23374+ case AuWbrCreate_MFSRR:
23375+ AuDbg("%llu watermark\n",
23376+ u.create->mfsrr_watermark);
23377+ break;
23378+ case AuWbrCreate_MFSRRV:
392086de 23379+ case AuWbrCreate_PMFSRRV:
1facf9fc 23380+ AuDbg("%llu watermark, %d sec\n",
23381+ u.create->mfsrr_watermark,
23382+ u.create->mfs_second);
23383+ break;
23384+ }
23385+ break;
23386+ case Opt_wbr_copyup:
23387+ AuDbg("copyup %d, %s\n", opt->wbr_copyup,
23388+ au_optstr_wbr_copyup(opt->wbr_copyup));
23389+ break;
076b876e
AM
23390+ case Opt_fhsm_sec:
23391+ AuDbg("fhsm_sec %u\n", opt->fhsm_second);
23392+ break;
c1595e42
JR
23393+ case Opt_acl:
23394+ AuLabel(acl);
23395+ break;
23396+ case Opt_noacl:
23397+ AuLabel(noacl);
23398+ break;
1facf9fc 23399+ default:
23400+ BUG();
23401+ }
23402+ opt++;
23403+ }
23404+#endif
23405+}
23406+
23407+void au_opts_free(struct au_opts *opts)
23408+{
23409+ struct au_opt *opt;
23410+
23411+ opt = opts->opt;
23412+ while (opt->type != Opt_tail) {
23413+ switch (opt->type) {
23414+ case Opt_add:
23415+ case Opt_append:
23416+ case Opt_prepend:
23417+ path_put(&opt->add.path);
23418+ break;
23419+ case Opt_del:
23420+ case Opt_idel:
23421+ path_put(&opt->del.h_path);
23422+ break;
23423+ case Opt_mod:
23424+ case Opt_imod:
23425+ dput(opt->mod.h_root);
23426+ break;
23427+ case Opt_xino:
23428+ fput(opt->xino.file);
23429+ break;
23430+ }
23431+ opt++;
23432+ }
23433+}
23434+
23435+static int opt_add(struct au_opt *opt, char *opt_str, unsigned long sb_flags,
23436+ aufs_bindex_t bindex)
23437+{
23438+ int err;
23439+ struct au_opt_add *add = &opt->add;
23440+ char *p;
23441+
23442+ add->bindex = bindex;
1e00d052 23443+ add->perm = AuBrPerm_RO;
1facf9fc 23444+ add->pathname = opt_str;
23445+ p = strchr(opt_str, '=');
23446+ if (p) {
23447+ *p++ = 0;
23448+ if (*p)
23449+ add->perm = br_perm_val(p);
23450+ }
23451+
23452+ err = vfsub_kern_path(add->pathname, lkup_dirflags, &add->path);
23453+ if (!err) {
23454+ if (!p) {
23455+ add->perm = AuBrPerm_RO;
23456+ if (au_test_fs_rr(add->path.dentry->d_sb))
23457+ add->perm = AuBrPerm_RR;
23458+ else if (!bindex && !(sb_flags & MS_RDONLY))
23459+ add->perm = AuBrPerm_RW;
23460+ }
23461+ opt->type = Opt_add;
23462+ goto out;
23463+ }
4a4d8108 23464+ pr_err("lookup failed %s (%d)\n", add->pathname, err);
1facf9fc 23465+ err = -EINVAL;
23466+
4f0767ce 23467+out:
1facf9fc 23468+ return err;
23469+}
23470+
23471+static int au_opts_parse_del(struct au_opt_del *del, substring_t args[])
23472+{
23473+ int err;
23474+
23475+ del->pathname = args[0].from;
23476+ AuDbg("del path %s\n", del->pathname);
23477+
23478+ err = vfsub_kern_path(del->pathname, lkup_dirflags, &del->h_path);
23479+ if (unlikely(err))
4a4d8108 23480+ pr_err("lookup failed %s (%d)\n", del->pathname, err);
1facf9fc 23481+
23482+ return err;
23483+}
23484+
#if 0 /* reserved for future use */
/*
 * Indexed variant of "del": take the branch at @bindex of @sb instead of
 * looking up a path.  On success del->h_path holds new references to the
 * branch root dentry and its mount.
 *
 * Returns 0, or -EINVAL when @bindex is negative or beyond au_sbend(sb).
 */
static int au_opts_parse_idel(struct super_block *sb, aufs_bindex_t bindex,
			      struct au_opt_del *del, substring_t args[])
{
	int err;
	struct dentry *root = sb->s_root;

	aufs_read_lock(root, AuLock_FLUSH);
	if (bindex >= 0 && bindex <= au_sbend(sb)) {
		del->h_path.dentry = dget(au_h_dptr(root, bindex));
		del->h_path.mnt = mntget(au_sbr_mnt(sb, bindex));
		err = 0;
	} else {
		pr_err("out of bounds, %d\n", bindex);
		err = -EINVAL;
	}
	aufs_read_unlock(root, !AuLock_IR);

	return err;
}
#endif
23509+
4a4d8108
AM
23510+static int noinline_for_stack
23511+au_opts_parse_mod(struct au_opt_mod *mod, substring_t args[])
1facf9fc 23512+{
23513+ int err;
23514+ struct path path;
23515+ char *p;
23516+
23517+ err = -EINVAL;
23518+ mod->path = args[0].from;
23519+ p = strchr(mod->path, '=');
23520+ if (unlikely(!p)) {
4a4d8108 23521+ pr_err("no permssion %s\n", args[0].from);
1facf9fc 23522+ goto out;
23523+ }
23524+
23525+ *p++ = 0;
23526+ err = vfsub_kern_path(mod->path, lkup_dirflags, &path);
23527+ if (unlikely(err)) {
4a4d8108 23528+ pr_err("lookup failed %s (%d)\n", mod->path, err);
1facf9fc 23529+ goto out;
23530+ }
23531+
23532+ mod->perm = br_perm_val(p);
23533+ AuDbg("mod path %s, perm 0x%x, %s\n", mod->path, mod->perm, p);
23534+ mod->h_root = dget(path.dentry);
23535+ path_put(&path);
23536+
4f0767ce 23537+out:
1facf9fc 23538+ return err;
23539+}
23540+
#if 0 /* reserved for future use */
/*
 * Indexed variant of "mod": the branch is given by @bindex and the new
 * permission string by args[1].  On success mod->h_root holds a new
 * reference to the root dentry of that branch.  mod->path is only printed
 * here, not assigned.
 *
 * Returns 0, or -EINVAL when @bindex is negative or beyond au_sbend(sb).
 */
static int au_opts_parse_imod(struct super_block *sb, aufs_bindex_t bindex,
			      struct au_opt_mod *mod, substring_t args[])
{
	int err;
	struct dentry *root = sb->s_root;

	aufs_read_lock(root, AuLock_FLUSH);
	if (bindex >= 0 && bindex <= au_sbend(sb)) {
		mod->perm = br_perm_val(args[1].from);
		AuDbg("mod path %s, perm 0x%x, %s\n",
		      mod->path, mod->perm, args[1].from);
		mod->h_root = dget(au_h_dptr(root, bindex));
		err = 0;
	} else {
		pr_err("out of bounds, %d\n", bindex);
		err = -EINVAL;
	}
	aufs_read_unlock(root, !AuLock_IR);

	return err;
}
#endif
23567+
23568+static int au_opts_parse_xino(struct super_block *sb, struct au_opt_xino *xino,
23569+ substring_t args[])
23570+{
23571+ int err;
23572+ struct file *file;
23573+
23574+ file = au_xino_create(sb, args[0].from, /*silent*/0);
23575+ err = PTR_ERR(file);
23576+ if (IS_ERR(file))
23577+ goto out;
23578+
23579+ err = -EINVAL;
2000de60 23580+ if (unlikely(file->f_path.dentry->d_sb == sb)) {
1facf9fc 23581+ fput(file);
4a4d8108 23582+ pr_err("%s must be outside\n", args[0].from);
1facf9fc 23583+ goto out;
23584+ }
23585+
23586+ err = 0;
23587+ xino->file = file;
23588+ xino->path = args[0].from;
23589+
4f0767ce 23590+out:
1facf9fc 23591+ return err;
23592+}
23593+
4a4d8108
AM
23594+static int noinline_for_stack
23595+au_opts_parse_xino_itrunc_path(struct super_block *sb,
23596+ struct au_opt_xino_itrunc *xino_itrunc,
23597+ substring_t args[])
1facf9fc 23598+{
23599+ int err;
23600+ aufs_bindex_t bend, bindex;
23601+ struct path path;
23602+ struct dentry *root;
23603+
23604+ err = vfsub_kern_path(args[0].from, lkup_dirflags, &path);
23605+ if (unlikely(err)) {
4a4d8108 23606+ pr_err("lookup failed %s (%d)\n", args[0].from, err);
1facf9fc 23607+ goto out;
23608+ }
23609+
23610+ xino_itrunc->bindex = -1;
23611+ root = sb->s_root;
23612+ aufs_read_lock(root, AuLock_FLUSH);
23613+ bend = au_sbend(sb);
23614+ for (bindex = 0; bindex <= bend; bindex++) {
23615+ if (au_h_dptr(root, bindex) == path.dentry) {
23616+ xino_itrunc->bindex = bindex;
23617+ break;
23618+ }
23619+ }
23620+ aufs_read_unlock(root, !AuLock_IR);
23621+ path_put(&path);
23622+
23623+ if (unlikely(xino_itrunc->bindex < 0)) {
4a4d8108 23624+ pr_err("no such branch %s\n", args[0].from);
1facf9fc 23625+ err = -EINVAL;
23626+ }
23627+
4f0767ce 23628+out:
1facf9fc 23629+ return err;
23630+}
23631+
23632+/* called without aufs lock */
23633+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts)
23634+{
23635+ int err, n, token;
23636+ aufs_bindex_t bindex;
23637+ unsigned char skipped;
23638+ struct dentry *root;
23639+ struct au_opt *opt, *opt_tail;
23640+ char *opt_str;
23641+ /* reduce the stack space */
23642+ union {
23643+ struct au_opt_xino_itrunc *xino_itrunc;
23644+ struct au_opt_wbr_create *create;
23645+ } u;
23646+ struct {
23647+ substring_t args[MAX_OPT_ARGS];
23648+ } *a;
23649+
23650+ err = -ENOMEM;
23651+ a = kmalloc(sizeof(*a), GFP_NOFS);
23652+ if (unlikely(!a))
23653+ goto out;
23654+
23655+ root = sb->s_root;
23656+ err = 0;
23657+ bindex = 0;
23658+ opt = opts->opt;
23659+ opt_tail = opt + opts->max_opt - 1;
23660+ opt->type = Opt_tail;
23661+ while (!err && (opt_str = strsep(&str, ",")) && *opt_str) {
23662+ err = -EINVAL;
23663+ skipped = 0;
23664+ token = match_token(opt_str, options, a->args);
23665+ switch (token) {
23666+ case Opt_br:
23667+ err = 0;
23668+ while (!err && (opt_str = strsep(&a->args[0].from, ":"))
23669+ && *opt_str) {
23670+ err = opt_add(opt, opt_str, opts->sb_flags,
23671+ bindex++);
23672+ if (unlikely(!err && ++opt > opt_tail)) {
23673+ err = -E2BIG;
23674+ break;
23675+ }
23676+ opt->type = Opt_tail;
23677+ skipped = 1;
23678+ }
23679+ break;
23680+ case Opt_add:
23681+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 23682+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23683+ break;
23684+ }
23685+ bindex = n;
23686+ err = opt_add(opt, a->args[1].from, opts->sb_flags,
23687+ bindex);
23688+ if (!err)
23689+ opt->type = token;
23690+ break;
23691+ case Opt_append:
23692+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
23693+ /*dummy bindex*/1);
23694+ if (!err)
23695+ opt->type = token;
23696+ break;
23697+ case Opt_prepend:
23698+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
23699+ /*bindex*/0);
23700+ if (!err)
23701+ opt->type = token;
23702+ break;
23703+ case Opt_del:
23704+ err = au_opts_parse_del(&opt->del, a->args);
23705+ if (!err)
23706+ opt->type = token;
23707+ break;
23708+#if 0 /* reserved for future use */
23709+ case Opt_idel:
23710+ del->pathname = "(indexed)";
23711+ if (unlikely(match_int(&args[0], &n))) {
4a4d8108 23712+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23713+ break;
23714+ }
23715+ err = au_opts_parse_idel(sb, n, &opt->del, a->args);
23716+ if (!err)
23717+ opt->type = token;
23718+ break;
23719+#endif
23720+ case Opt_mod:
23721+ err = au_opts_parse_mod(&opt->mod, a->args);
23722+ if (!err)
23723+ opt->type = token;
23724+ break;
23725+#ifdef IMOD /* reserved for future use */
23726+ case Opt_imod:
23727+ u.mod->path = "(indexed)";
23728+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 23729+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23730+ break;
23731+ }
23732+ err = au_opts_parse_imod(sb, n, &opt->mod, a->args);
23733+ if (!err)
23734+ opt->type = token;
23735+ break;
23736+#endif
23737+ case Opt_xino:
23738+ err = au_opts_parse_xino(sb, &opt->xino, a->args);
23739+ if (!err)
23740+ opt->type = token;
23741+ break;
23742+
23743+ case Opt_trunc_xino_path:
23744+ err = au_opts_parse_xino_itrunc_path
23745+ (sb, &opt->xino_itrunc, a->args);
23746+ if (!err)
23747+ opt->type = token;
23748+ break;
23749+
23750+ case Opt_itrunc_xino:
23751+ u.xino_itrunc = &opt->xino_itrunc;
23752+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 23753+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23754+ break;
23755+ }
23756+ u.xino_itrunc->bindex = n;
23757+ aufs_read_lock(root, AuLock_FLUSH);
23758+ if (n < 0 || au_sbend(sb) < n) {
4a4d8108 23759+ pr_err("out of bounds, %d\n", n);
1facf9fc 23760+ aufs_read_unlock(root, !AuLock_IR);
23761+ break;
23762+ }
23763+ aufs_read_unlock(root, !AuLock_IR);
23764+ err = 0;
23765+ opt->type = token;
23766+ break;
23767+
23768+ case Opt_dirwh:
23769+ if (unlikely(match_int(&a->args[0], &opt->dirwh)))
23770+ break;
23771+ err = 0;
23772+ opt->type = token;
23773+ break;
23774+
23775+ case Opt_rdcache:
027c5e7a
AM
23776+ if (unlikely(match_int(&a->args[0], &n))) {
23777+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23778+ break;
027c5e7a
AM
23779+ }
23780+ if (unlikely(n > AUFS_RDCACHE_MAX)) {
23781+ pr_err("rdcache must be smaller than %d\n",
23782+ AUFS_RDCACHE_MAX);
23783+ break;
23784+ }
23785+ opt->rdcache = n;
1facf9fc 23786+ err = 0;
23787+ opt->type = token;
23788+ break;
23789+ case Opt_rdblk:
23790+ if (unlikely(match_int(&a->args[0], &n)
1308ab2a 23791+ || n < 0
1facf9fc 23792+ || n > KMALLOC_MAX_SIZE)) {
4a4d8108 23793+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23794+ break;
23795+ }
1308ab2a 23796+ if (unlikely(n && n < NAME_MAX)) {
4a4d8108
AM
23797+ pr_err("rdblk must be larger than %d\n",
23798+ NAME_MAX);
1facf9fc 23799+ break;
23800+ }
23801+ opt->rdblk = n;
23802+ err = 0;
23803+ opt->type = token;
23804+ break;
23805+ case Opt_rdhash:
23806+ if (unlikely(match_int(&a->args[0], &n)
1308ab2a 23807+ || n < 0
1facf9fc 23808+ || n * sizeof(struct hlist_head)
23809+ > KMALLOC_MAX_SIZE)) {
4a4d8108 23810+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23811+ break;
23812+ }
23813+ opt->rdhash = n;
23814+ err = 0;
23815+ opt->type = token;
23816+ break;
23817+
23818+ case Opt_trunc_xino:
23819+ case Opt_notrunc_xino:
23820+ case Opt_noxino:
23821+ case Opt_trunc_xib:
23822+ case Opt_notrunc_xib:
dece6358
AM
23823+ case Opt_shwh:
23824+ case Opt_noshwh:
076b876e
AM
23825+ case Opt_dirperm1:
23826+ case Opt_nodirperm1:
1facf9fc 23827+ case Opt_plink:
23828+ case Opt_noplink:
23829+ case Opt_list_plink:
4a4d8108
AM
23830+ case Opt_dio:
23831+ case Opt_nodio:
1facf9fc 23832+ case Opt_diropq_a:
23833+ case Opt_diropq_w:
23834+ case Opt_warn_perm:
23835+ case Opt_nowarn_perm:
1facf9fc 23836+ case Opt_verbose:
23837+ case Opt_noverbose:
23838+ case Opt_sum:
23839+ case Opt_nosum:
23840+ case Opt_wsum:
dece6358
AM
23841+ case Opt_rdblk_def:
23842+ case Opt_rdhash_def:
c1595e42
JR
23843+ case Opt_acl:
23844+ case Opt_noacl:
1facf9fc 23845+ err = 0;
23846+ opt->type = token;
23847+ break;
23848+
23849+ case Opt_udba:
23850+ opt->udba = udba_val(a->args[0].from);
23851+ if (opt->udba >= 0) {
23852+ err = 0;
23853+ opt->type = token;
23854+ } else
4a4d8108 23855+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 23856+ break;
23857+
23858+ case Opt_wbr_create:
23859+ u.create = &opt->wbr_create;
23860+ u.create->wbr_create
23861+ = au_wbr_create_val(a->args[0].from, u.create);
23862+ if (u.create->wbr_create >= 0) {
23863+ err = 0;
23864+ opt->type = token;
23865+ } else
4a4d8108 23866+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 23867+ break;
23868+ case Opt_wbr_copyup:
23869+ opt->wbr_copyup = au_wbr_copyup_val(a->args[0].from);
23870+ if (opt->wbr_copyup >= 0) {
23871+ err = 0;
23872+ opt->type = token;
23873+ } else
4a4d8108 23874+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 23875+ break;
23876+
076b876e
AM
23877+ case Opt_fhsm_sec:
23878+ if (unlikely(match_int(&a->args[0], &n)
23879+ || n < 0)) {
23880+ pr_err("bad integer in %s\n", opt_str);
23881+ break;
23882+ }
23883+ if (sysaufs_brs) {
23884+ opt->fhsm_second = n;
23885+ opt->type = token;
23886+ } else
23887+ pr_warn("ignored %s\n", opt_str);
23888+ err = 0;
23889+ break;
23890+
1facf9fc 23891+ case Opt_ignore:
0c3ec466 23892+ pr_warn("ignored %s\n", opt_str);
1facf9fc 23893+ /*FALLTHROUGH*/
23894+ case Opt_ignore_silent:
23895+ skipped = 1;
23896+ err = 0;
23897+ break;
23898+ case Opt_err:
4a4d8108 23899+ pr_err("unknown option %s\n", opt_str);
1facf9fc 23900+ break;
23901+ }
23902+
23903+ if (!err && !skipped) {
23904+ if (unlikely(++opt > opt_tail)) {
23905+ err = -E2BIG;
23906+ opt--;
23907+ opt->type = Opt_tail;
23908+ break;
23909+ }
23910+ opt->type = Opt_tail;
23911+ }
23912+ }
23913+
23914+ kfree(a);
23915+ dump_opts(opts);
23916+ if (unlikely(err))
23917+ au_opts_free(opts);
23918+
4f0767ce 23919+out:
1facf9fc 23920+ return err;
23921+}
23922+
23923+static int au_opt_wbr_create(struct super_block *sb,
23924+ struct au_opt_wbr_create *create)
23925+{
23926+ int err;
23927+ struct au_sbinfo *sbinfo;
23928+
dece6358
AM
23929+ SiMustWriteLock(sb);
23930+
1facf9fc 23931+ err = 1; /* handled */
23932+ sbinfo = au_sbi(sb);
23933+ if (sbinfo->si_wbr_create_ops->fin) {
23934+ err = sbinfo->si_wbr_create_ops->fin(sb);
23935+ if (!err)
23936+ err = 1;
23937+ }
23938+
23939+ sbinfo->si_wbr_create = create->wbr_create;
23940+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + create->wbr_create;
23941+ switch (create->wbr_create) {
23942+ case AuWbrCreate_MFSRRV:
23943+ case AuWbrCreate_MFSRR:
392086de
AM
23944+ case AuWbrCreate_PMFSRR:
23945+ case AuWbrCreate_PMFSRRV:
1facf9fc 23946+ sbinfo->si_wbr_mfs.mfsrr_watermark = create->mfsrr_watermark;
23947+ /*FALLTHROUGH*/
23948+ case AuWbrCreate_MFS:
23949+ case AuWbrCreate_MFSV:
23950+ case AuWbrCreate_PMFS:
23951+ case AuWbrCreate_PMFSV:
e49829fe
JR
23952+ sbinfo->si_wbr_mfs.mfs_expire
23953+ = msecs_to_jiffies(create->mfs_second * MSEC_PER_SEC);
1facf9fc 23954+ break;
23955+ }
23956+
23957+ if (sbinfo->si_wbr_create_ops->init)
23958+ sbinfo->si_wbr_create_ops->init(sb); /* ignore */
23959+
23960+ return err;
23961+}
23962+
23963+/*
23964+ * returns,
23965+ * plus: processed without an error
23966+ * zero: unprocessed
23967+ */
23968+static int au_opt_simple(struct super_block *sb, struct au_opt *opt,
23969+ struct au_opts *opts)
23970+{
23971+ int err;
23972+ struct au_sbinfo *sbinfo;
23973+
dece6358
AM
23974+ SiMustWriteLock(sb);
23975+
1facf9fc 23976+ err = 1; /* handled */
23977+ sbinfo = au_sbi(sb);
23978+ switch (opt->type) {
23979+ case Opt_udba:
23980+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
23981+ sbinfo->si_mntflags |= opt->udba;
23982+ opts->given_udba |= opt->udba;
23983+ break;
23984+
23985+ case Opt_plink:
23986+ au_opt_set(sbinfo->si_mntflags, PLINK);
23987+ break;
23988+ case Opt_noplink:
23989+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
e49829fe 23990+ au_plink_put(sb, /*verbose*/1);
1facf9fc 23991+ au_opt_clr(sbinfo->si_mntflags, PLINK);
23992+ break;
23993+ case Opt_list_plink:
23994+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
23995+ au_plink_list(sb);
23996+ break;
23997+
4a4d8108
AM
23998+ case Opt_dio:
23999+ au_opt_set(sbinfo->si_mntflags, DIO);
24000+ au_fset_opts(opts->flags, REFRESH_DYAOP);
24001+ break;
24002+ case Opt_nodio:
24003+ au_opt_clr(sbinfo->si_mntflags, DIO);
24004+ au_fset_opts(opts->flags, REFRESH_DYAOP);
24005+ break;
24006+
076b876e
AM
24007+ case Opt_fhsm_sec:
24008+ au_fhsm_set(sbinfo, opt->fhsm_second);
24009+ break;
24010+
1facf9fc 24011+ case Opt_diropq_a:
24012+ au_opt_set(sbinfo->si_mntflags, ALWAYS_DIROPQ);
24013+ break;
24014+ case Opt_diropq_w:
24015+ au_opt_clr(sbinfo->si_mntflags, ALWAYS_DIROPQ);
24016+ break;
24017+
24018+ case Opt_warn_perm:
24019+ au_opt_set(sbinfo->si_mntflags, WARN_PERM);
24020+ break;
24021+ case Opt_nowarn_perm:
24022+ au_opt_clr(sbinfo->si_mntflags, WARN_PERM);
24023+ break;
24024+
1facf9fc 24025+ case Opt_verbose:
24026+ au_opt_set(sbinfo->si_mntflags, VERBOSE);
24027+ break;
24028+ case Opt_noverbose:
24029+ au_opt_clr(sbinfo->si_mntflags, VERBOSE);
24030+ break;
24031+
24032+ case Opt_sum:
24033+ au_opt_set(sbinfo->si_mntflags, SUM);
24034+ break;
24035+ case Opt_wsum:
24036+ au_opt_clr(sbinfo->si_mntflags, SUM);
24037+ au_opt_set(sbinfo->si_mntflags, SUM_W);
24038+ case Opt_nosum:
24039+ au_opt_clr(sbinfo->si_mntflags, SUM);
24040+ au_opt_clr(sbinfo->si_mntflags, SUM_W);
24041+ break;
24042+
24043+ case Opt_wbr_create:
24044+ err = au_opt_wbr_create(sb, &opt->wbr_create);
24045+ break;
24046+ case Opt_wbr_copyup:
24047+ sbinfo->si_wbr_copyup = opt->wbr_copyup;
24048+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + opt->wbr_copyup;
24049+ break;
24050+
24051+ case Opt_dirwh:
24052+ sbinfo->si_dirwh = opt->dirwh;
24053+ break;
24054+
24055+ case Opt_rdcache:
e49829fe
JR
24056+ sbinfo->si_rdcache
24057+ = msecs_to_jiffies(opt->rdcache * MSEC_PER_SEC);
1facf9fc 24058+ break;
24059+ case Opt_rdblk:
24060+ sbinfo->si_rdblk = opt->rdblk;
24061+ break;
dece6358
AM
24062+ case Opt_rdblk_def:
24063+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
24064+ break;
1facf9fc 24065+ case Opt_rdhash:
24066+ sbinfo->si_rdhash = opt->rdhash;
24067+ break;
dece6358
AM
24068+ case Opt_rdhash_def:
24069+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
24070+ break;
24071+
24072+ case Opt_shwh:
24073+ au_opt_set(sbinfo->si_mntflags, SHWH);
24074+ break;
24075+ case Opt_noshwh:
24076+ au_opt_clr(sbinfo->si_mntflags, SHWH);
24077+ break;
1facf9fc 24078+
076b876e
AM
24079+ case Opt_dirperm1:
24080+ au_opt_set(sbinfo->si_mntflags, DIRPERM1);
24081+ break;
24082+ case Opt_nodirperm1:
24083+ au_opt_clr(sbinfo->si_mntflags, DIRPERM1);
24084+ break;
24085+
1facf9fc 24086+ case Opt_trunc_xino:
24087+ au_opt_set(sbinfo->si_mntflags, TRUNC_XINO);
24088+ break;
24089+ case Opt_notrunc_xino:
24090+ au_opt_clr(sbinfo->si_mntflags, TRUNC_XINO);
24091+ break;
24092+
24093+ case Opt_trunc_xino_path:
24094+ case Opt_itrunc_xino:
24095+ err = au_xino_trunc(sb, opt->xino_itrunc.bindex);
24096+ if (!err)
24097+ err = 1;
24098+ break;
24099+
24100+ case Opt_trunc_xib:
24101+ au_fset_opts(opts->flags, TRUNC_XIB);
24102+ break;
24103+ case Opt_notrunc_xib:
24104+ au_fclr_opts(opts->flags, TRUNC_XIB);
24105+ break;
24106+
c1595e42
JR
24107+ case Opt_acl:
24108+ sb->s_flags |= MS_POSIXACL;
24109+ break;
24110+ case Opt_noacl:
24111+ sb->s_flags &= ~MS_POSIXACL;
24112+ break;
24113+
1facf9fc 24114+ default:
24115+ err = 0;
24116+ break;
24117+ }
24118+
24119+ return err;
24120+}
24121+
24122+/*
24123+ * returns tri-state.
24124+ * plus: processed without an error
24125+ * zero: unprocessed
24126+ * minus: error
24127+ */
24128+static int au_opt_br(struct super_block *sb, struct au_opt *opt,
24129+ struct au_opts *opts)
24130+{
24131+ int err, do_refresh;
24132+
24133+ err = 0;
24134+ switch (opt->type) {
24135+ case Opt_append:
24136+ opt->add.bindex = au_sbend(sb) + 1;
24137+ if (opt->add.bindex < 0)
24138+ opt->add.bindex = 0;
24139+ goto add;
24140+ case Opt_prepend:
24141+ opt->add.bindex = 0;
f6b6e03d 24142+ add: /* indented label */
1facf9fc 24143+ case Opt_add:
24144+ err = au_br_add(sb, &opt->add,
24145+ au_ftest_opts(opts->flags, REMOUNT));
24146+ if (!err) {
24147+ err = 1;
027c5e7a 24148+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 24149+ }
24150+ break;
24151+
24152+ case Opt_del:
24153+ case Opt_idel:
24154+ err = au_br_del(sb, &opt->del,
24155+ au_ftest_opts(opts->flags, REMOUNT));
24156+ if (!err) {
24157+ err = 1;
24158+ au_fset_opts(opts->flags, TRUNC_XIB);
027c5e7a 24159+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 24160+ }
24161+ break;
24162+
24163+ case Opt_mod:
24164+ case Opt_imod:
24165+ err = au_br_mod(sb, &opt->mod,
24166+ au_ftest_opts(opts->flags, REMOUNT),
24167+ &do_refresh);
24168+ if (!err) {
24169+ err = 1;
027c5e7a
AM
24170+ if (do_refresh)
24171+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 24172+ }
24173+ break;
24174+ }
24175+
24176+ return err;
24177+}
24178+
24179+static int au_opt_xino(struct super_block *sb, struct au_opt *opt,
24180+ struct au_opt_xino **opt_xino,
24181+ struct au_opts *opts)
24182+{
24183+ int err;
24184+ aufs_bindex_t bend, bindex;
24185+ struct dentry *root, *parent, *h_root;
24186+
24187+ err = 0;
24188+ switch (opt->type) {
24189+ case Opt_xino:
24190+ err = au_xino_set(sb, &opt->xino,
24191+ !!au_ftest_opts(opts->flags, REMOUNT));
24192+ if (unlikely(err))
24193+ break;
24194+
24195+ *opt_xino = &opt->xino;
24196+ au_xino_brid_set(sb, -1);
24197+
24198+ /* safe d_parent access */
2000de60 24199+ parent = opt->xino.file->f_path.dentry->d_parent;
1facf9fc 24200+ root = sb->s_root;
24201+ bend = au_sbend(sb);
24202+ for (bindex = 0; bindex <= bend; bindex++) {
24203+ h_root = au_h_dptr(root, bindex);
24204+ if (h_root == parent) {
24205+ au_xino_brid_set(sb, au_sbr_id(sb, bindex));
24206+ break;
24207+ }
24208+ }
24209+ break;
24210+
24211+ case Opt_noxino:
24212+ au_xino_clr(sb);
24213+ au_xino_brid_set(sb, -1);
24214+ *opt_xino = (void *)-1;
24215+ break;
24216+ }
24217+
24218+ return err;
24219+}
24220+
24221+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
24222+ unsigned int pending)
24223+{
076b876e 24224+ int err, fhsm;
1facf9fc 24225+ aufs_bindex_t bindex, bend;
ab036dbd 24226+ unsigned char do_plink, skip, do_free, can_no_dreval;
1facf9fc 24227+ struct au_branch *br;
24228+ struct au_wbr *wbr;
ab036dbd 24229+ struct dentry *root, *dentry;
1facf9fc 24230+ struct inode *dir, *h_dir;
24231+ struct au_sbinfo *sbinfo;
24232+ struct au_hinode *hdir;
24233+
dece6358
AM
24234+ SiMustAnyLock(sb);
24235+
1facf9fc 24236+ sbinfo = au_sbi(sb);
24237+ AuDebugOn(!(sbinfo->si_mntflags & AuOptMask_UDBA));
24238+
dece6358
AM
24239+ if (!(sb_flags & MS_RDONLY)) {
24240+ if (unlikely(!au_br_writable(au_sbr_perm(sb, 0))))
0c3ec466 24241+ pr_warn("first branch should be rw\n");
dece6358 24242+ if (unlikely(au_opt_test(sbinfo->si_mntflags, SHWH)))
0c3ec466 24243+ pr_warn("shwh should be used with ro\n");
dece6358 24244+ }
1facf9fc 24245+
4a4d8108 24246+ if (au_opt_test((sbinfo->si_mntflags | pending), UDBA_HNOTIFY)
1facf9fc 24247+ && !au_opt_test(sbinfo->si_mntflags, XINO))
0c3ec466 24248+ pr_warn("udba=*notify requires xino\n");
1facf9fc 24249+
076b876e
AM
24250+ if (au_opt_test(sbinfo->si_mntflags, DIRPERM1))
24251+ pr_warn("dirperm1 breaks the protection"
24252+ " by the permission bits on the lower branch\n");
24253+
1facf9fc 24254+ err = 0;
076b876e 24255+ fhsm = 0;
1facf9fc 24256+ root = sb->s_root;
5527c038 24257+ dir = d_inode(root);
1facf9fc 24258+ do_plink = !!au_opt_test(sbinfo->si_mntflags, PLINK);
ab036dbd
AM
24259+ can_no_dreval = !!au_opt_test((sbinfo->si_mntflags | pending),
24260+ UDBA_NONE);
1facf9fc 24261+ bend = au_sbend(sb);
24262+ for (bindex = 0; !err && bindex <= bend; bindex++) {
24263+ skip = 0;
24264+ h_dir = au_h_iptr(dir, bindex);
24265+ br = au_sbr(sb, bindex);
1facf9fc 24266+
c1595e42
JR
24267+ if ((br->br_perm & AuBrAttr_ICEX)
24268+ && !h_dir->i_op->listxattr)
24269+ br->br_perm &= ~AuBrAttr_ICEX;
24270+#if 0
24271+ if ((br->br_perm & AuBrAttr_ICEX_SEC)
24272+ && (au_br_sb(br)->s_flags & MS_NOSEC))
24273+ br->br_perm &= ~AuBrAttr_ICEX_SEC;
24274+#endif
24275+
24276+ do_free = 0;
1facf9fc 24277+ wbr = br->br_wbr;
24278+ if (wbr)
24279+ wbr_wh_read_lock(wbr);
24280+
1e00d052 24281+ if (!au_br_writable(br->br_perm)) {
1facf9fc 24282+ do_free = !!wbr;
24283+ skip = (!wbr
24284+ || (!wbr->wbr_whbase
24285+ && !wbr->wbr_plink
24286+ && !wbr->wbr_orph));
1e00d052 24287+ } else if (!au_br_wh_linkable(br->br_perm)) {
1facf9fc 24288+ /* skip = (!br->br_whbase && !br->br_orph); */
24289+ skip = (!wbr || !wbr->wbr_whbase);
24290+ if (skip && wbr) {
24291+ if (do_plink)
24292+ skip = !!wbr->wbr_plink;
24293+ else
24294+ skip = !wbr->wbr_plink;
24295+ }
1e00d052 24296+ } else {
1facf9fc 24297+ /* skip = (br->br_whbase && br->br_ohph); */
24298+ skip = (wbr && wbr->wbr_whbase);
24299+ if (skip) {
24300+ if (do_plink)
24301+ skip = !!wbr->wbr_plink;
24302+ else
24303+ skip = !wbr->wbr_plink;
24304+ }
1facf9fc 24305+ }
24306+ if (wbr)
24307+ wbr_wh_read_unlock(wbr);
24308+
ab036dbd
AM
24309+ if (can_no_dreval) {
24310+ dentry = br->br_path.dentry;
24311+ spin_lock(&dentry->d_lock);
24312+ if (dentry->d_flags &
24313+ (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE))
24314+ can_no_dreval = 0;
24315+ spin_unlock(&dentry->d_lock);
24316+ }
24317+
076b876e
AM
24318+ if (au_br_fhsm(br->br_perm)) {
24319+ fhsm++;
24320+ AuDebugOn(!br->br_fhsm);
24321+ }
24322+
1facf9fc 24323+ if (skip)
24324+ continue;
24325+
24326+ hdir = au_hi(dir, bindex);
4a4d8108 24327+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 24328+ if (wbr)
24329+ wbr_wh_write_lock(wbr);
86dc4139 24330+ err = au_wh_init(br, sb);
1facf9fc 24331+ if (wbr)
24332+ wbr_wh_write_unlock(wbr);
4a4d8108 24333+ au_hn_imtx_unlock(hdir);
1facf9fc 24334+
24335+ if (!err && do_free) {
24336+ kfree(wbr);
24337+ br->br_wbr = NULL;
24338+ }
24339+ }
24340+
ab036dbd
AM
24341+ if (can_no_dreval)
24342+ au_fset_si(sbinfo, NO_DREVAL);
24343+ else
24344+ au_fclr_si(sbinfo, NO_DREVAL);
24345+
c1595e42 24346+ if (fhsm >= 2) {
076b876e 24347+ au_fset_si(sbinfo, FHSM);
c1595e42
JR
24348+ for (bindex = bend; bindex >= 0; bindex--) {
24349+ br = au_sbr(sb, bindex);
24350+ if (au_br_fhsm(br->br_perm)) {
24351+ au_fhsm_set_bottom(sb, bindex);
24352+ break;
24353+ }
24354+ }
24355+ } else {
076b876e 24356+ au_fclr_si(sbinfo, FHSM);
c1595e42
JR
24357+ au_fhsm_set_bottom(sb, -1);
24358+ }
076b876e 24359+
1facf9fc 24360+ return err;
24361+}
24362+
24363+int au_opts_mount(struct super_block *sb, struct au_opts *opts)
24364+{
24365+ int err;
24366+ unsigned int tmp;
027c5e7a 24367+ aufs_bindex_t bindex, bend;
1facf9fc 24368+ struct au_opt *opt;
24369+ struct au_opt_xino *opt_xino, xino;
24370+ struct au_sbinfo *sbinfo;
027c5e7a 24371+ struct au_branch *br;
076b876e 24372+ struct inode *dir;
1facf9fc 24373+
dece6358
AM
24374+ SiMustWriteLock(sb);
24375+
1facf9fc 24376+ err = 0;
24377+ opt_xino = NULL;
24378+ opt = opts->opt;
24379+ while (err >= 0 && opt->type != Opt_tail)
24380+ err = au_opt_simple(sb, opt++, opts);
24381+ if (err > 0)
24382+ err = 0;
24383+ else if (unlikely(err < 0))
24384+ goto out;
24385+
24386+ /* disable xino and udba temporary */
24387+ sbinfo = au_sbi(sb);
24388+ tmp = sbinfo->si_mntflags;
24389+ au_opt_clr(sbinfo->si_mntflags, XINO);
24390+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_REVAL);
24391+
24392+ opt = opts->opt;
24393+ while (err >= 0 && opt->type != Opt_tail)
24394+ err = au_opt_br(sb, opt++, opts);
24395+ if (err > 0)
24396+ err = 0;
24397+ else if (unlikely(err < 0))
24398+ goto out;
24399+
24400+ bend = au_sbend(sb);
24401+ if (unlikely(bend < 0)) {
24402+ err = -EINVAL;
4a4d8108 24403+ pr_err("no branches\n");
1facf9fc 24404+ goto out;
24405+ }
24406+
24407+ if (au_opt_test(tmp, XINO))
24408+ au_opt_set(sbinfo->si_mntflags, XINO);
24409+ opt = opts->opt;
24410+ while (!err && opt->type != Opt_tail)
24411+ err = au_opt_xino(sb, opt++, &opt_xino, opts);
24412+ if (unlikely(err))
24413+ goto out;
24414+
24415+ err = au_opts_verify(sb, sb->s_flags, tmp);
24416+ if (unlikely(err))
24417+ goto out;
24418+
24419+ /* restore xino */
24420+ if (au_opt_test(tmp, XINO) && !opt_xino) {
24421+ xino.file = au_xino_def(sb);
24422+ err = PTR_ERR(xino.file);
24423+ if (IS_ERR(xino.file))
24424+ goto out;
24425+
24426+ err = au_xino_set(sb, &xino, /*remount*/0);
24427+ fput(xino.file);
24428+ if (unlikely(err))
24429+ goto out;
24430+ }
24431+
24432+ /* restore udba */
027c5e7a 24433+ tmp &= AuOptMask_UDBA;
1facf9fc 24434+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
027c5e7a
AM
24435+ sbinfo->si_mntflags |= tmp;
24436+ bend = au_sbend(sb);
24437+ for (bindex = 0; bindex <= bend; bindex++) {
24438+ br = au_sbr(sb, bindex);
24439+ err = au_hnotify_reset_br(tmp, br, br->br_perm);
24440+ if (unlikely(err))
24441+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
24442+ bindex, err);
24443+ /* go on even if err */
24444+ }
4a4d8108 24445+ if (au_opt_test(tmp, UDBA_HNOTIFY)) {
5527c038 24446+ dir = d_inode(sb->s_root);
4a4d8108 24447+ au_hn_reset(dir, au_hi_flags(dir, /*isdir*/1) & ~AuHi_XINO);
1facf9fc 24448+ }
24449+
4f0767ce 24450+out:
1facf9fc 24451+ return err;
24452+}
24453+
24454+int au_opts_remount(struct super_block *sb, struct au_opts *opts)
24455+{
24456+ int err, rerr;
ab036dbd 24457+ unsigned char no_dreval;
1facf9fc 24458+ struct inode *dir;
24459+ struct au_opt_xino *opt_xino;
24460+ struct au_opt *opt;
24461+ struct au_sbinfo *sbinfo;
24462+
dece6358
AM
24463+ SiMustWriteLock(sb);
24464+
ab036dbd 24465+ err = 0;
5527c038 24466+ dir = d_inode(sb->s_root);
1facf9fc 24467+ sbinfo = au_sbi(sb);
1facf9fc 24468+ opt_xino = NULL;
24469+ opt = opts->opt;
24470+ while (err >= 0 && opt->type != Opt_tail) {
24471+ err = au_opt_simple(sb, opt, opts);
24472+ if (!err)
24473+ err = au_opt_br(sb, opt, opts);
24474+ if (!err)
24475+ err = au_opt_xino(sb, opt, &opt_xino, opts);
24476+ opt++;
24477+ }
24478+ if (err > 0)
24479+ err = 0;
24480+ AuTraceErr(err);
24481+ /* go on even err */
24482+
ab036dbd 24483+ no_dreval = !!au_ftest_si(sbinfo, NO_DREVAL);
1facf9fc 24484+ rerr = au_opts_verify(sb, opts->sb_flags, /*pending*/0);
24485+ if (unlikely(rerr && !err))
24486+ err = rerr;
24487+
ab036dbd
AM
24488+ if (no_dreval != !!au_ftest_si(sbinfo, NO_DREVAL))
24489+ au_fset_opts(opts->flags, REFRESH_IDOP);
24490+
1facf9fc 24491+ if (au_ftest_opts(opts->flags, TRUNC_XIB)) {
24492+ rerr = au_xib_trunc(sb);
24493+ if (unlikely(rerr && !err))
24494+ err = rerr;
24495+ }
24496+
24497+ /* will be handled by the caller */
027c5e7a 24498+ if (!au_ftest_opts(opts->flags, REFRESH)
ab036dbd
AM
24499+ && (opts->given_udba
24500+ || au_opt_test(sbinfo->si_mntflags, XINO)
24501+ || au_ftest_opts(opts->flags, REFRESH_IDOP)
24502+ ))
027c5e7a 24503+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 24504+
24505+ AuDbg("status 0x%x\n", opts->flags);
24506+ return err;
24507+}
24508+
24509+/* ---------------------------------------------------------------------- */
24510+
24511+unsigned int au_opt_udba(struct super_block *sb)
24512+{
24513+ return au_mntflags(sb) & AuOptMask_UDBA;
24514+}
7f207e10
AM
24515diff -urN /usr/share/empty/fs/aufs/opts.h linux/fs/aufs/opts.h
24516--- /usr/share/empty/fs/aufs/opts.h 1970-01-01 01:00:00.000000000 +0100
ab036dbd
AM
24517+++ linux/fs/aufs/opts.h 2015-12-10 17:59:16.839499823 +0100
24518@@ -0,0 +1,211 @@
1facf9fc 24519+/*
2000de60 24520+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 24521+ *
24522+ * This program, aufs is free software; you can redistribute it and/or modify
24523+ * it under the terms of the GNU General Public License as published by
24524+ * the Free Software Foundation; either version 2 of the License, or
24525+ * (at your option) any later version.
dece6358
AM
24526+ *
24527+ * This program is distributed in the hope that it will be useful,
24528+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24529+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24530+ * GNU General Public License for more details.
24531+ *
24532+ * You should have received a copy of the GNU General Public License
523b37e3 24533+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 24534+ */
24535+
24536+/*
24537+ * mount options/flags
24538+ */
24539+
24540+#ifndef __AUFS_OPTS_H__
24541+#define __AUFS_OPTS_H__
24542+
24543+#ifdef __KERNEL__
24544+
dece6358 24545+#include <linux/path.h>
1facf9fc 24546+
dece6358
AM
24547+struct file;
24548+struct super_block;
24549+
1facf9fc 24550+/* ---------------------------------------------------------------------- */
24551+
/*
 * mount flags
 * Every AuOpt_* value is a bit in a mount-flags word (see the users of
 * au_opt_test() and friends). Bit 8 is not assigned here.
 */
#define AuOpt_XINO		1		/* external inode number bitmap
						   and translation table */
#define AuOpt_TRUNC_XINO	(1 << 1)	/* truncate xino files */
#define AuOpt_UDBA_NONE		(1 << 2)	/* users direct branch access */
#define AuOpt_UDBA_REVAL	(1 << 3)
#define AuOpt_UDBA_HNOTIFY	(1 << 4)
#define AuOpt_SHWH		(1 << 5)	/* show whiteout */
#define AuOpt_PLINK		(1 << 6)	/* pseudo-link */
#define AuOpt_DIRPERM1		(1 << 7)	/* ignore the lower dir's perm
						   bits */
#define AuOpt_ALWAYS_DIROPQ	(1 << 9)	/* policy to creating diropq */
#define AuOpt_SUM		(1 << 10)	/* summation for statfs(2) */
#define AuOpt_SUM_W		(1 << 11)	/* unimplemented */
#define AuOpt_WARN_PERM		(1 << 12)	/* warn when add-branch */
#define AuOpt_VERBOSE		(1 << 13)	/* busy inode when del-branch */
#define AuOpt_DIO		(1 << 14)	/* direct io */

/*
 * when a feature is not configured, its flag becomes zero so that
 * au_opt_test() for it is always false.
 */
#ifndef CONFIG_AUFS_HNOTIFY
#undef AuOpt_UDBA_HNOTIFY
#define AuOpt_UDBA_HNOTIFY	0
#endif
#ifndef CONFIG_AUFS_SHWH
#undef AuOpt_SHWH
#define AuOpt_SHWH		0
#endif

/* the default set of the mount flags */
#define AuOpt_Def	(AuOpt_XINO \
			 | AuOpt_UDBA_REVAL \
			 | AuOpt_PLINK \
			 /* | AuOpt_DIRPERM1 */ \
			 | AuOpt_WARN_PERM)
/* all UDBA policies; au_opt_set_udba() keeps at most one of them set */
#define AuOptMask_UDBA	(AuOpt_UDBA_NONE \
			 | AuOpt_UDBA_REVAL \
			 | AuOpt_UDBA_HNOTIFY)

/*
 * test AuOpt_<name> in @flags. the result is the masked bit(s), not 0/1.
 * @flags is parenthesized so that an expression such as 'a | b' is tested
 * as a whole.
 */
#define au_opt_test(flags, name)	((flags) & AuOpt_##name)
/* set a non-UDBA flag; a UDBA flag is rejected at compile time */
#define au_opt_set(flags, name) do { \
	BUILD_BUG_ON(AuOpt_##name & AuOptMask_UDBA); \
	((flags) |= AuOpt_##name); \
} while (0)
/* replace the current UDBA policy in @flags by AuOpt_<name> */
#define au_opt_set_udba(flags, name) do { \
	(flags) &= ~AuOptMask_UDBA; \
	((flags) |= AuOpt_##name); \
} while (0)
/* clear AuOpt_<name> in @flags */
#define au_opt_clr(flags, name) do { \
	((flags) &= ~AuOpt_##name); \
} while (0)
1facf9fc 24600+
e49829fe
JR
24601+static inline unsigned int au_opts_plink(unsigned int mntflags)
24602+{
24603+#ifdef CONFIG_PROC_FS
24604+ return mntflags;
24605+#else
24606+ return mntflags & ~AuOpt_PLINK;
24607+#endif
24608+}
24609+
1facf9fc 24610+/* ---------------------------------------------------------------------- */
24611+
/*
 * policies to select one among multiple writable branches.
 * the numeric values are spelled out, they are the same ones which the
 * implicit numbering gives.
 */
enum {
	AuWbrCreate_TDP		= 0,	/* top down parent */
	AuWbrCreate_RR		= 1,	/* round robin */
	AuWbrCreate_MFS		= 2,	/* most free space */
	AuWbrCreate_MFSV	= 3,	/* mfs with seconds */
	AuWbrCreate_MFSRR	= 4,	/* mfs then rr */
	AuWbrCreate_MFSRRV	= 5,	/* mfs then rr with seconds */
	AuWbrCreate_PMFS	= 6,	/* parent and mfs */
	AuWbrCreate_PMFSV	= 7,	/* parent and mfs with seconds */
	AuWbrCreate_PMFSRR	= 8,	/* parent, mfs and round-robin */
	AuWbrCreate_PMFSRRV	= 9,	/* plus seconds */

	/* the default policy */
	AuWbrCreate_Def		= AuWbrCreate_TDP
};
24627+
/* policies for copy-up, with the implicit numbering spelled out */
enum {
	AuWbrCopyup_TDP	= 0,	/* top down parent */
	AuWbrCopyup_BUP	= 1,	/* bottom up parent */
	AuWbrCopyup_BU	= 2,	/* bottom up */

	/* the default policy */
	AuWbrCopyup_Def	= AuWbrCopyup_TDP
};
24635+
24636+/* ---------------------------------------------------------------------- */
24637+
/*
 * a member of the union in struct au_opt.
 * NOTE(review): presumably the argument of a branch-adding option; the
 * code which fills it is not in this header -- confirm in opts.c.
 */
struct au_opt_add {
	aufs_bindex_t	bindex;
	char		*pathname;
	int		perm;
	struct path	path;
};
24644+
/*
 * a member of the union in struct au_opt.
 * NOTE(review): presumably the argument of a branch-deleting option --
 * confirm in opts.c.
 */
struct au_opt_del {
	char		*pathname;
	struct path	h_path;
};
24649+
/*
 * a member of the union in struct au_opt.
 * NOTE(review): presumably the argument of a branch-modifying option --
 * confirm in opts.c.
 */
struct au_opt_mod {
	char		*path;
	int		perm;
	struct dentry	*h_root;
};
24655+
/*
 * a member of the union in struct au_opt, also handed to au_opt_xino()
 * through a pointer by au_opts_remount().
 * NOTE(review): @file looks like the opened xino file for @path -- confirm
 * the ownership of the reference in opts.c.
 */
struct au_opt_xino {
	char		*path;
	struct file	*file;
};
24660+
/*
 * a member of the union in struct au_opt; it carries a branch index only.
 * NOTE(review): presumably the branch whose xino file is to be truncated --
 * confirm in opts.c.
 */
struct au_opt_xino_itrunc {
	aufs_bindex_t	bindex;
};
24664+
/*
 * a member of the union in struct au_opt.
 * NOTE(review): @wbr_create presumably holds one of AuWbrCreate_*, and the
 * other two fields the parameters of the mfs policies -- confirm in opts.c.
 */
struct au_opt_wbr_create {
	int			wbr_create;
	int			mfs_second;
	unsigned long long	mfsrr_watermark;
};
24670+
/*
 * a single parsed option.
 * @type tells which option it is. the options are kept in an array
 * (au_opts.opt) and the users walk it until they meet the element whose
 * type is Opt_tail.
 * the anonymous union holds the argument of the option.
 * NOTE(review): which member is valid for which @type is decided in opts.c,
 * it cannot be told from this header.
 */
struct au_opt {
	int type;
	union {
		struct au_opt_xino	xino;
		struct au_opt_xino_itrunc xino_itrunc;
		struct au_opt_add	add;
		struct au_opt_del	del;
		struct au_opt_mod	mod;
		int			dirwh;
		int			rdcache;
		unsigned int		rdblk;
		unsigned int		rdhash;
		int			udba;
		struct au_opt_wbr_create wbr_create;
		int			wbr_copyup;
		unsigned int		fhsm_second;
	};
};
24689+
/*
 * opts flags
 * the bits for au_opts.flags, and the helpers to test, set and clear them
 * by the name without the AuOpts_ prefix.
 */
#define AuOpts_REMOUNT		(1 << 0)
#define AuOpts_REFRESH		(1 << 1)
#define AuOpts_TRUNC_XIB	(1 << 2)
#define AuOpts_REFRESH_DYAOP	(1 << 3)
#define AuOpts_REFRESH_IDOP	(1 << 4)

/* the result is the masked bit, not 0/1 */
#define au_ftest_opts(flags, name)	((flags) & AuOpts_##name)
#define au_fset_opts(flags, name) do { \
	(flags) |= AuOpts_##name; \
} while (0)
#define au_fclr_opts(flags, name) do { \
	(flags) &= ~AuOpts_##name; \
} while (0)
1facf9fc 24701+
/*
 * the whole set of the parsed options, handed to au_opts_mount() and
 * au_opts_remount().
 */
struct au_opts {
	/* the array of options, terminated by the one whose type is Opt_tail */
	struct au_opt	*opt;
	/* NOTE(review): presumably the capacity of @opt -- confirm in opts.c */
	int		max_opt;

	/* non-zero makes au_opts_remount() request a refresh */
	unsigned int	given_udba;
	/* the bits of AuOpts_*, see au_ftest_opts() and friends */
	unsigned int	flags;
	/* passed to au_opts_verify() by au_opts_remount() */
	unsigned long	sb_flags;
};
24710+
24711+/* ---------------------------------------------------------------------- */
24712+
7e9cd9fe 24713+/* opts.c */
076b876e 24714+void au_optstr_br_perm(au_br_perm_str_t *str, int perm);
1facf9fc 24715+const char *au_optstr_udba(int udba);
24716+const char *au_optstr_wbr_copyup(int wbr_copyup);
24717+const char *au_optstr_wbr_create(int wbr_create);
24718+
24719+void au_opts_free(struct au_opts *opts);
24720+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts);
24721+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
24722+ unsigned int pending);
24723+int au_opts_mount(struct super_block *sb, struct au_opts *opts);
24724+int au_opts_remount(struct super_block *sb, struct au_opts *opts);
24725+
24726+unsigned int au_opt_udba(struct super_block *sb);
24727+
1facf9fc 24728+#endif /* __KERNEL__ */
24729+#endif /* __AUFS_OPTS_H__ */
7f207e10
AM
24730diff -urN /usr/share/empty/fs/aufs/plink.c linux/fs/aufs/plink.c
24731--- /usr/share/empty/fs/aufs/plink.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 24732+++ linux/fs/aufs/plink.c 2015-09-24 10:47:58.254719746 +0200
5527c038 24733@@ -0,0 +1,528 @@
1facf9fc 24734+/*
2000de60 24735+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 24736+ *
24737+ * This program, aufs is free software; you can redistribute it and/or modify
24738+ * it under the terms of the GNU General Public License as published by
24739+ * the Free Software Foundation; either version 2 of the License, or
24740+ * (at your option) any later version.
dece6358
AM
24741+ *
24742+ * This program is distributed in the hope that it will be useful,
24743+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24744+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24745+ * GNU General Public License for more details.
24746+ *
24747+ * You should have received a copy of the GNU General Public License
523b37e3 24748+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 24749+ */
24750+
24751+/*
24752+ * pseudo-link
24753+ */
24754+
24755+#include "aufs.h"
24756+
/*
 * the pseudo-link maintenance mode.
 * while a user process is maintaining the pseudo-links,
 * adding a new plink and manipulating branches are prohibited.
 *
 * Flags
 * NOPLM:
 *	For entry functions which will handle plink, and i_mutex is already held
 *	in VFS.
 *	They cannot wait and should return an error at once.
 *	Callers have to check the error.
 * NOPLMW:
 *	For entry functions which will handle plink, but i_mutex is not held
 *	in VFS.
 *	They can wait for the plink maintenance mode to finish.
 *
 * They behave like F_SETLK and F_SETLKW.
 * If the caller never handles plink, then both flags are unnecessary.
 *
 * Returns zero when the caller may go on, or -EAGAIN when NOPLM is given
 * (without NOPLMW) while another process is in the maintenance mode.
 * With NOPLMW, the si lock is released and acquired again (for read) while
 * waiting.
 */

int au_plink_maint(struct super_block *sb, int flags)
{
	int err;
	pid_t pid, ppid;
	struct au_sbinfo *sbi;

	SiMustAnyLock(sb);

	err = 0;
	/* nothing to do unless the plink option is enabled */
	if (!au_opt_test(au_mntflags(sb), PLINK))
		goto out;

	/* nobody is maintaining, or the maintainer is myself */
	sbi = au_sbi(sb);
	pid = sbi->si_plink_maint_pid;
	if (!pid || pid == current->pid)
		goto out;

	/* todo: it highly depends upon /sbin/mount.aufs */
	/* a child of the maintainer is allowed too */
	rcu_read_lock();
	ppid = task_pid_vnr(rcu_dereference(current->real_parent));
	rcu_read_unlock();
	if (pid == ppid)
		goto out;

	if (au_ftest_lock(flags, NOPLMW)) {
		/* if there is no i_mutex lock in VFS, we don't need to wait */
		/* AuDebugOn(!lockdep_depth(current)); */
		while (sbi->si_plink_maint_pid) {
			/* never sleep with the si lock held */
			si_read_unlock(sb);
			/* gave up wake_up_bit() */
			wait_event(sbi->si_plink_wq, !sbi->si_plink_maint_pid);

			if (au_ftest_lock(flags, FLUSH))
				au_nwt_flush(&sbi->si_nowait);
			/* re-test the pid after the lock is acquired again */
			si_noflush_read_lock(sb);
		}
	} else if (au_ftest_lock(flags, NOPLM)) {
		AuDbg("ppid %d, pid %d\n", ppid, pid);
		err = -EAGAIN;
	}

out:
	return err;
}
24821+
/*
 * leave the plink maintenance mode.
 * the recorded pid is cleared under si_plink_maint_lock, and then all the
 * waiters on si_plink_wq (see au_plink_maint()) are woken up.
 */
void au_plink_maint_leave(struct au_sbinfo *sbinfo)
{
	spin_lock(&sbinfo->si_plink_maint_lock);
	sbinfo->si_plink_maint_pid = 0;
	spin_unlock(&sbinfo->si_plink_maint_lock);
	/* wake up after the pid is cleared, since the waiters test it */
	wake_up_all(&sbinfo->si_plink_wq);
}
24829+
e49829fe 24830+int au_plink_maint_enter(struct super_block *sb)
4a4d8108
AM
24831+{
24832+ int err;
4a4d8108
AM
24833+ struct au_sbinfo *sbinfo;
24834+
24835+ err = 0;
4a4d8108
AM
24836+ sbinfo = au_sbi(sb);
24837+ /* make sure i am the only one in this fs */
e49829fe
JR
24838+ si_write_lock(sb, AuLock_FLUSH);
24839+ if (au_opt_test(au_mntflags(sb), PLINK)) {
24840+ spin_lock(&sbinfo->si_plink_maint_lock);
24841+ if (!sbinfo->si_plink_maint_pid)
24842+ sbinfo->si_plink_maint_pid = current->pid;
24843+ else
24844+ err = -EBUSY;
24845+ spin_unlock(&sbinfo->si_plink_maint_lock);
24846+ }
4a4d8108
AM
24847+ si_write_unlock(sb);
24848+
24849+ return err;
1facf9fc 24850+}
24851+
24852+/* ---------------------------------------------------------------------- */
24853+
#ifdef CONFIG_AUFS_DEBUG
/* debug only: print the inode number of every plink in every hash bucket */
void au_plink_list(struct super_block *sb)
{
	int bucket;
	struct au_sbinfo *sbinfo;
	struct hlist_head *head;
	struct pseudo_link *entry;

	SiMustAnyLock(sb);

	sbinfo = au_sbi(sb);
	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
	AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));

	for (bucket = 0; bucket < AuPlink_NHASH; bucket++) {
		head = &sbinfo->si_plink[bucket].head;
		rcu_read_lock();
		hlist_for_each_entry_rcu(entry, head, hlist) {
			AuDbg("%lu\n", entry->inode->i_ino);
		}
		rcu_read_unlock();
	}
}
#endif
24877+
24878+/* is the inode pseudo-linked? */
24879+int au_plink_test(struct inode *inode)
24880+{
86dc4139 24881+ int found, i;
1facf9fc 24882+ struct au_sbinfo *sbinfo;
86dc4139 24883+ struct hlist_head *plink_hlist;
1facf9fc 24884+ struct pseudo_link *plink;
24885+
24886+ sbinfo = au_sbi(inode->i_sb);
dece6358 24887+ AuRwMustAnyLock(&sbinfo->si_rwsem);
1facf9fc 24888+ AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK));
e49829fe 24889+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
1facf9fc 24890+
24891+ found = 0;
86dc4139
AM
24892+ i = au_plink_hash(inode->i_ino);
24893+ plink_hlist = &sbinfo->si_plink[i].head;
4a4d8108 24894+ rcu_read_lock();
86dc4139 24895+ hlist_for_each_entry_rcu(plink, plink_hlist, hlist)
1facf9fc 24896+ if (plink->inode == inode) {
24897+ found = 1;
24898+ break;
24899+ }
4a4d8108 24900+ rcu_read_unlock();
1facf9fc 24901+ return found;
24902+}
24903+
24904+/* ---------------------------------------------------------------------- */
24905+
24906+/*
24907+ * generate a name for plink.
24908+ * the file will be stored under AUFS_WH_PLINKDIR.
24909+ */
24910+/* 20 is max digits length of ulong 64 */
24911+#define PLINK_NAME_LEN ((20 + 1) * 2)
24912+
24913+static int plink_name(char *name, int len, struct inode *inode,
24914+ aufs_bindex_t bindex)
24915+{
24916+ int rlen;
24917+ struct inode *h_inode;
24918+
24919+ h_inode = au_h_iptr(inode, bindex);
24920+ rlen = snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino);
24921+ return rlen;
24922+}
24923+
7f207e10
AM
/*
 * the arguments for au_call_do_plink_lkup(), which au_plink_lkup() passes
 * via au_wkq_wait().
 * @errp is where the result of au_do_plink_lkup() is stored; the other
 * members are its parameters.
 */
struct au_do_plink_lkup_args {
	struct dentry **errp;
	struct qstr *tgtname;
	struct dentry *h_parent;
	struct au_branch *br;
};
24930+
24931+static struct dentry *au_do_plink_lkup(struct qstr *tgtname,
24932+ struct dentry *h_parent,
24933+ struct au_branch *br)
24934+{
24935+ struct dentry *h_dentry;
24936+ struct mutex *h_mtx;
24937+
5527c038 24938+ h_mtx = &d_inode(h_parent)->i_mutex;
7f207e10 24939+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2);
b4510431 24940+ h_dentry = vfsub_lkup_one(tgtname, h_parent);
7f207e10
AM
24941+ mutex_unlock(h_mtx);
24942+ return h_dentry;
24943+}
24944+
24945+static void au_call_do_plink_lkup(void *args)
24946+{
24947+ struct au_do_plink_lkup_args *a = args;
24948+ *a->errp = au_do_plink_lkup(a->tgtname, a->h_parent, a->br);
24949+}
24950+
1facf9fc 24951+/* lookup the plink-ed @inode under the branch at @bindex */
24952+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex)
24953+{
24954+ struct dentry *h_dentry, *h_parent;
24955+ struct au_branch *br;
7f207e10 24956+ int wkq_err;
1facf9fc 24957+ char a[PLINK_NAME_LEN];
0c3ec466 24958+ struct qstr tgtname = QSTR_INIT(a, 0);
1facf9fc 24959+
e49829fe
JR
24960+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
24961+
1facf9fc 24962+ br = au_sbr(inode->i_sb, bindex);
24963+ h_parent = br->br_wbr->wbr_plink;
1facf9fc 24964+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
24965+
2dfbb274 24966+ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
7f207e10
AM
24967+ struct au_do_plink_lkup_args args = {
24968+ .errp = &h_dentry,
24969+ .tgtname = &tgtname,
24970+ .h_parent = h_parent,
24971+ .br = br
24972+ };
24973+
24974+ wkq_err = au_wkq_wait(au_call_do_plink_lkup, &args);
24975+ if (unlikely(wkq_err))
24976+ h_dentry = ERR_PTR(wkq_err);
24977+ } else
24978+ h_dentry = au_do_plink_lkup(&tgtname, h_parent, br);
24979+
1facf9fc 24980+ return h_dentry;
24981+}
24982+
/*
 * create a pseudo-link.
 * make the hardlink named @tgt to @h_dentry under @h_parent, with the
 * i_mutex of the parent dir held all the time.
 * when the name already exists and refers to another inode, the existing
 * entry is unlinked and the lookup is tried again. when it already refers
 * to the inode of @h_dentry, nothing is created.
 * returns 0 or a negative errno.
 */
static int do_whplink(struct qstr *tgt, struct dentry *h_parent,
		      struct dentry *h_dentry, struct au_branch *br)
{
	int err;
	struct path h_path = {
		.mnt = au_br_mnt(br)
	};
	struct inode *h_dir, *delegated;

	h_dir = d_inode(h_parent);
	mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_CHILD2);
again:
	h_path.dentry = vfsub_lkup_one(tgt, h_parent);
	err = PTR_ERR(h_path.dentry);
	if (IS_ERR(h_path.dentry))
		goto out;

	err = 0;
	/* wh.plink dir is not monitored */
	/* todo: is it really safe? */
	if (d_is_positive(h_path.dentry)
	    && d_inode(h_path.dentry) != d_inode(h_dentry)) {
		/* the name is taken by another inode, remove it */
		delegated = NULL;
		err = vfsub_unlink(h_dir, &h_path, &delegated, /*force*/0);
		if (unlikely(err == -EWOULDBLOCK)) {
			pr_warn("cannot retry for NFSv4 delegation"
				" for an internal unlink\n");
			iput(delegated);
		}
		dput(h_path.dentry);
		/* NULL makes dput() at the end harmless in the error case */
		h_path.dentry = NULL;
		if (!err)
			goto again;
	}
	if (!err && d_is_negative(h_path.dentry)) {
		delegated = NULL;
		err = vfsub_link(h_dentry, h_dir, &h_path, &delegated);
		if (unlikely(err == -EWOULDBLOCK)) {
			pr_warn("cannot retry for NFSv4 delegation"
				" for an internal link\n");
			iput(delegated);
		}
	}
	dput(h_path.dentry);

out:
	mutex_unlock(&h_dir->i_mutex);
	return err;
}
25033+
/*
 * the arguments for call_do_whplink(), which whplink() passes via
 * au_wkq_wait().
 * @errp is where the return value of do_whplink() is stored; the other
 * members are its parameters.
 */
struct do_whplink_args {
	int *errp;
	struct qstr *tgt;
	struct dentry *h_parent;
	struct dentry *h_dentry;
	struct au_branch *br;
};
25041+
25042+static void call_do_whplink(void *args)
25043+{
25044+ struct do_whplink_args *a = args;
25045+ *a->errp = do_whplink(a->tgt, a->h_parent, a->h_dentry, a->br);
25046+}
25047+
25048+static int whplink(struct dentry *h_dentry, struct inode *inode,
25049+ aufs_bindex_t bindex, struct au_branch *br)
25050+{
25051+ int err, wkq_err;
25052+ struct au_wbr *wbr;
25053+ struct dentry *h_parent;
1facf9fc 25054+ char a[PLINK_NAME_LEN];
0c3ec466 25055+ struct qstr tgtname = QSTR_INIT(a, 0);
1facf9fc 25056+
25057+ wbr = au_sbr(inode->i_sb, bindex)->br_wbr;
25058+ h_parent = wbr->wbr_plink;
1facf9fc 25059+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
25060+
25061+ /* always superio. */
2dfbb274 25062+ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
1facf9fc 25063+ struct do_whplink_args args = {
25064+ .errp = &err,
25065+ .tgt = &tgtname,
25066+ .h_parent = h_parent,
25067+ .h_dentry = h_dentry,
25068+ .br = br
25069+ };
25070+ wkq_err = au_wkq_wait(call_do_whplink, &args);
25071+ if (unlikely(wkq_err))
25072+ err = wkq_err;
25073+ } else
25074+ err = do_whplink(&tgtname, h_parent, h_dentry, br);
1facf9fc 25075+
25076+ return err;
25077+}
25078+
25079+/* free a single plink */
25080+static void do_put_plink(struct pseudo_link *plink, int do_del)
25081+{
1facf9fc 25082+ if (do_del)
86dc4139 25083+ hlist_del(&plink->hlist);
4a4d8108
AM
25084+ iput(plink->inode);
25085+ kfree(plink);
25086+}
25087+
25088+static void do_put_plink_rcu(struct rcu_head *rcu)
25089+{
25090+ struct pseudo_link *plink;
25091+
25092+ plink = container_of(rcu, struct pseudo_link, rcu);
25093+ iput(plink->inode);
1facf9fc 25094+ kfree(plink);
25095+}
25096+
/*
 * create a new pseudo-link for @h_dentry on @bindex.
 * the linked inode is held in aufs @inode.
 *
 * nothing is done when @inode is already in the list.
 * otherwise a new entry is added to the hash list first, and then the link
 * on the branch is created by whplink(). when it fails, the entry is
 * removed again and a warning is printed. no error is returned to the
 * caller.
 */
void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
		     struct dentry *h_dentry)
{
	struct super_block *sb;
	struct au_sbinfo *sbinfo;
	struct hlist_head *plink_hlist;
	struct pseudo_link *plink, *tmp;
	struct au_sphlhead *sphl;
	int found, err, cnt, i;

	sb = inode->i_sb;
	sbinfo = au_sbi(sb);
	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
	AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));

	/* the first test without the spinlock */
	found = au_plink_test(inode);
	if (found)
		return;

	i = au_plink_hash(inode->i_ino);
	sphl = sbinfo->si_plink + i;
	plink_hlist = &sphl->head;
	/* allocate before taking the spinlock */
	tmp = kmalloc(sizeof(*plink), GFP_NOFS);
	if (tmp)
		tmp->inode = au_igrab(inode);
	else {
		err = -ENOMEM;
		goto out;
	}

	/* test again under the spinlock, someone else may have added it */
	spin_lock(&sphl->spin);
	hlist_for_each_entry(plink, plink_hlist, hlist) {
		if (plink->inode == inode) {
			found = 1;
			break;
		}
	}
	if (!found)
		hlist_add_head_rcu(&tmp->hlist, plink_hlist);
	spin_unlock(&sphl->spin);
	if (!found) {
		cnt = au_sphl_count(sphl);
#define msg "unexpectedly unblanced or too many pseudo-links"
		if (cnt > AUFS_PLINK_WARN)
			AuWarn1(msg ", %d\n", cnt);
#undef msg
		err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex));
	} else {
		/* lost the race, tmp was never added to the list */
		do_put_plink(tmp, 0);
		return;
	}

out:
	if (unlikely(err)) {
		pr_warn("err %d, damaged pseudo link.\n", err);
		/* tmp is NULL when the allocation failed */
		if (tmp) {
			au_sphl_del_rcu(&tmp->hlist, sphl);
			call_rcu(&tmp->rcu, do_put_plink_rcu);
		}
	}
}
25162+
25163+/* free all plinks */
e49829fe 25164+void au_plink_put(struct super_block *sb, int verbose)
1facf9fc 25165+{
86dc4139 25166+ int i, warned;
1facf9fc 25167+ struct au_sbinfo *sbinfo;
86dc4139
AM
25168+ struct hlist_head *plink_hlist;
25169+ struct hlist_node *tmp;
25170+ struct pseudo_link *plink;
1facf9fc 25171+
dece6358
AM
25172+ SiMustWriteLock(sb);
25173+
1facf9fc 25174+ sbinfo = au_sbi(sb);
25175+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 25176+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 25177+
1facf9fc 25178+ /* no spin_lock since sbinfo is write-locked */
86dc4139
AM
25179+ warned = 0;
25180+ for (i = 0; i < AuPlink_NHASH; i++) {
25181+ plink_hlist = &sbinfo->si_plink[i].head;
25182+ if (!warned && verbose && !hlist_empty(plink_hlist)) {
25183+ pr_warn("pseudo-link is not flushed");
25184+ warned = 1;
25185+ }
25186+ hlist_for_each_entry_safe(plink, tmp, plink_hlist, hlist)
25187+ do_put_plink(plink, 0);
25188+ INIT_HLIST_HEAD(plink_hlist);
25189+ }
1facf9fc 25190+}
25191+
e49829fe
JR
25192+void au_plink_clean(struct super_block *sb, int verbose)
25193+{
25194+ struct dentry *root;
25195+
25196+ root = sb->s_root;
25197+ aufs_write_lock(root);
25198+ if (au_opt_test(au_mntflags(sb), PLINK))
25199+ au_plink_put(sb, verbose);
25200+ aufs_write_unlock(root);
25201+}
25202+
86dc4139
AM
25203+static int au_plink_do_half_refresh(struct inode *inode, aufs_bindex_t br_id)
25204+{
25205+ int do_put;
25206+ aufs_bindex_t bstart, bend, bindex;
25207+
25208+ do_put = 0;
25209+ bstart = au_ibstart(inode);
25210+ bend = au_ibend(inode);
25211+ if (bstart >= 0) {
25212+ for (bindex = bstart; bindex <= bend; bindex++) {
25213+ if (!au_h_iptr(inode, bindex)
25214+ || au_ii_br_id(inode, bindex) != br_id)
25215+ continue;
25216+ au_set_h_iptr(inode, bindex, NULL, 0);
25217+ do_put = 1;
25218+ break;
25219+ }
25220+ if (do_put)
25221+ for (bindex = bstart; bindex <= bend; bindex++)
25222+ if (au_h_iptr(inode, bindex)) {
25223+ do_put = 0;
25224+ break;
25225+ }
25226+ } else
25227+ do_put = 1;
25228+
25229+ return do_put;
25230+}
25231+
1facf9fc 25232+/* free the plinks on a branch specified by @br_id */
25233+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id)
25234+{
25235+ struct au_sbinfo *sbinfo;
86dc4139
AM
25236+ struct hlist_head *plink_hlist;
25237+ struct hlist_node *tmp;
25238+ struct pseudo_link *plink;
1facf9fc 25239+ struct inode *inode;
86dc4139 25240+ int i, do_put;
1facf9fc 25241+
dece6358
AM
25242+ SiMustWriteLock(sb);
25243+
1facf9fc 25244+ sbinfo = au_sbi(sb);
25245+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 25246+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 25247+
1facf9fc 25248+ /* no spin_lock since sbinfo is write-locked */
86dc4139
AM
25249+ for (i = 0; i < AuPlink_NHASH; i++) {
25250+ plink_hlist = &sbinfo->si_plink[i].head;
25251+ hlist_for_each_entry_safe(plink, tmp, plink_hlist, hlist) {
25252+ inode = au_igrab(plink->inode);
25253+ ii_write_lock_child(inode);
25254+ do_put = au_plink_do_half_refresh(inode, br_id);
dece6358
AM
25255+ if (do_put)
25256+ do_put_plink(plink, 1);
86dc4139
AM
25257+ ii_write_unlock(inode);
25258+ iput(inode);
dece6358 25259+ }
dece6358
AM
25260+ }
25261+}
7f207e10
AM
25262diff -urN /usr/share/empty/fs/aufs/poll.c linux/fs/aufs/poll.c
25263--- /usr/share/empty/fs/aufs/poll.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 25264+++ linux/fs/aufs/poll.c 2015-09-24 10:47:58.254719746 +0200
b912730e 25265@@ -0,0 +1,52 @@
dece6358 25266+/*
2000de60 25267+ * Copyright (C) 2005-2015 Junjiro R. Okajima
dece6358
AM
25268+ *
25269+ * This program, aufs is free software; you can redistribute it and/or modify
25270+ * it under the terms of the GNU General Public License as published by
25271+ * the Free Software Foundation; either version 2 of the License, or
25272+ * (at your option) any later version.
25273+ *
25274+ * This program is distributed in the hope that it will be useful,
25275+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25276+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25277+ * GNU General Public License for more details.
25278+ *
25279+ * You should have received a copy of the GNU General Public License
523b37e3 25280+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
dece6358
AM
25281+ */
25282+
1308ab2a 25283+/*
25284+ * poll operation
25285+ * There is only one filesystem which implements ->poll operation, currently.
25286+ */
25287+
25288+#include "aufs.h"
25289+
25290+unsigned int aufs_poll(struct file *file, poll_table *wait)
25291+{
25292+ unsigned int mask;
25293+ int err;
25294+ struct file *h_file;
1308ab2a 25295+ struct super_block *sb;
25296+
25297+ /* We should pretend an error happened. */
25298+ mask = POLLERR /* | POLLIN | POLLOUT */;
b912730e 25299+ sb = file->f_path.dentry->d_sb;
e49829fe 25300+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
b912730e
AM
25301+
25302+ h_file = au_read_pre(file, /*keep_fi*/0);
25303+ err = PTR_ERR(h_file);
25304+ if (IS_ERR(h_file))
1308ab2a 25305+ goto out;
25306+
25307+ /* it is not an error if h_file has no operation */
25308+ mask = DEFAULT_POLLMASK;
523b37e3 25309+ if (h_file->f_op->poll)
1308ab2a 25310+ mask = h_file->f_op->poll(h_file, wait);
b912730e 25311+ fput(h_file); /* instead of au_read_post() */
1308ab2a 25312+
4f0767ce 25313+out:
1308ab2a 25314+ si_read_unlock(sb);
25315+ AuTraceErr((int)mask);
25316+ return mask;
25317+}
c1595e42
JR
25318diff -urN /usr/share/empty/fs/aufs/posix_acl.c linux/fs/aufs/posix_acl.c
25319--- /usr/share/empty/fs/aufs/posix_acl.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 25320+++ linux/fs/aufs/posix_acl.c 2015-09-24 10:47:58.254719746 +0200
c1595e42
JR
25321@@ -0,0 +1,99 @@
25322+/*
2000de60 25323+ * Copyright (C) 2014-2015 Junjiro R. Okajima
c1595e42
JR
25324+ *
25325+ * This program, aufs is free software; you can redistribute it and/or modify
25326+ * it under the terms of the GNU General Public License as published by
25327+ * the Free Software Foundation; either version 2 of the License, or
25328+ * (at your option) any later version.
25329+ *
25330+ * This program is distributed in the hope that it will be useful,
25331+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25332+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25333+ * GNU General Public License for more details.
25334+ *
25335+ * You should have received a copy of the GNU General Public License
25336+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
25337+ */
25338+
25339+/*
25340+ * posix acl operations
25341+ */
25342+
25343+#include <linux/fs.h>
25344+#include <linux/posix_acl.h>
25345+#include "aufs.h"
25346+
25347+struct posix_acl *aufs_get_acl(struct inode *inode, int type)
25348+{
25349+ struct posix_acl *acl;
25350+ int err;
25351+ aufs_bindex_t bindex;
25352+ struct inode *h_inode;
25353+ struct super_block *sb;
25354+
25355+ acl = NULL;
25356+ sb = inode->i_sb;
25357+ si_read_lock(sb, AuLock_FLUSH);
25358+ ii_read_lock_child(inode);
25359+ if (!(sb->s_flags & MS_POSIXACL))
25360+ goto out;
25361+
25362+ bindex = au_ibstart(inode);
25363+ h_inode = au_h_iptr(inode, bindex);
25364+ if (unlikely(!h_inode
25365+ || ((h_inode->i_mode & S_IFMT)
25366+ != (inode->i_mode & S_IFMT)))) {
25367+ err = au_busy_or_stale();
25368+ acl = ERR_PTR(err);
25369+ goto out;
25370+ }
25371+
25372+ /* always topmost only */
25373+ acl = get_acl(h_inode, type);
25374+
25375+out:
25376+ ii_read_unlock(inode);
25377+ si_read_unlock(sb);
25378+
25379+ AuTraceErrPtr(acl);
25380+ return acl;
25381+}
25382+
25383+int aufs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
25384+{
25385+ int err;
25386+ ssize_t ssz;
25387+ struct dentry *dentry;
25388+ struct au_srxattr arg = {
25389+ .type = AU_ACL_SET,
25390+ .u.acl_set = {
25391+ .acl = acl,
25392+ .type = type
25393+ },
25394+ };
25395+
25396+ mutex_lock(&inode->i_mutex);
25397+ if (inode->i_ino == AUFS_ROOT_INO)
25398+ dentry = dget(inode->i_sb->s_root);
25399+ else {
25400+ dentry = d_find_alias(inode);
25401+ if (!dentry)
25402+ dentry = d_find_any_alias(inode);
25403+ if (!dentry) {
25404+ pr_warn("cannot handle this inode, "
25405+ "please report to aufs-users ML\n");
25406+ err = -ENOENT;
25407+ goto out;
25408+ }
25409+ }
25410+
25411+ ssz = au_srxattr(dentry, &arg);
25412+ dput(dentry);
25413+ err = ssz;
25414+ if (ssz >= 0)
25415+ err = 0;
25416+
25417+out:
25418+ mutex_unlock(&inode->i_mutex);
25419+ return err;
25420+}
7f207e10
AM
25421diff -urN /usr/share/empty/fs/aufs/procfs.c linux/fs/aufs/procfs.c
25422--- /usr/share/empty/fs/aufs/procfs.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 25423+++ linux/fs/aufs/procfs.c 2015-09-24 10:47:58.254719746 +0200
523b37e3 25424@@ -0,0 +1,169 @@
e49829fe 25425+/*
2000de60 25426+ * Copyright (C) 2010-2015 Junjiro R. Okajima
e49829fe
JR
25427+ *
25428+ * This program, aufs is free software; you can redistribute it and/or modify
25429+ * it under the terms of the GNU General Public License as published by
25430+ * the Free Software Foundation; either version 2 of the License, or
25431+ * (at your option) any later version.
25432+ *
25433+ * This program is distributed in the hope that it will be useful,
25434+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25435+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25436+ * GNU General Public License for more details.
25437+ *
25438+ * You should have received a copy of the GNU General Public License
523b37e3 25439+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
e49829fe
JR
25440+ */
25441+
25442+/*
25443+ * procfs interfaces
25444+ */
25445+
25446+#include <linux/proc_fs.h>
25447+#include "aufs.h"
25448+
25449+static int au_procfs_plm_release(struct inode *inode, struct file *file)
25450+{
25451+ struct au_sbinfo *sbinfo;
25452+
25453+ sbinfo = file->private_data;
25454+ if (sbinfo) {
25455+ au_plink_maint_leave(sbinfo);
25456+ kobject_put(&sbinfo->si_kobj);
25457+ }
25458+
25459+ return 0;
25460+}
25461+
25462+static void au_procfs_plm_write_clean(struct file *file)
25463+{
25464+ struct au_sbinfo *sbinfo;
25465+
25466+ sbinfo = file->private_data;
25467+ if (sbinfo)
25468+ au_plink_clean(sbinfo->si_sb, /*verbose*/0);
25469+}
25470+
25471+static int au_procfs_plm_write_si(struct file *file, unsigned long id)
25472+{
25473+ int err;
25474+ struct super_block *sb;
25475+ struct au_sbinfo *sbinfo;
25476+
25477+ err = -EBUSY;
25478+ if (unlikely(file->private_data))
25479+ goto out;
25480+
25481+ sb = NULL;
53392da6 25482+ /* don't use au_sbilist_lock() here */
e49829fe
JR
25483+ spin_lock(&au_sbilist.spin);
25484+ list_for_each_entry(sbinfo, &au_sbilist.head, si_list)
25485+ if (id == sysaufs_si_id(sbinfo)) {
25486+ kobject_get(&sbinfo->si_kobj);
25487+ sb = sbinfo->si_sb;
25488+ break;
25489+ }
25490+ spin_unlock(&au_sbilist.spin);
25491+
25492+ err = -EINVAL;
25493+ if (unlikely(!sb))
25494+ goto out;
25495+
25496+ err = au_plink_maint_enter(sb);
25497+ if (!err)
25498+ /* keep kobject_get() */
25499+ file->private_data = sbinfo;
25500+ else
25501+ kobject_put(&sbinfo->si_kobj);
25502+out:
25503+ return err;
25504+}
25505+
25506+/*
25507+ * Accept a valid "si=<hex id>" (CAP_SYS_ADMIN only); "clean" acts only after
25508+ * one was accepted. Returns count on success, a negative errno otherwise.
25509+ */
25510+static ssize_t au_procfs_plm_write(struct file *file, const char __user *ubuf,
25511+ size_t count, loff_t *ppos)
25512+{
25513+ ssize_t err;
25514+ unsigned long id;
25515+ /* "si=" + hex digits + optional last newline + terminating NUL */
25516+ char buf[3 + sizeof(unsigned long) * 2 + 1 + 1];
25517+
25518+ err = -EACCES;
25519+ if (unlikely(!capable(CAP_SYS_ADMIN)))
25520+ goto out;
25521+
25522+ err = -EINVAL;
25523+ if (unlikely(count >= sizeof(buf))) /* keep room for buf[count] = 0 */
25524+ goto out;
25525+
25526+ err = copy_from_user(buf, ubuf, count);
25527+ if (unlikely(err)) {
25528+ err = -EFAULT;
25529+ goto out;
25530+ }
25531+ buf[count] = 0;
25532+
25533+ err = -EINVAL;
25534+ if (!strcmp("clean", buf)) {
25535+ au_procfs_plm_write_clean(file);
25536+ goto out_success;
25537+ } else if (unlikely(strncmp("si=", buf, 3)))
25538+ goto out;
25539+
9dbd164d 25540+ err = kstrtoul(buf + 3, 16, &id);
e49829fe
JR
25541+ if (unlikely(err))
25542+ goto out;
25543+
25544+ err = au_procfs_plm_write_si(file, id);
25545+ if (unlikely(err))
25546+ goto out;
25547+
25548+out_success:
25549+ err = count; /* success */
25550+out:
25551+ return err;
25552+}
25553+
25554+static const struct file_operations au_procfs_plm_fop = {
25555+ .write = au_procfs_plm_write,
25556+ .release = au_procfs_plm_release,
25557+ .owner = THIS_MODULE
25558+};
25559+
25560+/* ---------------------------------------------------------------------- */
25561+
25562+static struct proc_dir_entry *au_procfs_dir;
25563+
25564+void au_procfs_fin(void)
25565+{
25566+ remove_proc_entry(AUFS_PLINK_MAINT_NAME, au_procfs_dir);
25567+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
25568+}
25569+
25570+int __init au_procfs_init(void)
25571+{
25572+ int err;
25573+ struct proc_dir_entry *entry;
25574+
25575+ err = -ENOMEM;
25576+ au_procfs_dir = proc_mkdir(AUFS_PLINK_MAINT_DIR, NULL);
25577+ if (unlikely(!au_procfs_dir))
25578+ goto out;
25579+
25580+ entry = proc_create(AUFS_PLINK_MAINT_NAME, S_IFREG | S_IWUSR,
25581+ au_procfs_dir, &au_procfs_plm_fop);
25582+ if (unlikely(!entry))
25583+ goto out_dir;
25584+
25585+ err = 0;
25586+ goto out; /* success */
25587+
25588+
25589+out_dir:
25590+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
25591+out:
25592+ return err;
25593+}
7f207e10
AM
25594diff -urN /usr/share/empty/fs/aufs/rdu.c linux/fs/aufs/rdu.c
25595--- /usr/share/empty/fs/aufs/rdu.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 25596+++ linux/fs/aufs/rdu.c 2015-09-24 10:47:58.254719746 +0200
523b37e3 25597@@ -0,0 +1,388 @@
1308ab2a 25598+/*
2000de60 25599+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1308ab2a 25600+ *
25601+ * This program, aufs is free software; you can redistribute it and/or modify
25602+ * it under the terms of the GNU General Public License as published by
25603+ * the Free Software Foundation; either version 2 of the License, or
25604+ * (at your option) any later version.
25605+ *
25606+ * This program is distributed in the hope that it will be useful,
25607+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25608+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25609+ * GNU General Public License for more details.
25610+ *
25611+ * You should have received a copy of the GNU General Public License
523b37e3 25612+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1308ab2a 25613+ */
25614+
25615+/*
25616+ * readdir in userspace.
25617+ */
25618+
b752ccd1 25619+#include <linux/compat.h>
4a4d8108 25620+#include <linux/fs_stack.h>
1308ab2a 25621+#include <linux/security.h>
1308ab2a 25622+#include "aufs.h"
25623+
25624+/* bits for struct aufs_rdu.flags */
25625+#define AuRdu_CALLED 1
25626+#define AuRdu_CONT (1 << 1)
25627+#define AuRdu_FULL (1 << 2)
25628+#define au_ftest_rdu(flags, name) ((flags) & AuRdu_##name)
7f207e10
AM
25629+#define au_fset_rdu(flags, name) \
25630+ do { (flags) |= AuRdu_##name; } while (0)
25631+#define au_fclr_rdu(flags, name) \
25632+ do { (flags) &= ~AuRdu_##name; } while (0)
1308ab2a 25633+
25634+struct au_rdu_arg {
392086de 25635+ struct dir_context ctx;
1308ab2a 25636+ struct aufs_rdu *rdu;
25637+ union au_rdu_ent_ul ent;
25638+ unsigned long end;
25639+
25640+ struct super_block *sb;
25641+ int err;
25642+};
25643+
392086de 25644+static int au_rdu_fill(struct dir_context *ctx, const char *name, int nlen,
1308ab2a 25645+ loff_t offset, u64 h_ino, unsigned int d_type)
25646+{
25647+ int err, len;
392086de 25648+ struct au_rdu_arg *arg = container_of(ctx, struct au_rdu_arg, ctx);
1308ab2a 25649+ struct aufs_rdu *rdu = arg->rdu;
25650+ struct au_rdu_ent ent;
25651+
25652+ memset(&ent, 0, sizeof(ent)); /* ent is copied out whole: no stack leak */
25653+ arg->err = 0;
25654+ au_fset_rdu(rdu->cookie.flags, CALLED); /* tell au_rdu_do() we were called */
25655+ len = au_rdu_len(nlen);
25656+ if (arg->ent.ul + len < arg->end) { /* entry fits before the buffer end */
25657+ ent.ino = h_ino;
25658+ ent.bindex = rdu->cookie.bindex;
25659+ ent.type = d_type;
25660+ ent.nlen = nlen;
4a4d8108
AM
25661+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
25662+ ent.type = DT_UNKNOWN;
1308ab2a 25663+
9dbd164d 25664+ /* unnecessary to support mmap_sem since this is a dir */
1308ab2a 25665+ err = -EFAULT;
25666+ if (copy_to_user(arg->ent.e, &ent, sizeof(ent)))
25667+ goto out;
25668+ if (copy_to_user(arg->ent.e->name, name, nlen))
25669+ goto out;
25670+ /* the terminating NUL */
25671+ if (__put_user(0, arg->ent.e->name + nlen))
25672+ goto out;
25673+ err = 0;
25674+ /* AuDbg("%p, %.*s\n", arg->ent.p, nlen, name); */
25675+ arg->ent.ul += len; /* advance to the next entry slot */
25676+ rdu->rent++;
25677+ } else { /* no room: flag FULL and record where we stopped */
25678+ err = -EFAULT;
25679+ au_fset_rdu(rdu->cookie.flags, FULL);
25680+ rdu->full = 1;
25681+ rdu->tail = arg->ent;
25682+ }
25683+
4f0767ce 25684+out:
1308ab2a 25685+ /* AuTraceErr(err); */
25686+ return err;
25687+}
25688+
25689+static int au_rdu_do(struct file *h_file, struct au_rdu_arg *arg)
25690+{
25691+ int err;
25692+ loff_t offset;
25693+ struct au_rdu_cookie *cookie = &arg->rdu->cookie;
25694+
92d182d2 25695+ /* we don't have to care (FMODE_32BITHASH | FMODE_64BITHASH) for ext4 */
1308ab2a 25696+ offset = vfsub_llseek(h_file, cookie->h_pos, SEEK_SET);
25697+ err = offset;
25698+ if (unlikely(offset != cookie->h_pos))
25699+ goto out;
25700+
25701+ err = 0;
25702+ do {
25703+ arg->err = 0;
25704+ au_fclr_rdu(cookie->flags, CALLED);
25705+ /* smp_mb(); */
392086de 25706+ err = vfsub_iterate_dir(h_file, &arg->ctx);
1308ab2a 25707+ if (err >= 0)
25708+ err = arg->err;
25709+ } while (!err
25710+ && au_ftest_rdu(cookie->flags, CALLED)
25711+ && !au_ftest_rdu(cookie->flags, FULL));
25712+ cookie->h_pos = h_file->f_pos;
25713+
4f0767ce 25714+out:
1308ab2a 25715+ AuTraceErr(err);
25716+ return err;
25717+}
25718+
25719+static int au_rdu(struct file *file, struct aufs_rdu *rdu)
25720+{
25721+ int err;
25722+ aufs_bindex_t bend;
392086de
AM
25723+ struct au_rdu_arg arg = {
25724+ .ctx = {
2000de60 25725+ .actor = au_rdu_fill
392086de
AM
25726+ }
25727+ };
1308ab2a 25728+ struct dentry *dentry;
25729+ struct inode *inode;
25730+ struct file *h_file;
25731+ struct au_rdu_cookie *cookie = &rdu->cookie;
25732+
25733+ err = !access_ok(VERIFY_WRITE, rdu->ent.e, rdu->sz);
25734+ if (unlikely(err)) {
25735+ err = -EFAULT;
25736+ AuTraceErr(err);
25737+ goto out;
25738+ }
25739+ rdu->rent = 0;
25740+ rdu->tail = rdu->ent;
25741+ rdu->full = 0;
25742+ arg.rdu = rdu;
25743+ arg.ent = rdu->ent;
25744+ arg.end = arg.ent.ul;
25745+ arg.end += rdu->sz;
25746+
25747+ err = -ENOTDIR;
523b37e3 25748+ if (unlikely(!file->f_op->iterate))
1308ab2a 25749+ goto out;
25750+
25751+ err = security_file_permission(file, MAY_READ);
25752+ AuTraceErr(err);
25753+ if (unlikely(err))
25754+ goto out;
25755+
2000de60 25756+ dentry = file->f_path.dentry;
5527c038 25757+ inode = d_inode(dentry);
1308ab2a 25758+#if 1
25759+ mutex_lock(&inode->i_mutex);
25760+#else
25761+ err = mutex_lock_killable(&inode->i_mutex);
25762+ AuTraceErr(err);
25763+ if (unlikely(err))
25764+ goto out;
25765+#endif
1308ab2a 25766+
25767+ arg.sb = inode->i_sb;
e49829fe
JR
25768+ err = si_read_lock(arg.sb, AuLock_FLUSH | AuLock_NOPLM);
25769+ if (unlikely(err))
25770+ goto out_mtx;
027c5e7a
AM
25771+ err = au_alive_dir(dentry);
25772+ if (unlikely(err))
25773+ goto out_si;
e49829fe 25774+ /* todo: reval? */
1308ab2a 25775+ fi_read_lock(file);
25776+
25777+ err = -EAGAIN;
25778+ if (unlikely(au_ftest_rdu(cookie->flags, CONT)
25779+ && cookie->generation != au_figen(file)))
25780+ goto out_unlock;
25781+
25782+ err = 0;
25783+ if (!rdu->blk) {
25784+ rdu->blk = au_sbi(arg.sb)->si_rdblk;
25785+ if (!rdu->blk)
25786+ rdu->blk = au_dir_size(file, /*dentry*/NULL);
25787+ }
25788+ bend = au_fbstart(file);
25789+ if (cookie->bindex < bend)
25790+ cookie->bindex = bend;
4a4d8108 25791+ bend = au_fbend_dir(file);
1308ab2a 25792+ /* AuDbg("b%d, b%d\n", cookie->bindex, bend); */
25793+ for (; !err && cookie->bindex <= bend;
25794+ cookie->bindex++, cookie->h_pos = 0) {
4a4d8108 25795+ h_file = au_hf_dir(file, cookie->bindex);
1308ab2a 25796+ if (!h_file)
25797+ continue;
25798+
25799+ au_fclr_rdu(cookie->flags, FULL);
25800+ err = au_rdu_do(h_file, &arg);
25801+ AuTraceErr(err);
25802+ if (unlikely(au_ftest_rdu(cookie->flags, FULL) || err))
25803+ break;
25804+ }
25805+ AuDbg("rent %llu\n", rdu->rent);
25806+
25807+ if (!err && !au_ftest_rdu(cookie->flags, CONT)) {
25808+ rdu->shwh = !!au_opt_test(au_sbi(arg.sb)->si_mntflags, SHWH);
25809+ au_fset_rdu(cookie->flags, CONT);
25810+ cookie->generation = au_figen(file);
25811+ }
25812+
25813+ ii_read_lock_child(inode);
25814+ fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibstart(inode)));
25815+ ii_read_unlock(inode);
25816+
4f0767ce 25817+out_unlock:
1308ab2a 25818+ fi_read_unlock(file);
027c5e7a 25819+out_si:
1308ab2a 25820+ si_read_unlock(arg.sb);
4f0767ce 25821+out_mtx:
1308ab2a 25822+ mutex_unlock(&inode->i_mutex);
4f0767ce 25823+out:
1308ab2a 25824+ AuTraceErr(err);
25825+ return err;
25826+}
25827+
25828+static int au_rdu_ino(struct file *file, struct aufs_rdu *rdu)
25829+{
25830+ int err;
25831+ ino_t ino;
25832+ unsigned long long nent;
25833+ union au_rdu_ent_ul *u;
25834+ struct au_rdu_ent ent;
25835+ struct super_block *sb;
25836+
25837+ err = 0;
25838+ nent = rdu->nent;
25839+ u = &rdu->ent;
2000de60 25840+ sb = file->f_path.dentry->d_sb;
1308ab2a 25841+ si_read_lock(sb, AuLock_FLUSH);
25842+ while (nent-- > 0) {
9dbd164d 25843+ /* unnecessary to support mmap_sem since this is a dir */
1308ab2a 25844+ err = copy_from_user(&ent, u->e, sizeof(ent));
4a4d8108
AM
25845+ if (!err)
25846+ err = !access_ok(VERIFY_WRITE, &u->e->ino, sizeof(ino));
1308ab2a 25847+ if (unlikely(err)) {
25848+ err = -EFAULT;
25849+ AuTraceErr(err);
25850+ break;
25851+ }
25852+
25853+ /* AuDbg("b%d, i%llu\n", ent.bindex, ent.ino); */
25854+ if (!ent.wh)
25855+ err = au_ino(sb, ent.bindex, ent.ino, ent.type, &ino);
25856+ else
25857+ err = au_wh_ino(sb, ent.bindex, ent.ino, ent.type,
25858+ &ino);
25859+ if (unlikely(err)) {
25860+ AuTraceErr(err);
25861+ break;
25862+ }
25863+
25864+ err = __put_user(ino, &u->e->ino);
25865+ if (unlikely(err)) {
25866+ err = -EFAULT;
25867+ AuTraceErr(err);
25868+ break;
25869+ }
25870+ u->ul += au_rdu_len(ent.nlen);
25871+ }
25872+ si_read_unlock(sb);
25873+
25874+ return err;
25875+}
25876+
25877+/* ---------------------------------------------------------------------- */
25878+
25879+static int au_rdu_verify(struct aufs_rdu *rdu)
25880+{
b752ccd1 25881+ AuDbg("rdu{%llu, %p, %u | %u | %llu, %u, %u | "
1308ab2a 25882+ "%llu, b%d, 0x%x, g%u}\n",
b752ccd1 25883+ rdu->sz, rdu->ent.e, rdu->verify[AufsCtlRduV_SZ],
1308ab2a 25884+ rdu->blk,
25885+ rdu->rent, rdu->shwh, rdu->full,
25886+ rdu->cookie.h_pos, rdu->cookie.bindex, rdu->cookie.flags,
25887+ rdu->cookie.generation);
dece6358 25888+
b752ccd1 25889+ if (rdu->verify[AufsCtlRduV_SZ] == sizeof(*rdu))
1308ab2a 25890+ return 0;
dece6358 25891+
b752ccd1
AM
25892+ AuDbg("%u:%u\n",
25893+ rdu->verify[AufsCtlRduV_SZ], (unsigned int)sizeof(*rdu));
1308ab2a 25894+ return -EINVAL;
25895+}
25896+
25897+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
dece6358 25898+{
1308ab2a 25899+ long err, e;
25900+ struct aufs_rdu rdu;
25901+ void __user *p = (void __user *)arg;
dece6358 25902+
1308ab2a 25903+ err = copy_from_user(&rdu, p, sizeof(rdu));
25904+ if (unlikely(err)) {
25905+ err = -EFAULT;
25906+ AuTraceErr(err);
25907+ goto out;
25908+ }
25909+ err = au_rdu_verify(&rdu);
dece6358
AM
25910+ if (unlikely(err))
25911+ goto out;
25912+
1308ab2a 25913+ switch (cmd) {
25914+ case AUFS_CTL_RDU:
25915+ err = au_rdu(file, &rdu);
25916+ if (unlikely(err))
25917+ break;
dece6358 25918+
1308ab2a 25919+ e = copy_to_user(p, &rdu, sizeof(rdu));
25920+ if (unlikely(e)) {
25921+ err = -EFAULT;
25922+ AuTraceErr(err);
25923+ }
25924+ break;
25925+ case AUFS_CTL_RDU_INO:
25926+ err = au_rdu_ino(file, &rdu);
25927+ break;
25928+
25929+ default:
4a4d8108 25930+ /* err = -ENOTTY; */
1308ab2a 25931+ err = -EINVAL;
25932+ }
dece6358 25933+
4f0767ce 25934+out:
1308ab2a 25935+ AuTraceErr(err);
25936+ return err;
1facf9fc 25937+}
b752ccd1
AM
25938+
25939+#ifdef CONFIG_COMPAT
25940+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
25941+{
25942+ long err, e;
25943+ struct aufs_rdu rdu;
25944+ void __user *p = compat_ptr(arg);
25945+
25946+ /* todo: get_user()? */
25947+ err = copy_from_user(&rdu, p, sizeof(rdu));
25948+ if (unlikely(err)) {
25949+ err = -EFAULT;
25950+ AuTraceErr(err);
25951+ goto out;
25952+ }
25953+ rdu.ent.e = compat_ptr(rdu.ent.ul);
25954+ err = au_rdu_verify(&rdu);
25955+ if (unlikely(err))
25956+ goto out;
25957+
25958+ switch (cmd) {
25959+ case AUFS_CTL_RDU:
25960+ err = au_rdu(file, &rdu);
25961+ if (unlikely(err))
25962+ break;
25963+
25964+ rdu.ent.ul = ptr_to_compat(rdu.ent.e);
25965+ rdu.tail.ul = ptr_to_compat(rdu.tail.e);
25966+ e = copy_to_user(p, &rdu, sizeof(rdu));
25967+ if (unlikely(e)) {
25968+ err = -EFAULT;
25969+ AuTraceErr(err);
25970+ }
25971+ break;
25972+ case AUFS_CTL_RDU_INO:
25973+ err = au_rdu_ino(file, &rdu);
25974+ break;
25975+
25976+ default:
25977+ /* err = -ENOTTY; */
25978+ err = -EINVAL;
25979+ }
25980+
4f0767ce 25981+out:
b752ccd1
AM
25982+ AuTraceErr(err);
25983+ return err;
25984+}
25985+#endif
7f207e10
AM
25986diff -urN /usr/share/empty/fs/aufs/rwsem.h linux/fs/aufs/rwsem.h
25987--- /usr/share/empty/fs/aufs/rwsem.h 1970-01-01 01:00:00.000000000 +0100
ab036dbd 25988+++ linux/fs/aufs/rwsem.h 2015-09-24 10:47:58.254719746 +0200
076b876e 25989@@ -0,0 +1,191 @@
1facf9fc 25990+/*
2000de60 25991+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 25992+ *
25993+ * This program, aufs is free software; you can redistribute it and/or modify
25994+ * it under the terms of the GNU General Public License as published by
25995+ * the Free Software Foundation; either version 2 of the License, or
25996+ * (at your option) any later version.
dece6358
AM
25997+ *
25998+ * This program is distributed in the hope that it will be useful,
25999+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26000+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26001+ * GNU General Public License for more details.
26002+ *
26003+ * You should have received a copy of the GNU General Public License
523b37e3 26004+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 26005+ */
26006+
26007+/*
26008+ * simple read-write semaphore wrappers
26009+ */
26010+
26011+#ifndef __AUFS_RWSEM_H__
26012+#define __AUFS_RWSEM_H__
26013+
26014+#ifdef __KERNEL__
26015+
4a4d8108 26016+#include "debug.h"
dece6358
AM
26017+
26018+struct au_rwsem {
26019+ struct rw_semaphore rwsem;
26020+#ifdef CONFIG_AUFS_DEBUG
26021+ /* just for debugging, not almighty counter */
26022+ atomic_t rcnt, wcnt;
26023+#endif
26024+};
26025+
26026+#ifdef CONFIG_AUFS_DEBUG
26027+#define AuDbgCntInit(rw) do { \
26028+ atomic_set(&(rw)->rcnt, 0); \
26029+ atomic_set(&(rw)->wcnt, 0); \
26030+ smp_mb(); /* atomic set */ \
26031+} while (0)
26032+
e49829fe 26033+#define AuDbgRcntInc(rw) atomic_inc(&(rw)->rcnt)
dece6358 26034+#define AuDbgRcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->rcnt) < 0)
e49829fe 26035+#define AuDbgWcntInc(rw) atomic_inc(&(rw)->wcnt)
dece6358
AM
26036+#define AuDbgWcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->wcnt) < 0)
26037+#else
26038+#define AuDbgCntInit(rw) do {} while (0)
26039+#define AuDbgRcntInc(rw) do {} while (0)
26040+#define AuDbgRcntDec(rw) do {} while (0)
26041+#define AuDbgWcntInc(rw) do {} while (0)
26042+#define AuDbgWcntDec(rw) do {} while (0)
26043+#endif /* CONFIG_AUFS_DEBUG */
26044+
26045+/* to debug easier, do not make them inlined functions */
26046+#define AuRwMustNoWaiters(rw) AuDebugOn(!list_empty(&(rw)->rwsem.wait_list))
26047+/* rwsem_is_locked() is unusable */
26048+#define AuRwMustReadLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0)
26049+#define AuRwMustWriteLock(rw) AuDebugOn(atomic_read(&(rw)->wcnt) <= 0)
26050+#define AuRwMustAnyLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0 \
26051+ && atomic_read(&(rw)->wcnt) <= 0)
26052+#define AuRwDestroy(rw) AuDebugOn(atomic_read(&(rw)->rcnt) \
26053+ || atomic_read(&(rw)->wcnt))
26054+
e49829fe
JR
26055+#define au_rw_class(rw, key) lockdep_set_class(&(rw)->rwsem, key)
26056+
dece6358
AM
26057+static inline void au_rw_init(struct au_rwsem *rw)
26058+{
26059+ AuDbgCntInit(rw);
26060+ init_rwsem(&rw->rwsem);
26061+}
26062+
26063+static inline void au_rw_init_wlock(struct au_rwsem *rw)
26064+{
26065+ au_rw_init(rw);
26066+ down_write(&rw->rwsem);
26067+ AuDbgWcntInc(rw);
26068+}
26069+
26070+static inline void au_rw_init_wlock_nested(struct au_rwsem *rw,
26071+ unsigned int lsc)
26072+{
26073+ au_rw_init(rw);
26074+ down_write_nested(&rw->rwsem, lsc);
26075+ AuDbgWcntInc(rw);
26076+}
26077+
26078+static inline void au_rw_read_lock(struct au_rwsem *rw)
26079+{
26080+ down_read(&rw->rwsem);
26081+ AuDbgRcntInc(rw);
26082+}
26083+
26084+static inline void au_rw_read_lock_nested(struct au_rwsem *rw, unsigned int lsc)
26085+{
26086+ down_read_nested(&rw->rwsem, lsc);
26087+ AuDbgRcntInc(rw);
26088+}
26089+
26090+static inline void au_rw_read_unlock(struct au_rwsem *rw)
26091+{
26092+ AuRwMustReadLock(rw);
26093+ AuDbgRcntDec(rw);
26094+ up_read(&rw->rwsem);
26095+}
26096+
26097+static inline void au_rw_dgrade_lock(struct au_rwsem *rw)
26098+{
26099+ AuRwMustWriteLock(rw);
26100+ AuDbgRcntInc(rw);
26101+ AuDbgWcntDec(rw);
26102+ downgrade_write(&rw->rwsem);
26103+}
26104+
26105+static inline void au_rw_write_lock(struct au_rwsem *rw)
26106+{
26107+ down_write(&rw->rwsem);
26108+ AuDbgWcntInc(rw);
26109+}
26110+
26111+static inline void au_rw_write_lock_nested(struct au_rwsem *rw,
26112+ unsigned int lsc)
26113+{
26114+ down_write_nested(&rw->rwsem, lsc);
26115+ AuDbgWcntInc(rw);
26116+}
1facf9fc 26117+
dece6358
AM
26118+static inline void au_rw_write_unlock(struct au_rwsem *rw)
26119+{
26120+ AuRwMustWriteLock(rw);
26121+ AuDbgWcntDec(rw);
26122+ up_write(&rw->rwsem);
26123+}
26124+
26125+/* why is a _nested version not defined? */
26126+static inline int au_rw_read_trylock(struct au_rwsem *rw)
26127+{
076b876e
AM
26128+ int ret;
26129+
26130+ ret = down_read_trylock(&rw->rwsem);
dece6358
AM
26131+ if (ret)
26132+ AuDbgRcntInc(rw);
26133+ return ret;
26134+}
26135+
26136+static inline int au_rw_write_trylock(struct au_rwsem *rw)
26137+{
076b876e
AM
26138+ int ret;
26139+
26140+ ret = down_write_trylock(&rw->rwsem);
dece6358
AM
26141+ if (ret)
26142+ AuDbgWcntInc(rw);
26143+ return ret;
26144+}
26145+
26146+#undef AuDbgCntInit
26147+#undef AuDbgRcntInc
26148+#undef AuDbgRcntDec
26149+#undef AuDbgWcntInc
26150+#undef AuDbgWcntDec
1facf9fc 26151+
26152+#define AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
26153+static inline void prefix##_read_lock(param) \
dece6358 26154+{ au_rw_read_lock(rwsem); } \
1facf9fc 26155+static inline void prefix##_write_lock(param) \
dece6358 26156+{ au_rw_write_lock(rwsem); } \
1facf9fc 26157+static inline int prefix##_read_trylock(param) \
dece6358 26158+{ return au_rw_read_trylock(rwsem); } \
1facf9fc 26159+static inline int prefix##_write_trylock(param) \
dece6358 26160+{ return au_rw_write_trylock(rwsem); }
1facf9fc 26161+/* why is a _nested version not defined? */
26162+/* static inline void prefix##_read_trylock_nested(param, lsc)
dece6358 26163+{ au_rw_read_trylock_nested(rwsem, lsc)); }
1facf9fc 26164+static inline void prefix##_write_trylock_nestd(param, lsc)
dece6358 26165+{ au_rw_write_trylock_nested(rwsem, lsc); } */
1facf9fc 26166+
26167+#define AuSimpleUnlockRwsemFuncs(prefix, param, rwsem) \
26168+static inline void prefix##_read_unlock(param) \
dece6358 26169+{ au_rw_read_unlock(rwsem); } \
1facf9fc 26170+static inline void prefix##_write_unlock(param) \
dece6358 26171+{ au_rw_write_unlock(rwsem); } \
1facf9fc 26172+static inline void prefix##_downgrade_lock(param) \
dece6358 26173+{ au_rw_dgrade_lock(rwsem); }
1facf9fc 26174+
26175+#define AuSimpleRwsemFuncs(prefix, param, rwsem) \
26176+ AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
26177+ AuSimpleUnlockRwsemFuncs(prefix, param, rwsem)
26178+
26179+#endif /* __KERNEL__ */
26180+#endif /* __AUFS_RWSEM_H__ */
7f207e10
AM
26181diff -urN /usr/share/empty/fs/aufs/sbinfo.c linux/fs/aufs/sbinfo.c
26182--- /usr/share/empty/fs/aufs/sbinfo.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd
AM
26183+++ linux/fs/aufs/sbinfo.c 2015-12-10 18:46:31.223310574 +0100
26184@@ -0,0 +1,362 @@
1facf9fc 26185+/*
2000de60 26186+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 26187+ *
26188+ * This program, aufs is free software; you can redistribute it and/or modify
26189+ * it under the terms of the GNU General Public License as published by
26190+ * the Free Software Foundation; either version 2 of the License, or
26191+ * (at your option) any later version.
dece6358
AM
26192+ *
26193+ * This program is distributed in the hope that it will be useful,
26194+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26195+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26196+ * GNU General Public License for more details.
26197+ *
26198+ * You should have received a copy of the GNU General Public License
523b37e3 26199+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 26200+ */
26201+
26202+/*
26203+ * superblock private data
26204+ */
26205+
26206+#include "aufs.h"
26207+
26208+/*
26209+ * needed whether or not sysfs is enabled: frees the au_sbinfo embedding @kobj.
26210+ */
26211+void au_si_free(struct kobject *kobj)
26212+{
86dc4139 26213+ int i;
1facf9fc 26214+ struct au_sbinfo *sbinfo;
b752ccd1 26215+ char *locked __maybe_unused; /* debug only; a single result slot */
1facf9fc 26216+
26217+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
86dc4139
AM
26218+ for (i = 0; i < AuPlink_NHASH; i++)
26219+ AuDebugOn(!hlist_empty(&sbinfo->si_plink[i].head));
e49829fe 26220+ AuDebugOn(atomic_read(&sbinfo->si_nowait.nw_len));
1facf9fc 26221+
e49829fe 26222+ au_rw_write_lock(&sbinfo->si_rwsem);
1facf9fc 26223+ au_br_free(sbinfo);
e49829fe 26224+ au_rw_write_unlock(&sbinfo->si_rwsem);
b752ccd1
AM
26225+
26226+ AuDebugOn(radix_tree_gang_lookup
26227+ (&sbinfo->au_si_pid.tree, (void **)&locked,
26228+ /*first_index*/PID_MAX_DEFAULT - 1,
26229+ /*max_items*/1)); /* &locked has room for one pointer only */
26230+
1facf9fc 26231+ kfree(sbinfo->si_branch);
b752ccd1 26232+ kfree(sbinfo->au_si_pid.bitmap);
1facf9fc 26233+ mutex_destroy(&sbinfo->si_xib_mtx);
dece6358 26234+ AuRwDestroy(&sbinfo->si_rwsem);
1facf9fc 26235+
26236+ kfree(sbinfo);
26237+}
26238+
26239+int au_si_alloc(struct super_block *sb)
26240+{
86dc4139 26241+ int err, i;
1facf9fc 26242+ struct au_sbinfo *sbinfo;
e49829fe 26243+ static struct lock_class_key aufs_si;
1facf9fc 26244+
26245+ err = -ENOMEM;
4a4d8108 26246+ sbinfo = kzalloc(sizeof(*sbinfo), GFP_NOFS);
1facf9fc 26247+ if (unlikely(!sbinfo))
26248+ goto out;
26249+
b752ccd1
AM
26250+ BUILD_BUG_ON(sizeof(unsigned long) !=
26251+ sizeof(*sbinfo->au_si_pid.bitmap));
26252+ sbinfo->au_si_pid.bitmap = kcalloc(BITS_TO_LONGS(PID_MAX_DEFAULT),
26253+ sizeof(*sbinfo->au_si_pid.bitmap),
26254+ GFP_NOFS);
26255+ if (unlikely(!sbinfo->au_si_pid.bitmap))
26256+ goto out_sbinfo;
26257+
1facf9fc 26258+ /* will be reallocated separately */
26259+ sbinfo->si_branch = kzalloc(sizeof(*sbinfo->si_branch), GFP_NOFS);
26260+ if (unlikely(!sbinfo->si_branch))
b752ccd1 26261+ goto out_pidmap;
1facf9fc 26262+
1facf9fc 26263+ err = sysaufs_si_init(sbinfo);
26264+ if (unlikely(err))
26265+ goto out_br;
26266+
26267+ au_nwt_init(&sbinfo->si_nowait);
dece6358 26268+ au_rw_init_wlock(&sbinfo->si_rwsem);
e49829fe 26269+ au_rw_class(&sbinfo->si_rwsem, &aufs_si);
b752ccd1
AM
26270+ spin_lock_init(&sbinfo->au_si_pid.tree_lock);
26271+ INIT_RADIX_TREE(&sbinfo->au_si_pid.tree, GFP_ATOMIC | __GFP_NOFAIL);
26272+
7f207e10 26273+ atomic_long_set(&sbinfo->si_ninodes, 0);
7f207e10
AM
26274+ atomic_long_set(&sbinfo->si_nfiles, 0);
26275+
1facf9fc 26276+ sbinfo->si_bend = -1;
392086de 26277+ sbinfo->si_last_br_id = AUFS_BRANCH_MAX / 2;
1facf9fc 26278+
26279+ sbinfo->si_wbr_copyup = AuWbrCopyup_Def;
26280+ sbinfo->si_wbr_create = AuWbrCreate_Def;
4a4d8108
AM
26281+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + sbinfo->si_wbr_copyup;
26282+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + sbinfo->si_wbr_create;
1facf9fc 26283+
076b876e
AM
26284+ au_fhsm_init(sbinfo);
26285+
e49829fe 26286+ sbinfo->si_mntflags = au_opts_plink(AuOpt_Def);
1facf9fc 26287+
392086de
AM
26288+ sbinfo->si_xino_jiffy = jiffies;
26289+ sbinfo->si_xino_expire
26290+ = msecs_to_jiffies(AUFS_XINO_DEF_SEC * MSEC_PER_SEC);
1facf9fc 26291+ mutex_init(&sbinfo->si_xib_mtx);
1facf9fc 26292+ sbinfo->si_xino_brid = -1;
26293+ /* leave si_xib_last_pindex and si_xib_next_bit */
26294+
b912730e
AM
26295+ au_sphl_init(&sbinfo->si_aopen);
26296+
e49829fe 26297+ sbinfo->si_rdcache = msecs_to_jiffies(AUFS_RDCACHE_DEF * MSEC_PER_SEC);
1facf9fc 26298+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
26299+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
26300+ sbinfo->si_dirwh = AUFS_DIRWH_DEF;
26301+
86dc4139
AM
26302+ for (i = 0; i < AuPlink_NHASH; i++)
26303+ au_sphl_init(sbinfo->si_plink + i);
1facf9fc 26304+ init_waitqueue_head(&sbinfo->si_plink_wq);
4a4d8108 26305+ spin_lock_init(&sbinfo->si_plink_maint_lock);
1facf9fc 26306+
523b37e3
AM
26307+ au_sphl_init(&sbinfo->si_files);
26308+
ab036dbd
AM
26309+ /* with getattr by default */
26310+ sbinfo->si_iop_array = aufs_iop;
26311+
1facf9fc 26312+ /* leave other members for sysaufs and si_mnt. */
26313+ sbinfo->si_sb = sb;
26314+ sb->s_fs_info = sbinfo;
b752ccd1 26315+ si_pid_set(sb);
1facf9fc 26316+ return 0; /* success */
26317+
4f0767ce 26318+out_br:
1facf9fc 26319+ kfree(sbinfo->si_branch);
4f0767ce 26320+out_pidmap:
b752ccd1 26321+ kfree(sbinfo->au_si_pid.bitmap);
4f0767ce 26322+out_sbinfo:
1facf9fc 26323+ kfree(sbinfo);
4f0767ce 26324+out:
1facf9fc 26325+ return err;
26326+}
26327+
26328+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr)
26329+{
26330+ int err, sz;
26331+ struct au_branch **brp;
26332+
dece6358
AM
26333+ AuRwMustWriteLock(&sbinfo->si_rwsem);
26334+
1facf9fc 26335+ err = -ENOMEM;
26336+ sz = sizeof(*brp) * (sbinfo->si_bend + 1);
26337+ if (unlikely(!sz))
26338+ sz = sizeof(*brp);
26339+ brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS);
26340+ if (brp) {
26341+ sbinfo->si_branch = brp;
26342+ err = 0;
26343+ }
26344+
26345+ return err;
26346+}
26347+
26348+/* ---------------------------------------------------------------------- */
26349+
26350+unsigned int au_sigen_inc(struct super_block *sb)
26351+{
26352+ unsigned int gen;
5527c038 26353+ struct inode *inode;
1facf9fc 26354+
dece6358
AM
26355+ SiMustWriteLock(sb);
26356+
1facf9fc 26357+ gen = ++au_sbi(sb)->si_generation;
26358+ au_update_digen(sb->s_root);
5527c038
JR
26359+ inode = d_inode(sb->s_root);
26360+ au_update_iigen(inode, /*half*/0);
26361+ inode->i_version++;
1facf9fc 26362+ return gen;
26363+}
26364+
26365+aufs_bindex_t au_new_br_id(struct super_block *sb)
26366+{
26367+ aufs_bindex_t br_id;
26368+ int i;
26369+ struct au_sbinfo *sbinfo;
26370+
dece6358
AM
26371+ SiMustWriteLock(sb);
26372+
1facf9fc 26373+ sbinfo = au_sbi(sb);
26374+ for (i = 0; i <= AUFS_BRANCH_MAX; i++) {
26375+ br_id = ++sbinfo->si_last_br_id;
7f207e10 26376+ AuDebugOn(br_id < 0);
1facf9fc 26377+ if (br_id && au_br_index(sb, br_id) < 0)
26378+ return br_id;
26379+ }
26380+
26381+ return -1;
26382+}
26383+
26384+/* ---------------------------------------------------------------------- */
26385+
e49829fe
JR
26386+/* it is ok that new 'nwt' tasks are appended while we are sleeping */
26387+int si_read_lock(struct super_block *sb, int flags)
26388+{
26389+ int err;
26390+
26391+ err = 0;
26392+ if (au_ftest_lock(flags, FLUSH))
26393+ au_nwt_flush(&au_sbi(sb)->si_nowait);
26394+
26395+ si_noflush_read_lock(sb);
26396+ err = au_plink_maint(sb, flags);
26397+ if (unlikely(err))
26398+ si_read_unlock(sb);
26399+
26400+ return err;
26401+}
26402+
26403+int si_write_lock(struct super_block *sb, int flags)
26404+{
26405+ int err;
26406+
26407+ if (au_ftest_lock(flags, FLUSH))
26408+ au_nwt_flush(&au_sbi(sb)->si_nowait);
26409+
26410+ si_noflush_write_lock(sb);
26411+ err = au_plink_maint(sb, flags);
26412+ if (unlikely(err))
26413+ si_write_unlock(sb);
26414+
26415+ return err;
26416+}
26417+
1facf9fc 26418+/* dentry and super_block lock. call at entry point */
e49829fe 26419+int aufs_read_lock(struct dentry *dentry, int flags)
1facf9fc 26420+{
e49829fe 26421+ int err;
027c5e7a 26422+ struct super_block *sb;
e49829fe 26423+
027c5e7a
AM
26424+ sb = dentry->d_sb;
26425+ err = si_read_lock(sb, flags);
26426+ if (unlikely(err))
26427+ goto out;
26428+
26429+ if (au_ftest_lock(flags, DW))
26430+ di_write_lock_child(dentry);
26431+ else
26432+ di_read_lock_child(dentry, flags);
26433+
26434+ if (au_ftest_lock(flags, GEN)) {
26435+ err = au_digen_test(dentry, au_sigen(sb));
ab036dbd
AM
26436+ if (!au_opt_test(au_mntflags(sb), UDBA_NONE))
26437+ AuDebugOn(!err && au_dbrange_test(dentry));
26438+ else if (!err)
26439+ err = au_dbrange_test(dentry);
027c5e7a
AM
26440+ if (unlikely(err))
26441+ aufs_read_unlock(dentry, flags);
e49829fe
JR
26442+ }
26443+
027c5e7a 26444+out:
e49829fe 26445+ return err;
1facf9fc 26446+}
26447+
26448+void aufs_read_unlock(struct dentry *dentry, int flags)
26449+{
26450+ if (au_ftest_lock(flags, DW))
26451+ di_write_unlock(dentry);
26452+ else
26453+ di_read_unlock(dentry, flags);
26454+ si_read_unlock(dentry->d_sb);
26455+}
26456+
26457+void aufs_write_lock(struct dentry *dentry)
26458+{
e49829fe 26459+ si_write_lock(dentry->d_sb, AuLock_FLUSH | AuLock_NOPLMW);
1facf9fc 26460+ di_write_lock_child(dentry);
26461+}
26462+
26463+void aufs_write_unlock(struct dentry *dentry)
26464+{
26465+ di_write_unlock(dentry);
26466+ si_write_unlock(dentry->d_sb);
26467+}
26468+
e49829fe 26469+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags)
1facf9fc 26470+{
e49829fe 26471+ int err;
027c5e7a
AM
26472+ unsigned int sigen;
26473+ struct super_block *sb;
e49829fe 26474+
027c5e7a
AM
26475+ sb = d1->d_sb;
26476+ err = si_read_lock(sb, flags);
26477+ if (unlikely(err))
26478+ goto out;
26479+
ab036dbd 26480+ di_write_lock2_child(d1, d2, au_ftest_lock(flags, DIRS));
027c5e7a
AM
26481+
26482+ if (au_ftest_lock(flags, GEN)) {
26483+ sigen = au_sigen(sb);
26484+ err = au_digen_test(d1, sigen);
26485+ AuDebugOn(!err && au_dbrange_test(d1));
26486+ if (!err) {
26487+ err = au_digen_test(d2, sigen);
26488+ AuDebugOn(!err && au_dbrange_test(d2));
26489+ }
26490+ if (unlikely(err))
26491+ aufs_read_and_write_unlock2(d1, d2);
26492+ }
26493+
26494+out:
e49829fe 26495+ return err;
1facf9fc 26496+}
26497+
26498+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2)
26499+{
26500+ di_write_unlock2(d1, d2);
26501+ si_read_unlock(d1->d_sb);
26502+}
b752ccd1
AM
26503+
26504+/* ---------------------------------------------------------------------- */
26505+
26506+int si_pid_test_slow(struct super_block *sb)
26507+{
26508+ void *p;
26509+
26510+ rcu_read_lock();
26511+ p = radix_tree_lookup(&au_sbi(sb)->au_si_pid.tree, current->pid);
26512+ rcu_read_unlock();
26513+
027c5e7a 26514+ return (long)!!p;
b752ccd1
AM
26515+}
26516+
26517+void si_pid_set_slow(struct super_block *sb)
26518+{
26519+ int err;
26520+ struct au_sbinfo *sbinfo;
26521+
26522+ AuDebugOn(si_pid_test_slow(sb));
26523+
26524+ sbinfo = au_sbi(sb);
26525+ err = radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
26526+ AuDebugOn(err);
26527+ spin_lock(&sbinfo->au_si_pid.tree_lock);
26528+ err = radix_tree_insert(&sbinfo->au_si_pid.tree, current->pid,
027c5e7a 26529+ /*any valid ptr*/sb);
b752ccd1
AM
26530+ spin_unlock(&sbinfo->au_si_pid.tree_lock);
26531+ AuDebugOn(err);
26532+ radix_tree_preload_end();
26533+}
26534+
26535+void si_pid_clr_slow(struct super_block *sb)
26536+{
26537+ void *p;
26538+ struct au_sbinfo *sbinfo;
26539+
26540+ AuDebugOn(!si_pid_test_slow(sb));
26541+
26542+ sbinfo = au_sbi(sb);
26543+ spin_lock(&sbinfo->au_si_pid.tree_lock);
26544+ p = radix_tree_delete(&sbinfo->au_si_pid.tree, current->pid);
26545+ spin_unlock(&sbinfo->au_si_pid.tree_lock);
b752ccd1 26546+}
7f207e10
AM
26547diff -urN /usr/share/empty/fs/aufs/spl.h linux/fs/aufs/spl.h
26548--- /usr/share/empty/fs/aufs/spl.h 1970-01-01 01:00:00.000000000 +0100
ab036dbd 26549+++ linux/fs/aufs/spl.h 2015-09-24 10:47:58.254719746 +0200
523b37e3 26550@@ -0,0 +1,111 @@
1facf9fc 26551+/*
2000de60 26552+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 26553+ *
26554+ * This program, aufs is free software; you can redistribute it and/or modify
26555+ * it under the terms of the GNU General Public License as published by
26556+ * the Free Software Foundation; either version 2 of the License, or
26557+ * (at your option) any later version.
dece6358
AM
26558+ *
26559+ * This program is distributed in the hope that it will be useful,
26560+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26561+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26562+ * GNU General Public License for more details.
26563+ *
26564+ * You should have received a copy of the GNU General Public License
523b37e3 26565+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 26566+ */
26567+
26568+/*
26569+ * simple list protected by a spinlock
26570+ */
26571+
26572+#ifndef __AUFS_SPL_H__
26573+#define __AUFS_SPL_H__
26574+
26575+#ifdef __KERNEL__
26576+
1facf9fc 26577+struct au_splhead {
26578+ spinlock_t spin;
26579+ struct list_head head;
26580+};
26581+
26582+static inline void au_spl_init(struct au_splhead *spl)
26583+{
26584+ spin_lock_init(&spl->spin);
26585+ INIT_LIST_HEAD(&spl->head);
26586+}
26587+
26588+static inline void au_spl_add(struct list_head *list, struct au_splhead *spl)
26589+{
26590+ spin_lock(&spl->spin);
26591+ list_add(list, &spl->head);
26592+ spin_unlock(&spl->spin);
26593+}
26594+
26595+static inline void au_spl_del(struct list_head *list, struct au_splhead *spl)
26596+{
26597+ spin_lock(&spl->spin);
26598+ list_del(list);
26599+ spin_unlock(&spl->spin);
26600+}
26601+
4a4d8108
AM
26602+static inline void au_spl_del_rcu(struct list_head *list,
26603+ struct au_splhead *spl)
26604+{
26605+ spin_lock(&spl->spin);
26606+ list_del_rcu(list);
26607+ spin_unlock(&spl->spin);
26608+}
26609+
86dc4139
AM
26610+/* ---------------------------------------------------------------------- */
26611+
26612+struct au_sphlhead {
26613+ spinlock_t spin;
26614+ struct hlist_head head;
26615+};
26616+
26617+static inline void au_sphl_init(struct au_sphlhead *sphl)
26618+{
26619+ spin_lock_init(&sphl->spin);
26620+ INIT_HLIST_HEAD(&sphl->head);
26621+}
26622+
26623+static inline void au_sphl_add(struct hlist_node *hlist,
26624+ struct au_sphlhead *sphl)
26625+{
26626+ spin_lock(&sphl->spin);
26627+ hlist_add_head(hlist, &sphl->head);
26628+ spin_unlock(&sphl->spin);
26629+}
26630+
26631+static inline void au_sphl_del(struct hlist_node *hlist,
26632+ struct au_sphlhead *sphl)
26633+{
26634+ spin_lock(&sphl->spin);
26635+ hlist_del(hlist);
26636+ spin_unlock(&sphl->spin);
26637+}
26638+
26639+static inline void au_sphl_del_rcu(struct hlist_node *hlist,
26640+ struct au_sphlhead *sphl)
26641+{
26642+ spin_lock(&sphl->spin);
26643+ hlist_del_rcu(hlist);
26644+ spin_unlock(&sphl->spin);
26645+}
26646+
26647+static inline unsigned long au_sphl_count(struct au_sphlhead *sphl)
26648+{
26649+ unsigned long cnt;
26650+ struct hlist_node *pos;
26651+
26652+ cnt = 0;
26653+ spin_lock(&sphl->spin);
26654+ hlist_for_each(pos, &sphl->head)
26655+ cnt++;
26656+ spin_unlock(&sphl->spin);
26657+ return cnt;
26658+}
26659+
1facf9fc 26660+#endif /* __KERNEL__ */
26661+#endif /* __AUFS_SPL_H__ */
7f207e10
AM
26662diff -urN /usr/share/empty/fs/aufs/super.c linux/fs/aufs/super.c
26663--- /usr/share/empty/fs/aufs/super.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd
AM
26664+++ linux/fs/aufs/super.c 2015-12-10 18:46:31.223310574 +0100
26665@@ -0,0 +1,1046 @@
1facf9fc 26666+/*
2000de60 26667+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 26668+ *
26669+ * This program, aufs is free software; you can redistribute it and/or modify
26670+ * it under the terms of the GNU General Public License as published by
26671+ * the Free Software Foundation; either version 2 of the License, or
26672+ * (at your option) any later version.
dece6358
AM
26673+ *
26674+ * This program is distributed in the hope that it will be useful,
26675+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26676+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26677+ * GNU General Public License for more details.
26678+ *
26679+ * You should have received a copy of the GNU General Public License
523b37e3 26680+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 26681+ */
26682+
26683+/*
26684+ * mount and super_block operations
26685+ */
26686+
f6c5ef8b 26687+#include <linux/mm.h>
1facf9fc 26688+#include <linux/seq_file.h>
26689+#include <linux/statfs.h>
7f207e10 26690+#include <linux/vmalloc.h>
1facf9fc 26691+#include "aufs.h"
26692+
26693+/*
26694+ * super_operations
26695+ */
26696+static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused)
26697+{
26698+ struct au_icntnr *c;
26699+
26700+ c = au_cache_alloc_icntnr();
26701+ if (c) {
027c5e7a 26702+ au_icntnr_init(c);
1facf9fc 26703+ c->vfs_inode.i_version = 1; /* sigen(sb); */
26704+ c->iinfo.ii_hinode = NULL;
26705+ return &c->vfs_inode;
26706+ }
26707+ return NULL;
26708+}
26709+
027c5e7a
AM
26710+static void aufs_destroy_inode_cb(struct rcu_head *head)
26711+{
26712+ struct inode *inode = container_of(head, struct inode, i_rcu);
26713+
b4510431 26714+ INIT_HLIST_HEAD(&inode->i_dentry);
027c5e7a
AM
26715+ au_cache_free_icntnr(container_of(inode, struct au_icntnr, vfs_inode));
26716+}
26717+
1facf9fc 26718+static void aufs_destroy_inode(struct inode *inode)
26719+{
26720+ au_iinfo_fin(inode);
027c5e7a 26721+ call_rcu(&inode->i_rcu, aufs_destroy_inode_cb);
1facf9fc 26722+}
26723+
26724+struct inode *au_iget_locked(struct super_block *sb, ino_t ino)
26725+{
26726+ struct inode *inode;
26727+ int err;
26728+
26729+ inode = iget_locked(sb, ino);
26730+ if (unlikely(!inode)) {
26731+ inode = ERR_PTR(-ENOMEM);
26732+ goto out;
26733+ }
26734+ if (!(inode->i_state & I_NEW))
26735+ goto out;
26736+
26737+ err = au_xigen_new(inode);
26738+ if (!err)
26739+ err = au_iinfo_init(inode);
26740+ if (!err)
26741+ inode->i_version++;
26742+ else {
26743+ iget_failed(inode);
26744+ inode = ERR_PTR(err);
26745+ }
26746+
4f0767ce 26747+out:
1facf9fc 26748+ /* never return NULL */
26749+ AuDebugOn(!inode);
26750+ AuTraceErrPtr(inode);
26751+ return inode;
26752+}
26753+
26754+/* lock free root dinfo */
26755+static int au_show_brs(struct seq_file *seq, struct super_block *sb)
26756+{
26757+ int err;
26758+ aufs_bindex_t bindex, bend;
26759+ struct path path;
4a4d8108 26760+ struct au_hdentry *hdp;
1facf9fc 26761+ struct au_branch *br;
076b876e 26762+ au_br_perm_str_t perm;
1facf9fc 26763+
26764+ err = 0;
26765+ bend = au_sbend(sb);
4a4d8108 26766+ hdp = au_di(sb->s_root)->di_hdentry;
1facf9fc 26767+ for (bindex = 0; !err && bindex <= bend; bindex++) {
26768+ br = au_sbr(sb, bindex);
86dc4139 26769+ path.mnt = au_br_mnt(br);
4a4d8108 26770+ path.dentry = hdp[bindex].hd_dentry;
1facf9fc 26771+ err = au_seq_path(seq, &path);
ab036dbd 26772+ if (!err) {
076b876e
AM
26773+ au_optstr_br_perm(&perm, br->br_perm);
26774+ err = seq_printf(seq, "=%s", perm.a);
26775+ if (err == -1)
26776+ err = -E2BIG;
1e00d052 26777+ }
1facf9fc 26778+ if (!err && bindex != bend)
26779+ err = seq_putc(seq, ':');
26780+ }
26781+
26782+ return err;
26783+}
26784+
26785+static void au_show_wbr_create(struct seq_file *m, int v,
26786+ struct au_sbinfo *sbinfo)
26787+{
26788+ const char *pat;
26789+
dece6358
AM
26790+ AuRwMustAnyLock(&sbinfo->si_rwsem);
26791+
c2b27bf2 26792+ seq_puts(m, ",create=");
1facf9fc 26793+ pat = au_optstr_wbr_create(v);
26794+ switch (v) {
26795+ case AuWbrCreate_TDP:
26796+ case AuWbrCreate_RR:
26797+ case AuWbrCreate_MFS:
26798+ case AuWbrCreate_PMFS:
c2b27bf2 26799+ seq_puts(m, pat);
1facf9fc 26800+ break;
26801+ case AuWbrCreate_MFSV:
26802+ seq_printf(m, /*pat*/"mfs:%lu",
e49829fe
JR
26803+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
26804+ / MSEC_PER_SEC);
1facf9fc 26805+ break;
26806+ case AuWbrCreate_PMFSV:
26807+ seq_printf(m, /*pat*/"pmfs:%lu",
e49829fe
JR
26808+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
26809+ / MSEC_PER_SEC);
1facf9fc 26810+ break;
26811+ case AuWbrCreate_MFSRR:
26812+ seq_printf(m, /*pat*/"mfsrr:%llu",
26813+ sbinfo->si_wbr_mfs.mfsrr_watermark);
26814+ break;
26815+ case AuWbrCreate_MFSRRV:
26816+ seq_printf(m, /*pat*/"mfsrr:%llu:%lu",
26817+ sbinfo->si_wbr_mfs.mfsrr_watermark,
e49829fe
JR
26818+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
26819+ / MSEC_PER_SEC);
1facf9fc 26820+ break;
392086de
AM
26821+ case AuWbrCreate_PMFSRR:
26822+ seq_printf(m, /*pat*/"pmfsrr:%llu",
26823+ sbinfo->si_wbr_mfs.mfsrr_watermark);
26824+ break;
26825+ case AuWbrCreate_PMFSRRV:
26826+ seq_printf(m, /*pat*/"pmfsrr:%llu:%lu",
26827+ sbinfo->si_wbr_mfs.mfsrr_watermark,
26828+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
26829+ / MSEC_PER_SEC);
26830+ break;
1facf9fc 26831+ }
26832+}
26833+
7eafdf33 26834+static int au_show_xino(struct seq_file *seq, struct super_block *sb)
1facf9fc 26835+{
26836+#ifdef CONFIG_SYSFS
26837+ return 0;
26838+#else
26839+ int err;
26840+ const int len = sizeof(AUFS_XINO_FNAME) - 1;
26841+ aufs_bindex_t bindex, brid;
1facf9fc 26842+ struct qstr *name;
26843+ struct file *f;
26844+ struct dentry *d, *h_root;
4a4d8108 26845+ struct au_hdentry *hdp;
1facf9fc 26846+
dece6358
AM
26847+ AuRwMustAnyLock(&sbinfo->si_rwsem);
26848+
1facf9fc 26849+ err = 0;
1facf9fc 26850+ f = au_sbi(sb)->si_xib;
26851+ if (!f)
26852+ goto out;
26853+
26854+ /* stop printing the default xino path on the first writable branch */
26855+ h_root = NULL;
26856+ brid = au_xino_brid(sb);
26857+ if (brid >= 0) {
26858+ bindex = au_br_index(sb, brid);
4a4d8108
AM
26859+ hdp = au_di(sb->s_root)->di_hdentry;
26860+ h_root = hdp[0 + bindex].hd_dentry;
1facf9fc 26861+ }
2000de60 26862+ d = f->f_path.dentry;
1facf9fc 26863+ name = &d->d_name;
26864+ /* safe ->d_parent because the file is unlinked */
26865+ if (d->d_parent == h_root
26866+ && name->len == len
26867+ && !memcmp(name->name, AUFS_XINO_FNAME, len))
26868+ goto out;
26869+
26870+ seq_puts(seq, ",xino=");
26871+ err = au_xino_path(seq, f);
26872+
4f0767ce 26873+out:
1facf9fc 26874+ return err;
26875+#endif
26876+}
26877+
26878+/* seq_file will re-call me in case of too long string */
7eafdf33 26879+static int aufs_show_options(struct seq_file *m, struct dentry *dentry)
1facf9fc 26880+{
027c5e7a 26881+ int err;
1facf9fc 26882+ unsigned int mnt_flags, v;
26883+ struct super_block *sb;
26884+ struct au_sbinfo *sbinfo;
26885+
26886+#define AuBool(name, str) do { \
26887+ v = au_opt_test(mnt_flags, name); \
26888+ if (v != au_opt_test(AuOpt_Def, name)) \
26889+ seq_printf(m, ",%s" #str, v ? "" : "no"); \
26890+} while (0)
26891+
26892+#define AuStr(name, str) do { \
26893+ v = mnt_flags & AuOptMask_##name; \
26894+ if (v != (AuOpt_Def & AuOptMask_##name)) \
26895+ seq_printf(m, "," #str "=%s", au_optstr_##str(v)); \
26896+} while (0)
26897+
26898+#define AuUInt(name, str, val) do { \
26899+ if (val != AUFS_##name##_DEF) \
26900+ seq_printf(m, "," #str "=%u", val); \
26901+} while (0)
26902+
7eafdf33 26903+ sb = dentry->d_sb;
c1595e42
JR
26904+ if (sb->s_flags & MS_POSIXACL)
26905+ seq_puts(m, ",acl");
26906+
26907+ /* lock free root dinfo */
1facf9fc 26908+ si_noflush_read_lock(sb);
26909+ sbinfo = au_sbi(sb);
26910+ seq_printf(m, ",si=%lx", sysaufs_si_id(sbinfo));
26911+
26912+ mnt_flags = au_mntflags(sb);
26913+ if (au_opt_test(mnt_flags, XINO)) {
7eafdf33 26914+ err = au_show_xino(m, sb);
1facf9fc 26915+ if (unlikely(err))
26916+ goto out;
26917+ } else
26918+ seq_puts(m, ",noxino");
26919+
26920+ AuBool(TRUNC_XINO, trunc_xino);
26921+ AuStr(UDBA, udba);
dece6358 26922+ AuBool(SHWH, shwh);
1facf9fc 26923+ AuBool(PLINK, plink);
4a4d8108 26924+ AuBool(DIO, dio);
076b876e 26925+ AuBool(DIRPERM1, dirperm1);
1facf9fc 26926+
26927+ v = sbinfo->si_wbr_create;
26928+ if (v != AuWbrCreate_Def)
26929+ au_show_wbr_create(m, v, sbinfo);
26930+
26931+ v = sbinfo->si_wbr_copyup;
26932+ if (v != AuWbrCopyup_Def)
26933+ seq_printf(m, ",cpup=%s", au_optstr_wbr_copyup(v));
26934+
26935+ v = au_opt_test(mnt_flags, ALWAYS_DIROPQ);
26936+ if (v != au_opt_test(AuOpt_Def, ALWAYS_DIROPQ))
26937+ seq_printf(m, ",diropq=%c", v ? 'a' : 'w');
26938+
26939+ AuUInt(DIRWH, dirwh, sbinfo->si_dirwh);
26940+
027c5e7a
AM
26941+ v = jiffies_to_msecs(sbinfo->si_rdcache) / MSEC_PER_SEC;
26942+ AuUInt(RDCACHE, rdcache, v);
1facf9fc 26943+
26944+ AuUInt(RDBLK, rdblk, sbinfo->si_rdblk);
26945+ AuUInt(RDHASH, rdhash, sbinfo->si_rdhash);
26946+
076b876e
AM
26947+ au_fhsm_show(m, sbinfo);
26948+
1facf9fc 26949+ AuBool(SUM, sum);
26950+ /* AuBool(SUM_W, wsum); */
26951+ AuBool(WARN_PERM, warn_perm);
26952+ AuBool(VERBOSE, verbose);
26953+
4f0767ce 26954+out:
1facf9fc 26955+ /* be sure to print "br:" last */
26956+ if (!sysaufs_brs) {
26957+ seq_puts(m, ",br:");
26958+ au_show_brs(m, sb);
26959+ }
26960+ si_read_unlock(sb);
26961+ return 0;
26962+
1facf9fc 26963+#undef AuBool
26964+#undef AuStr
4a4d8108 26965+#undef AuUInt
1facf9fc 26966+}
26967+
26968+/* ---------------------------------------------------------------------- */
26969+
26970+/* sum mode which returns the summation for statfs(2) */
26971+
/*
 * Saturating addition: returns @a + @b, or ULLONG_MAX when the sum does
 * not fit in a u64.
 */
static u64 au_add_till_max(u64 a, u64 b)
{
	u64 sum;

	sum = a + b;
	/* an unsigned sum smaller than one of its operands has wrapped */
	if (sum < a)
		return ULLONG_MAX;
	return sum;
}
26982+
/*
 * Saturating multiplication: returns @a * @mul, or ULLONG_MAX when the
 * product does not fit in a u64.
 *
 * @mul is converted to u64 exactly as the plain '*=' did before, so a
 * negative @mul acts as a huge unsigned factor.  A zero operand yields 0.
 *
 * Comparing the wrapped product with the old value (the previous test) is
 * not a valid overflow check for multiplication: e.g. 2^63 * 3 wraps to
 * 2^63 and was returned as if it were correct.  The product is therefore
 * built from 32-bit halves, which detects every overflow and needs no
 * 64-bit division.
 */
static u64 au_mul_till_max(u64 a, long mul)
{
	const u64 m = mul;
	const u64 a_lo = a & 0xffffffffULL, a_hi = a >> 32;
	const u64 m_lo = m & 0xffffffffULL, m_hi = m >> 32;
	u64 cross, low, prod;

	if (!a || !m)
		return 0;

	/* a_hi * m_hi alone would already be worth 2^64 or more */
	if (a_hi && m_hi)
		return ULLONG_MAX;

	/* at most one of the two terms is non-zero, so this cannot wrap */
	cross = a_hi * m_lo + a_lo * m_hi;
	if (cross >> 32)
		return ULLONG_MAX;

	low = a_lo * m_lo;
	prod = (cross << 32) + low;
	if (prod < low)
		return ULLONG_MAX;	/* the final addition wrapped */

	return prod;
}
26993+
26994+static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf)
26995+{
26996+ int err;
92d182d2 26997+ long bsize, factor;
1facf9fc 26998+ u64 blocks, bfree, bavail, files, ffree;
26999+ aufs_bindex_t bend, bindex, i;
27000+ unsigned char shared;
7f207e10 27001+ struct path h_path;
1facf9fc 27002+ struct super_block *h_sb;
27003+
92d182d2
AM
27004+ err = 0;
27005+ bsize = LONG_MAX;
27006+ files = 0;
27007+ ffree = 0;
1facf9fc 27008+ blocks = 0;
27009+ bfree = 0;
27010+ bavail = 0;
1facf9fc 27011+ bend = au_sbend(sb);
92d182d2 27012+ for (bindex = 0; bindex <= bend; bindex++) {
7f207e10
AM
27013+ h_path.mnt = au_sbr_mnt(sb, bindex);
27014+ h_sb = h_path.mnt->mnt_sb;
1facf9fc 27015+ shared = 0;
92d182d2 27016+ for (i = 0; !shared && i < bindex; i++)
1facf9fc 27017+ shared = (au_sbr_sb(sb, i) == h_sb);
27018+ if (shared)
27019+ continue;
27020+
27021+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
27022+ h_path.dentry = h_path.mnt->mnt_root;
27023+ err = vfs_statfs(&h_path, buf);
1facf9fc 27024+ if (unlikely(err))
27025+ goto out;
27026+
92d182d2
AM
27027+ if (bsize > buf->f_bsize) {
27028+ /*
27029+ * we will reduce bsize, so we have to expand blocks
27030+ * etc. to match them again
27031+ */
27032+ factor = (bsize / buf->f_bsize);
27033+ blocks = au_mul_till_max(blocks, factor);
27034+ bfree = au_mul_till_max(bfree, factor);
27035+ bavail = au_mul_till_max(bavail, factor);
27036+ bsize = buf->f_bsize;
27037+ }
27038+
27039+ factor = (buf->f_bsize / bsize);
27040+ blocks = au_add_till_max(blocks,
27041+ au_mul_till_max(buf->f_blocks, factor));
27042+ bfree = au_add_till_max(bfree,
27043+ au_mul_till_max(buf->f_bfree, factor));
27044+ bavail = au_add_till_max(bavail,
27045+ au_mul_till_max(buf->f_bavail, factor));
1facf9fc 27046+ files = au_add_till_max(files, buf->f_files);
27047+ ffree = au_add_till_max(ffree, buf->f_ffree);
27048+ }
27049+
92d182d2 27050+ buf->f_bsize = bsize;
1facf9fc 27051+ buf->f_blocks = blocks;
27052+ buf->f_bfree = bfree;
27053+ buf->f_bavail = bavail;
27054+ buf->f_files = files;
27055+ buf->f_ffree = ffree;
92d182d2 27056+ buf->f_frsize = 0;
1facf9fc 27057+
4f0767ce 27058+out:
1facf9fc 27059+ return err;
27060+}
27061+
27062+static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf)
27063+{
27064+ int err;
7f207e10 27065+ struct path h_path;
1facf9fc 27066+ struct super_block *sb;
27067+
27068+ /* lock free root dinfo */
27069+ sb = dentry->d_sb;
27070+ si_noflush_read_lock(sb);
7f207e10 27071+ if (!au_opt_test(au_mntflags(sb), SUM)) {
1facf9fc 27072+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
27073+ h_path.mnt = au_sbr_mnt(sb, 0);
27074+ h_path.dentry = h_path.mnt->mnt_root;
27075+ err = vfs_statfs(&h_path, buf);
27076+ } else
1facf9fc 27077+ err = au_statfs_sum(sb, buf);
27078+ si_read_unlock(sb);
27079+
27080+ if (!err) {
27081+ buf->f_type = AUFS_SUPER_MAGIC;
4a4d8108 27082+ buf->f_namelen = AUFS_MAX_NAMELEN;
1facf9fc 27083+ memset(&buf->f_fsid, 0, sizeof(buf->f_fsid));
27084+ }
27085+ /* buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; */
27086+
27087+ return err;
27088+}
27089+
27090+/* ---------------------------------------------------------------------- */
27091+
537831f9
AM
27092+static int aufs_sync_fs(struct super_block *sb, int wait)
27093+{
27094+ int err, e;
27095+ aufs_bindex_t bend, bindex;
27096+ struct au_branch *br;
27097+ struct super_block *h_sb;
27098+
27099+ err = 0;
27100+ si_noflush_read_lock(sb);
27101+ bend = au_sbend(sb);
27102+ for (bindex = 0; bindex <= bend; bindex++) {
27103+ br = au_sbr(sb, bindex);
27104+ if (!au_br_writable(br->br_perm))
27105+ continue;
27106+
27107+ h_sb = au_sbr_sb(sb, bindex);
27108+ if (h_sb->s_op->sync_fs) {
27109+ e = h_sb->s_op->sync_fs(h_sb, wait);
27110+ if (unlikely(e && !err))
27111+ err = e;
27112+ /* go on even if an error happens */
27113+ }
27114+ }
27115+ si_read_unlock(sb);
27116+
27117+ return err;
27118+}
27119+
27120+/* ---------------------------------------------------------------------- */
27121+
1facf9fc 27122+/* final actions when unmounting a file system */
27123+static void aufs_put_super(struct super_block *sb)
27124+{
27125+ struct au_sbinfo *sbinfo;
27126+
27127+ sbinfo = au_sbi(sb);
27128+ if (!sbinfo)
27129+ return;
27130+
1facf9fc 27131+ dbgaufs_si_fin(sbinfo);
27132+ kobject_put(&sbinfo->si_kobj);
27133+}
27134+
27135+/* ---------------------------------------------------------------------- */
27136+
7f207e10
AM
/*
 * Free an array obtained from au_array_alloc(), using vfree() or kfree()
 * depending on where the memory came from.  NULL is accepted.
 */
void au_array_free(void *array)
{
	if (!array)
		return;

	if (is_vmalloc_addr(array))
		vfree(array);
	else
		kfree(array);
}
27146+
27147+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb, void *arg)
27148+{
27149+ void *array;
076b876e 27150+ unsigned long long n, sz;
7f207e10
AM
27151+
27152+ array = NULL;
27153+ n = 0;
27154+ if (!*hint)
27155+ goto out;
27156+
27157+ if (*hint > ULLONG_MAX / sizeof(array)) {
27158+ array = ERR_PTR(-EMFILE);
27159+ pr_err("hint %llu\n", *hint);
27160+ goto out;
27161+ }
27162+
076b876e
AM
27163+ sz = sizeof(array) * *hint;
27164+ array = kzalloc(sz, GFP_NOFS);
7f207e10 27165+ if (unlikely(!array))
076b876e 27166+ array = vzalloc(sz);
7f207e10
AM
27167+ if (unlikely(!array)) {
27168+ array = ERR_PTR(-ENOMEM);
27169+ goto out;
27170+ }
27171+
27172+ n = cb(array, *hint, arg);
27173+ AuDebugOn(n > *hint);
27174+
27175+out:
27176+ *hint = n;
27177+ return array;
27178+}
27179+
27180+static unsigned long long au_iarray_cb(void *a,
27181+ unsigned long long max __maybe_unused,
27182+ void *arg)
27183+{
27184+ unsigned long long n;
27185+ struct inode **p, *inode;
27186+ struct list_head *head;
27187+
27188+ n = 0;
27189+ p = a;
27190+ head = arg;
2cbb1c4b 27191+ spin_lock(&inode_sb_list_lock);
7f207e10
AM
27192+ list_for_each_entry(inode, head, i_sb_list) {
27193+ if (!is_bad_inode(inode)
27194+ && au_ii(inode)->ii_bstart >= 0) {
2cbb1c4b
JR
27195+ spin_lock(&inode->i_lock);
27196+ if (atomic_read(&inode->i_count)) {
27197+ au_igrab(inode);
27198+ *p++ = inode;
27199+ n++;
27200+ AuDebugOn(n > max);
27201+ }
27202+ spin_unlock(&inode->i_lock);
7f207e10
AM
27203+ }
27204+ }
2cbb1c4b 27205+ spin_unlock(&inode_sb_list_lock);
7f207e10
AM
27206+
27207+ return n;
27208+}
27209+
27210+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max)
27211+{
27212+ *max = atomic_long_read(&au_sbi(sb)->si_ninodes);
27213+ return au_array_alloc(max, au_iarray_cb, &sb->s_inodes);
27214+}
27215+
/*
 * Drop the references held on the first @max inodes of @a with iput()
 * and free the array itself.
 */
void au_iarray_free(struct inode **a, unsigned long long max)
{
	unsigned long long idx = 0;

	while (idx < max)
		iput(a[idx++]);

	au_array_free(a);
}
27224+
27225+/* ---------------------------------------------------------------------- */
27226+
1facf9fc 27227+/*
27228+ * refresh dentry and inode at remount time.
27229+ */
027c5e7a
AM
27230+/* todo: consolidate with simple_reval_dpath() and au_reval_for_attr() */
27231+static int au_do_refresh(struct dentry *dentry, unsigned int dir_flags,
27232+ struct dentry *parent)
1facf9fc 27233+{
27234+ int err;
1facf9fc 27235+
27236+ di_write_lock_child(dentry);
1facf9fc 27237+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a
AM
27238+ err = au_refresh_dentry(dentry, parent);
27239+ if (!err && dir_flags)
5527c038 27240+ au_hn_reset(d_inode(dentry), dir_flags);
1facf9fc 27241+ di_read_unlock(parent, AuLock_IR);
1facf9fc 27242+ di_write_unlock(dentry);
27243+
27244+ return err;
27245+}
27246+
027c5e7a
AM
27247+static int au_do_refresh_d(struct dentry *dentry, unsigned int sigen,
27248+ struct au_sbinfo *sbinfo,
ab036dbd 27249+ const unsigned int dir_flags, unsigned int do_idop)
1facf9fc 27250+{
027c5e7a
AM
27251+ int err;
27252+ struct dentry *parent;
027c5e7a
AM
27253+
27254+ err = 0;
27255+ parent = dget_parent(dentry);
27256+ if (!au_digen_test(parent, sigen) && au_digen_test(dentry, sigen)) {
5527c038
JR
27257+ if (d_really_is_positive(dentry)) {
27258+ if (!d_is_dir(dentry))
027c5e7a
AM
27259+ err = au_do_refresh(dentry, /*dir_flags*/0,
27260+ parent);
27261+ else {
27262+ err = au_do_refresh(dentry, dir_flags, parent);
27263+ if (unlikely(err))
27264+ au_fset_si(sbinfo, FAILED_REFRESH_DIR);
27265+ }
27266+ } else
27267+ err = au_do_refresh(dentry, /*dir_flags*/0, parent);
27268+ AuDbgDentry(dentry);
27269+ }
27270+ dput(parent);
27271+
ab036dbd
AM
27272+ if (!err) {
27273+ if (do_idop)
27274+ au_refresh_dop(dentry, /*force_reval*/0);
27275+ } else
27276+ au_refresh_dop(dentry, /*force_reval*/1);
27277+
027c5e7a
AM
27278+ AuTraceErr(err);
27279+ return err;
1facf9fc 27280+}
27281+
ab036dbd 27282+static int au_refresh_d(struct super_block *sb, unsigned int do_idop)
1facf9fc 27283+{
27284+ int err, i, j, ndentry, e;
027c5e7a 27285+ unsigned int sigen;
1facf9fc 27286+ struct au_dcsub_pages dpages;
27287+ struct au_dpage *dpage;
027c5e7a
AM
27288+ struct dentry **dentries, *d;
27289+ struct au_sbinfo *sbinfo;
27290+ struct dentry *root = sb->s_root;
5527c038 27291+ const unsigned int dir_flags = au_hi_flags(d_inode(root), /*isdir*/1);
1facf9fc 27292+
ab036dbd
AM
27293+ if (do_idop)
27294+ au_refresh_dop(root, /*force_reval*/0);
27295+
027c5e7a
AM
27296+ err = au_dpages_init(&dpages, GFP_NOFS);
27297+ if (unlikely(err))
1facf9fc 27298+ goto out;
027c5e7a
AM
27299+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
27300+ if (unlikely(err))
1facf9fc 27301+ goto out_dpages;
1facf9fc 27302+
027c5e7a
AM
27303+ sigen = au_sigen(sb);
27304+ sbinfo = au_sbi(sb);
27305+ for (i = 0; i < dpages.ndpage; i++) {
1facf9fc 27306+ dpage = dpages.dpages + i;
27307+ dentries = dpage->dentries;
27308+ ndentry = dpage->ndentry;
027c5e7a 27309+ for (j = 0; j < ndentry; j++) {
1facf9fc 27310+ d = dentries[j];
ab036dbd
AM
27311+ e = au_do_refresh_d(d, sigen, sbinfo, dir_flags,
27312+ do_idop);
027c5e7a
AM
27313+ if (unlikely(e && !err))
27314+ err = e;
27315+ /* go on even err */
1facf9fc 27316+ }
27317+ }
27318+
4f0767ce 27319+out_dpages:
1facf9fc 27320+ au_dpages_free(&dpages);
4f0767ce 27321+out:
1facf9fc 27322+ return err;
27323+}
27324+
ab036dbd 27325+static int au_refresh_i(struct super_block *sb, unsigned int do_idop)
1facf9fc 27326+{
027c5e7a
AM
27327+ int err, e;
27328+ unsigned int sigen;
27329+ unsigned long long max, ull;
27330+ struct inode *inode, **array;
1facf9fc 27331+
027c5e7a
AM
27332+ array = au_iarray_alloc(sb, &max);
27333+ err = PTR_ERR(array);
27334+ if (IS_ERR(array))
27335+ goto out;
1facf9fc 27336+
27337+ err = 0;
027c5e7a
AM
27338+ sigen = au_sigen(sb);
27339+ for (ull = 0; ull < max; ull++) {
27340+ inode = array[ull];
076b876e
AM
27341+ if (unlikely(!inode))
27342+ break;
ab036dbd
AM
27343+
27344+ e = 0;
27345+ ii_write_lock_child(inode);
537831f9 27346+ if (au_iigen(inode, NULL) != sigen) {
027c5e7a 27347+ e = au_refresh_hinode_self(inode);
1facf9fc 27348+ if (unlikely(e)) {
ab036dbd 27349+ au_refresh_iop(inode, /*force_getattr*/1);
027c5e7a 27350+ pr_err("error %d, i%lu\n", e, inode->i_ino);
1facf9fc 27351+ if (!err)
27352+ err = e;
27353+ /* go on even if err */
27354+ }
27355+ }
ab036dbd
AM
27356+ if (!e && do_idop)
27357+ au_refresh_iop(inode, /*force_getattr*/0);
27358+ ii_write_unlock(inode);
1facf9fc 27359+ }
27360+
027c5e7a 27361+ au_iarray_free(array, max);
1facf9fc 27362+
4f0767ce 27363+out:
1facf9fc 27364+ return err;
27365+}
27366+
ab036dbd 27367+static void au_remount_refresh(struct super_block *sb, unsigned int do_idop)
1facf9fc 27368+{
027c5e7a
AM
27369+ int err, e;
27370+ unsigned int udba;
27371+ aufs_bindex_t bindex, bend;
1facf9fc 27372+ struct dentry *root;
27373+ struct inode *inode;
027c5e7a 27374+ struct au_branch *br;
ab036dbd 27375+ struct au_sbinfo *sbi;
1facf9fc 27376+
27377+ au_sigen_inc(sb);
ab036dbd
AM
27378+ sbi = au_sbi(sb);
27379+ au_fclr_si(sbi, FAILED_REFRESH_DIR);
1facf9fc 27380+
27381+ root = sb->s_root;
27382+ DiMustNoWaiters(root);
5527c038 27383+ inode = d_inode(root);
1facf9fc 27384+ IiMustNoWaiters(inode);
1facf9fc 27385+
027c5e7a
AM
27386+ udba = au_opt_udba(sb);
27387+ bend = au_sbend(sb);
27388+ for (bindex = 0; bindex <= bend; bindex++) {
27389+ br = au_sbr(sb, bindex);
27390+ err = au_hnotify_reset_br(udba, br, br->br_perm);
1facf9fc 27391+ if (unlikely(err))
027c5e7a
AM
27392+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
27393+ bindex, err);
27394+ /* go on even if err */
1facf9fc 27395+ }
027c5e7a 27396+ au_hn_reset(inode, au_hi_flags(inode, /*isdir*/1));
1facf9fc 27397+
ab036dbd
AM
27398+ if (do_idop) {
27399+ if (au_ftest_si(sbi, NO_DREVAL)) {
27400+ AuDebugOn(sb->s_d_op == &aufs_dop_noreval);
27401+ sb->s_d_op = &aufs_dop_noreval;
27402+ AuDebugOn(sbi->si_iop_array == aufs_iop_nogetattr);
27403+ sbi->si_iop_array = aufs_iop_nogetattr;
27404+ } else {
27405+ AuDebugOn(sb->s_d_op == &aufs_dop);
27406+ sb->s_d_op = &aufs_dop;
27407+ AuDebugOn(sbi->si_iop_array == aufs_iop);
27408+ sbi->si_iop_array = aufs_iop;
27409+ }
27410+ pr_info("reset to %pf and %pf\n",
27411+ sb->s_d_op, sbi->si_iop_array);
27412+ }
27413+
027c5e7a 27414+ di_write_unlock(root);
ab036dbd
AM
27415+ err = au_refresh_d(sb, do_idop);
27416+ e = au_refresh_i(sb, do_idop);
027c5e7a
AM
27417+ if (unlikely(e && !err))
27418+ err = e;
1facf9fc 27419+ /* aufs_write_lock() calls ..._child() */
27420+ di_write_lock_child(root);
027c5e7a
AM
27421+
27422+ au_cpup_attr_all(inode, /*force*/1);
27423+
27424+ if (unlikely(err))
27425+ AuIOErr("refresh failed, ignored, %d\n", err);
1facf9fc 27426+}
27427+
27428+/* stop extra interpretation of errno in mount(8), and strange error messages */
27429+static int cvt_err(int err)
27430+{
27431+ AuTraceErr(err);
27432+
27433+ switch (err) {
27434+ case -ENOENT:
27435+ case -ENOTDIR:
27436+ case -EEXIST:
27437+ case -EIO:
27438+ err = -EINVAL;
27439+ }
27440+ return err;
27441+}
27442+
27443+static int aufs_remount_fs(struct super_block *sb, int *flags, char *data)
27444+{
4a4d8108
AM
27445+ int err, do_dx;
27446+ unsigned int mntflags;
1facf9fc 27447+ struct au_opts opts;
27448+ struct dentry *root;
27449+ struct inode *inode;
27450+ struct au_sbinfo *sbinfo;
27451+
27452+ err = 0;
27453+ root = sb->s_root;
27454+ if (!data || !*data) {
e49829fe
JR
27455+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
27456+ if (!err) {
27457+ di_write_lock_child(root);
27458+ err = au_opts_verify(sb, *flags, /*pending*/0);
27459+ aufs_write_unlock(root);
27460+ }
1facf9fc 27461+ goto out;
27462+ }
27463+
27464+ err = -ENOMEM;
27465+ memset(&opts, 0, sizeof(opts));
27466+ opts.opt = (void *)__get_free_page(GFP_NOFS);
27467+ if (unlikely(!opts.opt))
27468+ goto out;
27469+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
27470+ opts.flags = AuOpts_REMOUNT;
27471+ opts.sb_flags = *flags;
27472+
27473+ /* parse it before aufs lock */
27474+ err = au_opts_parse(sb, data, &opts);
27475+ if (unlikely(err))
27476+ goto out_opts;
27477+
27478+ sbinfo = au_sbi(sb);
5527c038 27479+ inode = d_inode(root);
1facf9fc 27480+ mutex_lock(&inode->i_mutex);
e49829fe
JR
27481+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
27482+ if (unlikely(err))
27483+ goto out_mtx;
27484+ di_write_lock_child(root);
1facf9fc 27485+
27486+ /* au_opts_remount() may return an error */
27487+ err = au_opts_remount(sb, &opts);
27488+ au_opts_free(&opts);
27489+
027c5e7a 27490+ if (au_ftest_opts(opts.flags, REFRESH))
ab036dbd 27491+ au_remount_refresh(sb, au_ftest_opts(opts.flags, REFRESH_IDOP));
1facf9fc 27492+
4a4d8108
AM
27493+ if (au_ftest_opts(opts.flags, REFRESH_DYAOP)) {
27494+ mntflags = au_mntflags(sb);
27495+ do_dx = !!au_opt_test(mntflags, DIO);
27496+ au_dy_arefresh(do_dx);
27497+ }
27498+
076b876e 27499+ au_fhsm_wrote_all(sb, /*force*/1); /* ?? */
1facf9fc 27500+ aufs_write_unlock(root);
953406b4 27501+
e49829fe
JR
27502+out_mtx:
27503+ mutex_unlock(&inode->i_mutex);
4f0767ce 27504+out_opts:
1facf9fc 27505+ free_page((unsigned long)opts.opt);
4f0767ce 27506+out:
1facf9fc 27507+ err = cvt_err(err);
27508+ AuTraceErr(err);
27509+ return err;
27510+}
27511+
4a4d8108 27512+static const struct super_operations aufs_sop = {
1facf9fc 27513+ .alloc_inode = aufs_alloc_inode,
27514+ .destroy_inode = aufs_destroy_inode,
b752ccd1 27515+ /* always deleting, no clearing */
1facf9fc 27516+ .drop_inode = generic_delete_inode,
27517+ .show_options = aufs_show_options,
27518+ .statfs = aufs_statfs,
27519+ .put_super = aufs_put_super,
537831f9 27520+ .sync_fs = aufs_sync_fs,
1facf9fc 27521+ .remount_fs = aufs_remount_fs
27522+};
27523+
27524+/* ---------------------------------------------------------------------- */
27525+
27526+static int alloc_root(struct super_block *sb)
27527+{
27528+ int err;
27529+ struct inode *inode;
27530+ struct dentry *root;
27531+
27532+ err = -ENOMEM;
27533+ inode = au_iget_locked(sb, AUFS_ROOT_INO);
27534+ err = PTR_ERR(inode);
27535+ if (IS_ERR(inode))
27536+ goto out;
27537+
ab036dbd 27538+ inode->i_op = aufs_iop + AuIop_DIR; /* with getattr by default */
1facf9fc 27539+ inode->i_fop = &aufs_dir_fop;
27540+ inode->i_mode = S_IFDIR;
9dbd164d 27541+ set_nlink(inode, 2);
1facf9fc 27542+ unlock_new_inode(inode);
27543+
92d182d2 27544+ root = d_make_root(inode);
1facf9fc 27545+ if (unlikely(!root))
92d182d2 27546+ goto out;
1facf9fc 27547+ err = PTR_ERR(root);
27548+ if (IS_ERR(root))
92d182d2 27549+ goto out;
1facf9fc 27550+
4a4d8108 27551+ err = au_di_init(root);
1facf9fc 27552+ if (!err) {
27553+ sb->s_root = root;
27554+ return 0; /* success */
27555+ }
27556+ dput(root);
1facf9fc 27557+
4f0767ce 27558+out:
1facf9fc 27559+ return err;
1facf9fc 27560+}
27561+
27562+static int aufs_fill_super(struct super_block *sb, void *raw_data,
27563+ int silent __maybe_unused)
27564+{
27565+ int err;
27566+ struct au_opts opts;
ab036dbd 27567+ struct au_sbinfo *sbinfo;
1facf9fc 27568+ struct dentry *root;
27569+ struct inode *inode;
27570+ char *arg = raw_data;
27571+
27572+ if (unlikely(!arg || !*arg)) {
27573+ err = -EINVAL;
4a4d8108 27574+ pr_err("no arg\n");
1facf9fc 27575+ goto out;
27576+ }
27577+
27578+ err = -ENOMEM;
27579+ memset(&opts, 0, sizeof(opts));
27580+ opts.opt = (void *)__get_free_page(GFP_NOFS);
27581+ if (unlikely(!opts.opt))
27582+ goto out;
27583+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
27584+ opts.sb_flags = sb->s_flags;
27585+
27586+ err = au_si_alloc(sb);
27587+ if (unlikely(err))
27588+ goto out_opts;
ab036dbd 27589+ sbinfo = au_sbi(sb);
1facf9fc 27590+
27591+ /* all timestamps always follow the ones on the branch */
27592+ sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
27593+ sb->s_op = &aufs_sop;
027c5e7a 27594+ sb->s_d_op = &aufs_dop;
1facf9fc 27595+ sb->s_magic = AUFS_SUPER_MAGIC;
27596+ sb->s_maxbytes = 0;
c1595e42 27597+ sb->s_stack_depth = 1;
1facf9fc 27598+ au_export_init(sb);
c1595e42 27599+ /* au_xattr_init(sb); */
1facf9fc 27600+
27601+ err = alloc_root(sb);
27602+ if (unlikely(err)) {
27603+ si_write_unlock(sb);
27604+ goto out_info;
27605+ }
27606+ root = sb->s_root;
5527c038 27607+ inode = d_inode(root);
1facf9fc 27608+
27609+ /*
27610+ * actually we can parse options regardless of the aufs lock here.
27611+ * but at remount time, parsing must be done before aufs lock.
27612+ * so we follow the same rule.
27613+ */
27614+ ii_write_lock_parent(inode);
27615+ aufs_write_unlock(root);
27616+ err = au_opts_parse(sb, arg, &opts);
27617+ if (unlikely(err))
27618+ goto out_root;
27619+
27620+ /* lock vfs_inode first, then aufs. */
27621+ mutex_lock(&inode->i_mutex);
1facf9fc 27622+ aufs_write_lock(root);
27623+ err = au_opts_mount(sb, &opts);
27624+ au_opts_free(&opts);
ab036dbd
AM
27625+ if (!err && au_ftest_si(sbinfo, NO_DREVAL)) {
27626+ sb->s_d_op = &aufs_dop_noreval;
27627+ pr_info("%pf\n", sb->s_d_op);
27628+ au_refresh_dop(root, /*force_reval*/0);
27629+ sbinfo->si_iop_array = aufs_iop_nogetattr;
27630+ au_refresh_iop(inode, /*force_getattr*/0);
27631+ }
1facf9fc 27632+ aufs_write_unlock(root);
27633+ mutex_unlock(&inode->i_mutex);
4a4d8108
AM
27634+ if (!err)
27635+ goto out_opts; /* success */
1facf9fc 27636+
4f0767ce 27637+out_root:
1facf9fc 27638+ dput(root);
27639+ sb->s_root = NULL;
4f0767ce 27640+out_info:
ab036dbd
AM
27641+ dbgaufs_si_fin(sbinfo);
27642+ kobject_put(&sbinfo->si_kobj);
1facf9fc 27643+ sb->s_fs_info = NULL;
4f0767ce 27644+out_opts:
1facf9fc 27645+ free_page((unsigned long)opts.opt);
4f0767ce 27646+out:
1facf9fc 27647+ AuTraceErr(err);
27648+ err = cvt_err(err);
27649+ AuTraceErr(err);
27650+ return err;
27651+}
27652+
27653+/* ---------------------------------------------------------------------- */
27654+
027c5e7a
AM
27655+static struct dentry *aufs_mount(struct file_system_type *fs_type, int flags,
27656+ const char *dev_name __maybe_unused,
27657+ void *raw_data)
1facf9fc 27658+{
027c5e7a 27659+ struct dentry *root;
1facf9fc 27660+ struct super_block *sb;
27661+
27662+ /* all timestamps always follow the ones on the branch */
27663+ /* mnt->mnt_flags |= MNT_NOATIME | MNT_NODIRATIME; */
027c5e7a
AM
27664+ root = mount_nodev(fs_type, flags, raw_data, aufs_fill_super);
27665+ if (IS_ERR(root))
27666+ goto out;
27667+
27668+ sb = root->d_sb;
27669+ si_write_lock(sb, !AuLock_FLUSH);
27670+ sysaufs_brs_add(sb, 0);
27671+ si_write_unlock(sb);
27672+ au_sbilist_add(sb);
27673+
27674+out:
27675+ return root;
1facf9fc 27676+}
27677+
e49829fe
JR
27678+static void aufs_kill_sb(struct super_block *sb)
27679+{
27680+ struct au_sbinfo *sbinfo;
27681+
27682+ sbinfo = au_sbi(sb);
27683+ if (sbinfo) {
27684+ au_sbilist_del(sb);
27685+ aufs_write_lock(sb->s_root);
076b876e 27686+ au_fhsm_fin(sb);
e49829fe
JR
27687+ if (sbinfo->si_wbr_create_ops->fin)
27688+ sbinfo->si_wbr_create_ops->fin(sb);
27689+ if (au_opt_test(sbinfo->si_mntflags, UDBA_HNOTIFY)) {
27690+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_NONE);
ab036dbd 27691+ au_remount_refresh(sb, /*do_idop*/0);
e49829fe
JR
27692+ }
27693+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
27694+ au_plink_put(sb, /*verbose*/1);
27695+ au_xino_clr(sb);
1e00d052 27696+ sbinfo->si_sb = NULL;
e49829fe 27697+ aufs_write_unlock(sb->s_root);
e49829fe
JR
27698+ au_nwt_flush(&sbinfo->si_nowait);
27699+ }
98d9a5b1 27700+ kill_anon_super(sb);
e49829fe
JR
27701+}
27702+
1facf9fc 27703+struct file_system_type aufs_fs_type = {
27704+ .name = AUFS_FSTYPE,
c06a8ce3
AM
27705+ /* a race between rename and others */
27706+ .fs_flags = FS_RENAME_DOES_D_MOVE,
027c5e7a 27707+ .mount = aufs_mount,
e49829fe 27708+ .kill_sb = aufs_kill_sb,
1facf9fc 27709+ /* no need to __module_get() and module_put(). */
27710+ .owner = THIS_MODULE,
27711+};
7f207e10
AM
27712diff -urN /usr/share/empty/fs/aufs/super.h linux/fs/aufs/super.h
27713--- /usr/share/empty/fs/aufs/super.h 1970-01-01 01:00:00.000000000 +0100
ab036dbd
AM
27714+++ linux/fs/aufs/super.h 2015-12-10 18:46:31.223310574 +0100
27715@@ -0,0 +1,638 @@
1facf9fc 27716+/*
2000de60 27717+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 27718+ *
27719+ * This program, aufs is free software; you can redistribute it and/or modify
27720+ * it under the terms of the GNU General Public License as published by
27721+ * the Free Software Foundation; either version 2 of the License, or
27722+ * (at your option) any later version.
dece6358
AM
27723+ *
27724+ * This program is distributed in the hope that it will be useful,
27725+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
27726+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27727+ * GNU General Public License for more details.
27728+ *
27729+ * You should have received a copy of the GNU General Public License
523b37e3 27730+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 27731+ */
27732+
27733+/*
27734+ * super_block operations
27735+ */
27736+
27737+#ifndef __AUFS_SUPER_H__
27738+#define __AUFS_SUPER_H__
27739+
27740+#ifdef __KERNEL__
27741+
27742+#include <linux/fs.h>
5527c038 27743+#include <linux/kobject.h>
1facf9fc 27744+#include "rwsem.h"
27745+#include "spl.h"
27746+#include "wkq.h"
27747+
1facf9fc 27748+/* policies to select one among multiple writable branches */
27749+struct au_wbr_copyup_operations {
27750+ int (*copyup)(struct dentry *dentry);
27751+};
27752+
392086de
AM
27753+#define AuWbr_DIR 1 /* target is a dir */
27754+#define AuWbr_PARENT (1 << 1) /* always require a parent */
27755+
27756+#define au_ftest_wbr(flags, name) ((flags) & AuWbr_##name)
27757+#define au_fset_wbr(flags, name) { (flags) |= AuWbr_##name; }
27758+#define au_fclr_wbr(flags, name) { (flags) &= ~AuWbr_##name; }
27759+
1facf9fc 27760+struct au_wbr_create_operations {
392086de 27761+ int (*create)(struct dentry *dentry, unsigned int flags);
1facf9fc 27762+ int (*init)(struct super_block *sb);
27763+ int (*fin)(struct super_block *sb);
27764+};
27765+
27766+struct au_wbr_mfs {
27767+ struct mutex mfs_lock; /* protect this structure */
27768+ unsigned long mfs_jiffy;
27769+ unsigned long mfs_expire;
27770+ aufs_bindex_t mfs_bindex;
27771+
27772+ unsigned long long mfsrr_bytes;
27773+ unsigned long long mfsrr_watermark;
27774+};
27775+
86dc4139
AM
27776+struct pseudo_link {
27777+ union {
27778+ struct hlist_node hlist;
27779+ struct rcu_head rcu;
27780+ };
27781+ struct inode *inode;
27782+};
27783+
27784+#define AuPlink_NHASH 100
27785+static inline int au_plink_hash(ino_t ino)
27786+{
27787+ return ino % AuPlink_NHASH;
27788+}
27789+
076b876e
AM
27790+/* File-based Hierarchical Storage Management */
27791+struct au_fhsm {
27792+#ifdef CONFIG_AUFS_FHSM
27793+ /* allow only one process who can receive the notification */
27794+ spinlock_t fhsm_spin;
27795+ pid_t fhsm_pid;
27796+ wait_queue_head_t fhsm_wqh;
27797+ atomic_t fhsm_readable;
27798+
c1595e42 27799+ /* these are protected by si_rwsem */
076b876e 27800+ unsigned long fhsm_expire;
c1595e42 27801+ aufs_bindex_t fhsm_bottom;
076b876e
AM
27802+#endif
27803+};
27804+
1facf9fc 27805+struct au_branch;
27806+struct au_sbinfo {
27807+ /* nowait tasks in the system-wide workqueue */
27808+ struct au_nowait_tasks si_nowait;
27809+
b752ccd1
AM
27810+ /*
27811+ * tried sb->s_umount, but failed due to the dependency between i_mutex.
27812+ * rwsem for au_sbinfo is necessary.
27813+ */
dece6358 27814+ struct au_rwsem si_rwsem;
1facf9fc 27815+
b752ccd1
AM
27816+ /* prevent recursive locking in deleting inode */
27817+ struct {
27818+ unsigned long *bitmap;
27819+ spinlock_t tree_lock;
27820+ struct radix_tree_root tree;
27821+ } au_si_pid;
27822+
7f207e10 27823+ /*
523b37e3
AM
27824+ * dirty approach to protect sb->sb_inodes and ->s_files (gone) from
27825+ * remount.
7f207e10
AM
27826+ */
27827+ atomic_long_t si_ninodes, si_nfiles;
27828+
1facf9fc 27829+ /* branch management */
27830+ unsigned int si_generation;
27831+
2000de60 27832+ /* see AuSi_ flags */
1facf9fc 27833+ unsigned char au_si_status;
27834+
27835+ aufs_bindex_t si_bend;
7f207e10
AM
27836+
27837+ /* dirty trick to keep br_id plus */
27838+ unsigned int si_last_br_id :
27839+ sizeof(aufs_bindex_t) * BITS_PER_BYTE - 1;
1facf9fc 27840+ struct au_branch **si_branch;
27841+
27842+ /* policy to select a writable branch */
27843+ unsigned char si_wbr_copyup;
27844+ unsigned char si_wbr_create;
27845+ struct au_wbr_copyup_operations *si_wbr_copyup_ops;
27846+ struct au_wbr_create_operations *si_wbr_create_ops;
27847+
27848+ /* round robin */
27849+ atomic_t si_wbr_rr_next;
27850+
27851+ /* most free space */
27852+ struct au_wbr_mfs si_wbr_mfs;
27853+
076b876e
AM
27854+ /* File-based Hierarchical Storage Management */
27855+ struct au_fhsm si_fhsm;
27856+
1facf9fc 27857+ /* mount flags */
27858+ /* include/asm-ia64/siginfo.h defines a macro named si_flags */
27859+ unsigned int si_mntflags;
27860+
27861+ /* external inode number (bitmap and translation table) */
5527c038
JR
27862+ vfs_readf_t si_xread;
27863+ vfs_writef_t si_xwrite;
1facf9fc 27864+ struct file *si_xib;
27865+ struct mutex si_xib_mtx; /* protect xib members */
27866+ unsigned long *si_xib_buf;
27867+ unsigned long si_xib_last_pindex;
27868+ int si_xib_next_bit;
27869+ aufs_bindex_t si_xino_brid;
392086de
AM
27870+ unsigned long si_xino_jiffy;
27871+ unsigned long si_xino_expire;
1facf9fc 27872+ /* reserved for future use */
27873+ /* unsigned long long si_xib_limit; */ /* Max xib file size */
27874+
27875+#ifdef CONFIG_AUFS_EXPORT
27876+ /* i_generation */
27877+ struct file *si_xigen;
27878+ atomic_t si_xigen_next;
27879+#endif
27880+
b912730e
AM
27881+ /* dirty trick to support atomic_open */
27882+ struct au_sphlhead si_aopen;
27883+
1facf9fc 27884+ /* vdir parameters */
e49829fe 27885+ unsigned long si_rdcache; /* max cache time in jiffies */
1facf9fc 27886+ unsigned int si_rdblk; /* deblk size */
27887+ unsigned int si_rdhash; /* hash size */
27888+
27889+ /*
27890+ * If the number of whiteouts is larger than si_dirwh, leave all of
27891+ * them after au_whtmp_ren to reduce the cost of rmdir(2).
27892+ * future fsck.aufs or kernel thread will remove them later.
27893+ * Otherwise, remove all whiteouts and the dir in rmdir(2).
27894+ */
27895+ unsigned int si_dirwh;
27896+
1facf9fc 27897+ /* pseudo_link list */
86dc4139 27898+ struct au_sphlhead si_plink[AuPlink_NHASH];
1facf9fc 27899+ wait_queue_head_t si_plink_wq;
4a4d8108 27900+ spinlock_t si_plink_maint_lock;
e49829fe 27901+ pid_t si_plink_maint_pid;
1facf9fc 27902+
523b37e3
AM
27903+ /* file list */
27904+ struct au_sphlhead si_files;
27905+
ab036dbd
AM
27906+ /* with/without getattr, brother of sb->s_d_op */
27907+ struct inode_operations *si_iop_array;
27908+
1facf9fc 27909+ /*
27910+ * sysfs and lifetime management.
27911+ * this is not a small structure and it may be a waste of memory when
27912+ * sysfs is disabled, particularly when many aufs-es are mounted.
27913+ * but using sysfs is the majority case.
27914+ */
27915+ struct kobject si_kobj;
27916+#ifdef CONFIG_DEBUG_FS
86dc4139
AM
27917+ struct dentry *si_dbgaufs;
27918+ struct dentry *si_dbgaufs_plink;
27919+ struct dentry *si_dbgaufs_xib;
1facf9fc 27920+#ifdef CONFIG_AUFS_EXPORT
27921+ struct dentry *si_dbgaufs_xigen;
27922+#endif
27923+#endif
27924+
e49829fe
JR
27925+#ifdef CONFIG_AUFS_SBILIST
27926+ struct list_head si_list;
27927+#endif
27928+
1facf9fc 27929+ /* dirty, necessary for unmounting, sysfs and sysrq */
27930+ struct super_block *si_sb;
27931+};
27932+
dece6358
AM
27933+/* sbinfo status flags */
27934+/*
27935+ * set true when refresh_dirs() failed at remount time.
27936+ * then try refreshing dirs at access time again.
27937+ * if it is false, refreshing dirs at access time is unnecesary
27938+ */
027c5e7a 27939+#define AuSi_FAILED_REFRESH_DIR 1
076b876e 27940+#define AuSi_FHSM (1 << 1) /* fhsm is active now */
ab036dbd 27941+#define AuSi_NO_DREVAL (1 << 2) /* disable all d_revalidate */
076b876e
AM
27942+
27943+#ifndef CONFIG_AUFS_FHSM
27944+#undef AuSi_FHSM
27945+#define AuSi_FHSM 0
27946+#endif
27947+
dece6358
AM
27948+static inline unsigned char au_do_ftest_si(struct au_sbinfo *sbi,
27949+ unsigned int flag)
27950+{
27951+ AuRwMustAnyLock(&sbi->si_rwsem);
27952+ return sbi->au_si_status & flag;
27953+}
27954+#define au_ftest_si(sbinfo, name) au_do_ftest_si(sbinfo, AuSi_##name)
27955+#define au_fset_si(sbinfo, name) do { \
27956+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
27957+ (sbinfo)->au_si_status |= AuSi_##name; \
27958+} while (0)
27959+#define au_fclr_si(sbinfo, name) do { \
27960+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
27961+ (sbinfo)->au_si_status &= ~AuSi_##name; \
27962+} while (0)
27963+
1facf9fc 27964+/* ---------------------------------------------------------------------- */
27965+
27966+/* policy to select one among writable branches */
4a4d8108
AM
27967+#define AuWbrCopyup(sbinfo, ...) \
27968+ ((sbinfo)->si_wbr_copyup_ops->copyup(__VA_ARGS__))
27969+#define AuWbrCreate(sbinfo, ...) \
27970+ ((sbinfo)->si_wbr_create_ops->create(__VA_ARGS__))
1facf9fc 27971+
27972+/* flags for si_read_lock()/aufs_read_lock()/di_read_lock() */
27973+#define AuLock_DW 1 /* write-lock dentry */
27974+#define AuLock_IR (1 << 1) /* read-lock inode */
27975+#define AuLock_IW (1 << 2) /* write-lock inode */
27976+#define AuLock_FLUSH (1 << 3) /* wait for 'nowait' tasks */
ab036dbd 27977+#define AuLock_DIRS (1 << 4) /* target is a pair of dirs */
e49829fe
JR
27978+#define AuLock_NOPLM (1 << 5) /* return err in plm mode */
27979+#define AuLock_NOPLMW (1 << 6) /* wait for plm mode ends */
027c5e7a 27980+#define AuLock_GEN (1 << 7) /* test digen/iigen */
1facf9fc 27981+#define au_ftest_lock(flags, name) ((flags) & AuLock_##name)
7f207e10
AM
27982+#define au_fset_lock(flags, name) \
27983+ do { (flags) |= AuLock_##name; } while (0)
27984+#define au_fclr_lock(flags, name) \
27985+ do { (flags) &= ~AuLock_##name; } while (0)
1facf9fc 27986+
27987+/* ---------------------------------------------------------------------- */
27988+
27989+/* super.c */
27990+extern struct file_system_type aufs_fs_type;
27991+struct inode *au_iget_locked(struct super_block *sb, ino_t ino);
7f207e10
AM
27992+typedef unsigned long long (*au_arraycb_t)(void *array, unsigned long long max,
27993+ void *arg);
27994+void au_array_free(void *array);
27995+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb, void *arg);
27996+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max);
27997+void au_iarray_free(struct inode **a, unsigned long long max);
1facf9fc 27998+
27999+/* sbinfo.c */
28000+void au_si_free(struct kobject *kobj);
28001+int au_si_alloc(struct super_block *sb);
28002+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr);
28003+
28004+unsigned int au_sigen_inc(struct super_block *sb);
28005+aufs_bindex_t au_new_br_id(struct super_block *sb);
28006+
e49829fe
JR
28007+int si_read_lock(struct super_block *sb, int flags);
28008+int si_write_lock(struct super_block *sb, int flags);
28009+int aufs_read_lock(struct dentry *dentry, int flags);
1facf9fc 28010+void aufs_read_unlock(struct dentry *dentry, int flags);
28011+void aufs_write_lock(struct dentry *dentry);
28012+void aufs_write_unlock(struct dentry *dentry);
e49829fe 28013+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags);
1facf9fc 28014+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2);
28015+
b752ccd1
AM
28016+int si_pid_test_slow(struct super_block *sb);
28017+void si_pid_set_slow(struct super_block *sb);
28018+void si_pid_clr_slow(struct super_block *sb);
28019+
1facf9fc 28020+/* wbr_policy.c */
28021+extern struct au_wbr_copyup_operations au_wbr_copyup_ops[];
28022+extern struct au_wbr_create_operations au_wbr_create_ops[];
28023+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst);
c2b27bf2 28024+int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex);
076b876e 28025+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t bstart);
c2b27bf2
AM
28026+
28027+/* mvdown.c */
28028+int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *arg);
1facf9fc 28029+
076b876e
AM
28030+#ifdef CONFIG_AUFS_FHSM
28031+/* fhsm.c */
28032+
28033+static inline pid_t au_fhsm_pid(struct au_fhsm *fhsm)
28034+{
28035+ pid_t pid;
28036+
28037+ spin_lock(&fhsm->fhsm_spin);
28038+ pid = fhsm->fhsm_pid;
28039+ spin_unlock(&fhsm->fhsm_spin);
28040+
28041+ return pid;
28042+}
28043+
28044+void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force);
28045+void au_fhsm_wrote_all(struct super_block *sb, int force);
28046+int au_fhsm_fd(struct super_block *sb, int oflags);
28047+int au_fhsm_br_alloc(struct au_branch *br);
c1595e42 28048+void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex);
076b876e
AM
28049+void au_fhsm_fin(struct super_block *sb);
28050+void au_fhsm_init(struct au_sbinfo *sbinfo);
28051+void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec);
28052+void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo);
28053+#else
28054+AuStubVoid(au_fhsm_wrote, struct super_block *sb, aufs_bindex_t bindex,
28055+ int force)
28056+AuStubVoid(au_fhsm_wrote_all, struct super_block *sb, int force)
28057+AuStub(int, au_fhsm_fd, return -EOPNOTSUPP, struct super_block *sb, int oflags)
c1595e42
JR
28058+AuStub(pid_t, au_fhsm_pid, return 0, struct au_fhsm *fhsm)
28059+AuStubInt0(au_fhsm_br_alloc, struct au_branch *br)
28060+AuStubVoid(au_fhsm_set_bottom, struct super_block *sb, aufs_bindex_t bindex)
076b876e
AM
28061+AuStubVoid(au_fhsm_fin, struct super_block *sb)
28062+AuStubVoid(au_fhsm_init, struct au_sbinfo *sbinfo)
28063+AuStubVoid(au_fhsm_set, struct au_sbinfo *sbinfo, unsigned int sec)
28064+AuStubVoid(au_fhsm_show, struct seq_file *seq, struct au_sbinfo *sbinfo)
28065+#endif
28066+
1facf9fc 28067+/* ---------------------------------------------------------------------- */
28068+
28069+static inline struct au_sbinfo *au_sbi(struct super_block *sb)
28070+{
28071+ return sb->s_fs_info;
28072+}
28073+
28074+/* ---------------------------------------------------------------------- */
28075+
28076+#ifdef CONFIG_AUFS_EXPORT
a2a7ad62 28077+int au_test_nfsd(void);
1facf9fc 28078+void au_export_init(struct super_block *sb);
b752ccd1 28079+void au_xigen_inc(struct inode *inode);
1facf9fc 28080+int au_xigen_new(struct inode *inode);
28081+int au_xigen_set(struct super_block *sb, struct file *base);
28082+void au_xigen_clr(struct super_block *sb);
28083+
28084+static inline int au_busy_or_stale(void)
28085+{
b752ccd1 28086+ if (!au_test_nfsd())
1facf9fc 28087+ return -EBUSY;
28088+ return -ESTALE;
28089+}
28090+#else
b752ccd1 28091+AuStubInt0(au_test_nfsd, void)
a2a7ad62 28092+AuStubVoid(au_export_init, struct super_block *sb)
b752ccd1 28093+AuStubVoid(au_xigen_inc, struct inode *inode)
4a4d8108
AM
28094+AuStubInt0(au_xigen_new, struct inode *inode)
28095+AuStubInt0(au_xigen_set, struct super_block *sb, struct file *base)
28096+AuStubVoid(au_xigen_clr, struct super_block *sb)
c1595e42 28097+AuStub(int, au_busy_or_stale, return -EBUSY, void)
1facf9fc 28098+#endif /* CONFIG_AUFS_EXPORT */
28099+
28100+/* ---------------------------------------------------------------------- */
28101+
e49829fe
JR
28102+#ifdef CONFIG_AUFS_SBILIST
28103+/* module.c */
28104+extern struct au_splhead au_sbilist;
28105+
28106+static inline void au_sbilist_init(void)
28107+{
28108+ au_spl_init(&au_sbilist);
28109+}
28110+
28111+static inline void au_sbilist_add(struct super_block *sb)
28112+{
28113+ au_spl_add(&au_sbi(sb)->si_list, &au_sbilist);
28114+}
28115+
28116+static inline void au_sbilist_del(struct super_block *sb)
28117+{
28118+ au_spl_del(&au_sbi(sb)->si_list, &au_sbilist);
28119+}
53392da6
AM
28120+
28121+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
28122+static inline void au_sbilist_lock(void)
28123+{
28124+ spin_lock(&au_sbilist.spin);
28125+}
28126+
28127+static inline void au_sbilist_unlock(void)
28128+{
28129+ spin_unlock(&au_sbilist.spin);
28130+}
28131+#define AuGFP_SBILIST GFP_ATOMIC
28132+#else
28133+AuStubVoid(au_sbilist_lock, void)
28134+AuStubVoid(au_sbilist_unlock, void)
28135+#define AuGFP_SBILIST GFP_NOFS
28136+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
e49829fe
JR
28137+#else
28138+AuStubVoid(au_sbilist_init, void)
c1595e42
JR
28139+AuStubVoid(au_sbilist_add, struct super_block *sb)
28140+AuStubVoid(au_sbilist_del, struct super_block *sb)
53392da6
AM
28141+AuStubVoid(au_sbilist_lock, void)
28142+AuStubVoid(au_sbilist_unlock, void)
28143+#define AuGFP_SBILIST GFP_NOFS
e49829fe
JR
28144+#endif
28145+
28146+/* ---------------------------------------------------------------------- */
28147+
1facf9fc 28148+static inline void dbgaufs_si_null(struct au_sbinfo *sbinfo)
28149+{
dece6358 28150+ /*
c1595e42 28151+ * This function is a dynamic '__init' function actually,
dece6358
AM
28152+ * so the tiny check for si_rwsem is unnecessary.
28153+ */
28154+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
1facf9fc 28155+#ifdef CONFIG_DEBUG_FS
28156+ sbinfo->si_dbgaufs = NULL;
86dc4139 28157+ sbinfo->si_dbgaufs_plink = NULL;
1facf9fc 28158+ sbinfo->si_dbgaufs_xib = NULL;
28159+#ifdef CONFIG_AUFS_EXPORT
28160+ sbinfo->si_dbgaufs_xigen = NULL;
28161+#endif
28162+#endif
28163+}
28164+
28165+/* ---------------------------------------------------------------------- */
28166+
b752ccd1
AM
28167+static inline pid_t si_pid_bit(void)
28168+{
28169+ /* the origin of pid is 1, but the bitmap's is 0 */
28170+ return current->pid - 1;
28171+}
28172+
28173+static inline int si_pid_test(struct super_block *sb)
28174+{
076b876e
AM
28175+ pid_t bit;
28176+
28177+ bit = si_pid_bit();
b752ccd1
AM
28178+ if (bit < PID_MAX_DEFAULT)
28179+ return test_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
c1595e42 28180+ return si_pid_test_slow(sb);
b752ccd1
AM
28181+}
28182+
28183+static inline void si_pid_set(struct super_block *sb)
28184+{
076b876e
AM
28185+ pid_t bit;
28186+
28187+ bit = si_pid_bit();
b752ccd1
AM
28188+ if (bit < PID_MAX_DEFAULT) {
28189+ AuDebugOn(test_bit(bit, au_sbi(sb)->au_si_pid.bitmap));
28190+ set_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
28191+ /* smp_mb(); */
28192+ } else
28193+ si_pid_set_slow(sb);
28194+}
28195+
28196+static inline void si_pid_clr(struct super_block *sb)
28197+{
076b876e
AM
28198+ pid_t bit;
28199+
28200+ bit = si_pid_bit();
b752ccd1
AM
28201+ if (bit < PID_MAX_DEFAULT) {
28202+ AuDebugOn(!test_bit(bit, au_sbi(sb)->au_si_pid.bitmap));
28203+ clear_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
28204+ /* smp_mb(); */
28205+ } else
28206+ si_pid_clr_slow(sb);
28207+}
28208+
28209+/* ---------------------------------------------------------------------- */
28210+
1facf9fc 28211+/* lock superblock. mainly for entry point functions */
28212+/*
b752ccd1
AM
28213+ * __si_read_lock, __si_write_lock,
28214+ * __si_read_unlock, __si_write_unlock, __si_downgrade_lock
1facf9fc 28215+ */
b752ccd1 28216+AuSimpleRwsemFuncs(__si, struct super_block *sb, &au_sbi(sb)->si_rwsem);
1facf9fc 28217+
dece6358
AM
28218+#define SiMustNoWaiters(sb) AuRwMustNoWaiters(&au_sbi(sb)->si_rwsem)
28219+#define SiMustAnyLock(sb) AuRwMustAnyLock(&au_sbi(sb)->si_rwsem)
28220+#define SiMustWriteLock(sb) AuRwMustWriteLock(&au_sbi(sb)->si_rwsem)
28221+
b752ccd1
AM
28222+static inline void si_noflush_read_lock(struct super_block *sb)
28223+{
28224+ __si_read_lock(sb);
28225+ si_pid_set(sb);
28226+}
28227+
28228+static inline int si_noflush_read_trylock(struct super_block *sb)
28229+{
076b876e
AM
28230+ int locked;
28231+
28232+ locked = __si_read_trylock(sb);
b752ccd1
AM
28233+ if (locked)
28234+ si_pid_set(sb);
28235+ return locked;
28236+}
28237+
28238+static inline void si_noflush_write_lock(struct super_block *sb)
28239+{
28240+ __si_write_lock(sb);
28241+ si_pid_set(sb);
28242+}
28243+
28244+static inline int si_noflush_write_trylock(struct super_block *sb)
28245+{
076b876e
AM
28246+ int locked;
28247+
28248+ locked = __si_write_trylock(sb);
b752ccd1
AM
28249+ if (locked)
28250+ si_pid_set(sb);
28251+ return locked;
28252+}
28253+
7e9cd9fe 28254+#if 0 /* reserved */
1facf9fc 28255+static inline int si_read_trylock(struct super_block *sb, int flags)
28256+{
28257+ if (au_ftest_lock(flags, FLUSH))
28258+ au_nwt_flush(&au_sbi(sb)->si_nowait);
28259+ return si_noflush_read_trylock(sb);
28260+}
e49829fe 28261+#endif
1facf9fc 28262+
b752ccd1
AM
28263+static inline void si_read_unlock(struct super_block *sb)
28264+{
28265+ si_pid_clr(sb);
28266+ __si_read_unlock(sb);
28267+}
28268+
7e9cd9fe 28269+#if 0 /* reserved */
1facf9fc 28270+static inline int si_write_trylock(struct super_block *sb, int flags)
28271+{
28272+ if (au_ftest_lock(flags, FLUSH))
28273+ au_nwt_flush(&au_sbi(sb)->si_nowait);
28274+ return si_noflush_write_trylock(sb);
28275+}
b752ccd1
AM
28276+#endif
28277+
28278+static inline void si_write_unlock(struct super_block *sb)
28279+{
28280+ si_pid_clr(sb);
28281+ __si_write_unlock(sb);
28282+}
28283+
7e9cd9fe 28284+#if 0 /* reserved */
b752ccd1
AM
28285+static inline void si_downgrade_lock(struct super_block *sb)
28286+{
28287+ __si_downgrade_lock(sb);
28288+}
28289+#endif
1facf9fc 28290+
28291+/* ---------------------------------------------------------------------- */
28292+
28293+static inline aufs_bindex_t au_sbend(struct super_block *sb)
28294+{
dece6358 28295+ SiMustAnyLock(sb);
1facf9fc 28296+ return au_sbi(sb)->si_bend;
28297+}
28298+
28299+static inline unsigned int au_mntflags(struct super_block *sb)
28300+{
dece6358 28301+ SiMustAnyLock(sb);
1facf9fc 28302+ return au_sbi(sb)->si_mntflags;
28303+}
28304+
28305+static inline unsigned int au_sigen(struct super_block *sb)
28306+{
dece6358 28307+ SiMustAnyLock(sb);
1facf9fc 28308+ return au_sbi(sb)->si_generation;
28309+}
28310+
7f207e10
AM
28311+static inline void au_ninodes_inc(struct super_block *sb)
28312+{
28313+ atomic_long_inc(&au_sbi(sb)->si_ninodes);
28314+}
28315+
28316+static inline void au_ninodes_dec(struct super_block *sb)
28317+{
28318+ AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_ninodes));
28319+ atomic_long_dec(&au_sbi(sb)->si_ninodes);
28320+}
28321+
28322+static inline void au_nfiles_inc(struct super_block *sb)
28323+{
28324+ atomic_long_inc(&au_sbi(sb)->si_nfiles);
28325+}
28326+
28327+static inline void au_nfiles_dec(struct super_block *sb)
28328+{
28329+ AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_nfiles));
28330+ atomic_long_dec(&au_sbi(sb)->si_nfiles);
28331+}
28332+
1facf9fc 28333+static inline struct au_branch *au_sbr(struct super_block *sb,
28334+ aufs_bindex_t bindex)
28335+{
dece6358 28336+ SiMustAnyLock(sb);
1facf9fc 28337+ return au_sbi(sb)->si_branch[0 + bindex];
28338+}
28339+
28340+static inline void au_xino_brid_set(struct super_block *sb, aufs_bindex_t brid)
28341+{
dece6358 28342+ SiMustWriteLock(sb);
1facf9fc 28343+ au_sbi(sb)->si_xino_brid = brid;
28344+}
28345+
28346+static inline aufs_bindex_t au_xino_brid(struct super_block *sb)
28347+{
dece6358 28348+ SiMustAnyLock(sb);
1facf9fc 28349+ return au_sbi(sb)->si_xino_brid;
28350+}
28351+
28352+#endif /* __KERNEL__ */
28353+#endif /* __AUFS_SUPER_H__ */
7f207e10
AM
28354diff -urN /usr/share/empty/fs/aufs/sysaufs.c linux/fs/aufs/sysaufs.c
28355--- /usr/share/empty/fs/aufs/sysaufs.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 28356+++ linux/fs/aufs/sysaufs.c 2015-09-24 10:47:58.254719746 +0200
523b37e3 28357@@ -0,0 +1,104 @@
1facf9fc 28358+/*
2000de60 28359+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 28360+ *
28361+ * This program, aufs is free software; you can redistribute it and/or modify
28362+ * it under the terms of the GNU General Public License as published by
28363+ * the Free Software Foundation; either version 2 of the License, or
28364+ * (at your option) any later version.
dece6358
AM
28365+ *
28366+ * This program is distributed in the hope that it will be useful,
28367+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28368+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28369+ * GNU General Public License for more details.
28370+ *
28371+ * You should have received a copy of the GNU General Public License
523b37e3 28372+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 28373+ */
28374+
28375+/*
28376+ * sysfs interface and lifetime management
28377+ * they are necessary even when sysfs is disabled.
28378+ */
28379+
1facf9fc 28380+#include <linux/random.h>
1facf9fc 28381+#include "aufs.h"
28382+
28383+unsigned long sysaufs_si_mask;
e49829fe 28384+struct kset *sysaufs_kset;
1facf9fc 28385+
28386+#define AuSiAttr(_name) { \
28387+ .attr = { .name = __stringify(_name), .mode = 0444 }, \
28388+ .show = sysaufs_si_##_name, \
28389+}
28390+
28391+static struct sysaufs_si_attr sysaufs_si_attr_xi_path = AuSiAttr(xi_path);
28392+struct attribute *sysaufs_si_attrs[] = {
28393+ &sysaufs_si_attr_xi_path.attr,
28394+ NULL,
28395+};
28396+
4a4d8108 28397+static const struct sysfs_ops au_sbi_ops = {
1facf9fc 28398+ .show = sysaufs_si_show
28399+};
28400+
28401+static struct kobj_type au_sbi_ktype = {
28402+ .release = au_si_free,
28403+ .sysfs_ops = &au_sbi_ops,
28404+ .default_attrs = sysaufs_si_attrs
28405+};
28406+
28407+/* ---------------------------------------------------------------------- */
28408+
28409+int sysaufs_si_init(struct au_sbinfo *sbinfo)
28410+{
28411+ int err;
28412+
e49829fe 28413+ sbinfo->si_kobj.kset = sysaufs_kset;
1facf9fc 28414+ /* cf. sysaufs_name() */
28415+ err = kobject_init_and_add
e49829fe 28416+ (&sbinfo->si_kobj, &au_sbi_ktype, /*&sysaufs_kset->kobj*/NULL,
1facf9fc 28417+ SysaufsSiNamePrefix "%lx", sysaufs_si_id(sbinfo));
28418+
28419+ dbgaufs_si_null(sbinfo);
28420+ if (!err) {
28421+ err = dbgaufs_si_init(sbinfo);
28422+ if (unlikely(err))
28423+ kobject_put(&sbinfo->si_kobj);
28424+ }
28425+ return err;
28426+}
28427+
28428+void sysaufs_fin(void)
28429+{
28430+ dbgaufs_fin();
e49829fe
JR
28431+ sysfs_remove_group(&sysaufs_kset->kobj, sysaufs_attr_group);
28432+ kset_unregister(sysaufs_kset);
1facf9fc 28433+}
28434+
28435+int __init sysaufs_init(void)
28436+{
28437+ int err;
28438+
28439+ do {
28440+ get_random_bytes(&sysaufs_si_mask, sizeof(sysaufs_si_mask));
28441+ } while (!sysaufs_si_mask);
28442+
4a4d8108 28443+ err = -EINVAL;
e49829fe
JR
28444+ sysaufs_kset = kset_create_and_add(AUFS_NAME, NULL, fs_kobj);
28445+ if (unlikely(!sysaufs_kset))
4a4d8108 28446+ goto out;
e49829fe
JR
28447+ err = PTR_ERR(sysaufs_kset);
28448+ if (IS_ERR(sysaufs_kset))
1facf9fc 28449+ goto out;
e49829fe 28450+ err = sysfs_create_group(&sysaufs_kset->kobj, sysaufs_attr_group);
1facf9fc 28451+ if (unlikely(err)) {
e49829fe 28452+ kset_unregister(sysaufs_kset);
1facf9fc 28453+ goto out;
28454+ }
28455+
28456+ err = dbgaufs_init();
28457+ if (unlikely(err))
28458+ sysaufs_fin();
4f0767ce 28459+out:
1facf9fc 28460+ return err;
28461+}
7f207e10
AM
28462diff -urN /usr/share/empty/fs/aufs/sysaufs.h linux/fs/aufs/sysaufs.h
28463--- /usr/share/empty/fs/aufs/sysaufs.h 1970-01-01 01:00:00.000000000 +0100
ab036dbd 28464+++ linux/fs/aufs/sysaufs.h 2015-09-24 10:47:58.254719746 +0200
c1595e42 28465@@ -0,0 +1,101 @@
1facf9fc 28466+/*
2000de60 28467+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 28468+ *
28469+ * This program, aufs is free software; you can redistribute it and/or modify
28470+ * it under the terms of the GNU General Public License as published by
28471+ * the Free Software Foundation; either version 2 of the License, or
28472+ * (at your option) any later version.
dece6358
AM
28473+ *
28474+ * This program is distributed in the hope that it will be useful,
28475+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28476+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28477+ * GNU General Public License for more details.
28478+ *
28479+ * You should have received a copy of the GNU General Public License
523b37e3 28480+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 28481+ */
28482+
28483+/*
28484+ * sysfs interface and mount lifetime management
28485+ */
28486+
28487+#ifndef __SYSAUFS_H__
28488+#define __SYSAUFS_H__
28489+
28490+#ifdef __KERNEL__
28491+
1facf9fc 28492+#include <linux/sysfs.h>
1facf9fc 28493+#include "module.h"
28494+
dece6358
AM
28495+struct super_block;
28496+struct au_sbinfo;
28497+
1facf9fc 28498+struct sysaufs_si_attr {
28499+ struct attribute attr;
28500+ int (*show)(struct seq_file *seq, struct super_block *sb);
28501+};
28502+
28503+/* ---------------------------------------------------------------------- */
28504+
28505+/* sysaufs.c */
28506+extern unsigned long sysaufs_si_mask;
e49829fe 28507+extern struct kset *sysaufs_kset;
1facf9fc 28508+extern struct attribute *sysaufs_si_attrs[];
28509+int sysaufs_si_init(struct au_sbinfo *sbinfo);
28510+int __init sysaufs_init(void);
28511+void sysaufs_fin(void);
28512+
28513+/* ---------------------------------------------------------------------- */
28514+
28515+ /* some people don't like to show a pointer in kernel */
28516+static inline unsigned long sysaufs_si_id(struct au_sbinfo *sbinfo)
28517+{
28518+ return sysaufs_si_mask ^ (unsigned long)sbinfo;
28519+}
28520+
28521+#define SysaufsSiNamePrefix "si_"
28522+#define SysaufsSiNameLen (sizeof(SysaufsSiNamePrefix) + 16)
28523+static inline void sysaufs_name(struct au_sbinfo *sbinfo, char *name)
28524+{
28525+ snprintf(name, SysaufsSiNameLen, SysaufsSiNamePrefix "%lx",
28526+ sysaufs_si_id(sbinfo));
28527+}
28528+
28529+struct au_branch;
28530+#ifdef CONFIG_SYSFS
28531+/* sysfs.c */
28532+extern struct attribute_group *sysaufs_attr_group;
28533+
28534+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb);
28535+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
28536+ char *buf);
076b876e
AM
28537+long au_brinfo_ioctl(struct file *file, unsigned long arg);
28538+#ifdef CONFIG_COMPAT
28539+long au_brinfo_compat_ioctl(struct file *file, unsigned long arg);
28540+#endif
1facf9fc 28541+
28542+void sysaufs_br_init(struct au_branch *br);
28543+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
28544+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
28545+
28546+#define sysaufs_brs_init() do {} while (0)
28547+
28548+#else
28549+#define sysaufs_attr_group NULL
28550+
4a4d8108 28551+AuStubInt0(sysaufs_si_xi_path, struct seq_file *seq, struct super_block *sb)
c1595e42
JR
28552+AuStub(ssize_t, sysaufs_si_show, return 0, struct kobject *kobj,
28553+ struct attribute *attr, char *buf)
4a4d8108
AM
28554+AuStubVoid(sysaufs_br_init, struct au_branch *br)
28555+AuStubVoid(sysaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
28556+AuStubVoid(sysaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
1facf9fc 28557+
28558+static inline void sysaufs_brs_init(void)
28559+{
28560+ sysaufs_brs = 0;
28561+}
28562+
28563+#endif /* CONFIG_SYSFS */
28564+
28565+#endif /* __KERNEL__ */
28566+#endif /* __SYSAUFS_H__ */
7f207e10
AM
28567diff -urN /usr/share/empty/fs/aufs/sysfs.c linux/fs/aufs/sysfs.c
28568--- /usr/share/empty/fs/aufs/sysfs.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd
AM
28569+++ linux/fs/aufs/sysfs.c 2015-12-10 18:46:31.223310574 +0100
28570@@ -0,0 +1,376 @@
1facf9fc 28571+/*
2000de60 28572+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 28573+ *
28574+ * This program, aufs is free software; you can redistribute it and/or modify
28575+ * it under the terms of the GNU General Public License as published by
28576+ * the Free Software Foundation; either version 2 of the License, or
28577+ * (at your option) any later version.
dece6358
AM
28578+ *
28579+ * This program is distributed in the hope that it will be useful,
28580+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28581+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28582+ * GNU General Public License for more details.
28583+ *
28584+ * You should have received a copy of the GNU General Public License
523b37e3 28585+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 28586+ */
28587+
28588+/*
28589+ * sysfs interface
28590+ */
28591+
076b876e 28592+#include <linux/compat.h>
1facf9fc 28593+#include <linux/seq_file.h>
1facf9fc 28594+#include "aufs.h"
28595+
4a4d8108
AM
28596+#ifdef CONFIG_AUFS_FS_MODULE
28597+/* this entry violates the "one line per file" policy of sysfs */
28598+static ssize_t config_show(struct kobject *kobj, struct kobj_attribute *attr,
28599+ char *buf)
28600+{
28601+ ssize_t err;
28602+ static char *conf =
28603+/* this file is generated at compiling */
28604+#include "conf.str"
28605+ ;
28606+
28607+ err = snprintf(buf, PAGE_SIZE, conf);
28608+ if (unlikely(err >= PAGE_SIZE))
28609+ err = -EFBIG;
28610+ return err;
28611+}
28612+
28613+static struct kobj_attribute au_config_attr = __ATTR_RO(config);
28614+#endif
28615+
1facf9fc 28616+static struct attribute *au_attr[] = {
4a4d8108
AM
28617+#ifdef CONFIG_AUFS_FS_MODULE
28618+ &au_config_attr.attr,
28619+#endif
1facf9fc 28620+ NULL, /* need to NULL terminate the list of attributes */
28621+};
28622+
28623+static struct attribute_group sysaufs_attr_group_body = {
28624+ .attrs = au_attr
28625+};
28626+
28627+struct attribute_group *sysaufs_attr_group = &sysaufs_attr_group_body;
28628+
28629+/* ---------------------------------------------------------------------- */
28630+
28631+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb)
28632+{
28633+ int err;
28634+
dece6358
AM
28635+ SiMustAnyLock(sb);
28636+
1facf9fc 28637+ err = 0;
28638+ if (au_opt_test(au_mntflags(sb), XINO)) {
28639+ err = au_xino_path(seq, au_sbi(sb)->si_xib);
28640+ seq_putc(seq, '\n');
28641+ }
28642+ return err;
28643+}
28644+
28645+/*
28646+ * the lifetime of branch is independent from the entry under sysfs.
28647+ * sysfs handles the lifetime of the entry, and never calls ->show() after it is
28648+ * unlinked.
28649+ */
28650+static int sysaufs_si_br(struct seq_file *seq, struct super_block *sb,
392086de 28651+ aufs_bindex_t bindex, int idx)
1facf9fc 28652+{
1e00d052 28653+ int err;
1facf9fc 28654+ struct path path;
28655+ struct dentry *root;
28656+ struct au_branch *br;
076b876e 28657+ au_br_perm_str_t perm;
1facf9fc 28658+
28659+ AuDbg("b%d\n", bindex);
28660+
1e00d052 28661+ err = 0;
1facf9fc 28662+ root = sb->s_root;
28663+ di_read_lock_parent(root, !AuLock_IR);
28664+ br = au_sbr(sb, bindex);
392086de
AM
28665+
28666+ switch (idx) {
28667+ case AuBrSysfs_BR:
28668+ path.mnt = au_br_mnt(br);
28669+ path.dentry = au_h_dptr(root, bindex);
ab036dbd
AM
28670+ err = au_seq_path(seq, &path);
28671+ if (!err) {
28672+ au_optstr_br_perm(&perm, br->br_perm);
28673+ err = seq_printf(seq, "=%s\n", perm.a);
28674+ }
392086de
AM
28675+ break;
28676+ case AuBrSysfs_BRID:
28677+ err = seq_printf(seq, "%d\n", br->br_id);
392086de
AM
28678+ break;
28679+ }
076b876e
AM
28680+ di_read_unlock(root, !AuLock_IR);
28681+ if (err == -1)
28682+ err = -E2BIG;
392086de 28683+
1e00d052 28684+ return err;
1facf9fc 28685+}
28686+
28687+/* ---------------------------------------------------------------------- */
28688+
28689+static struct seq_file *au_seq(char *p, ssize_t len)
28690+{
28691+ struct seq_file *seq;
28692+
28693+ seq = kzalloc(sizeof(*seq), GFP_NOFS);
28694+ if (seq) {
28695+ /* mutex_init(&seq.lock); */
28696+ seq->buf = p;
28697+ seq->size = len;
28698+ return seq; /* success */
28699+ }
28700+
28701+ seq = ERR_PTR(-ENOMEM);
28702+ return seq;
28703+}
28704+
392086de
AM
28705+#define SysaufsBr_PREFIX "br"
28706+#define SysaufsBrid_PREFIX "brid"
1facf9fc 28707+
28708+/* todo: file size may exceed PAGE_SIZE */
28709+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
1308ab2a 28710+ char *buf)
1facf9fc 28711+{
28712+ ssize_t err;
392086de 28713+ int idx;
1facf9fc 28714+ long l;
28715+ aufs_bindex_t bend;
28716+ struct au_sbinfo *sbinfo;
28717+ struct super_block *sb;
28718+ struct seq_file *seq;
28719+ char *name;
28720+ struct attribute **cattr;
28721+
28722+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
28723+ sb = sbinfo->si_sb;
1308ab2a 28724+
28725+ /*
28726+ * prevent a race condition between sysfs and aufs.
28727+ * for instance, sysfs_file_read() calls sysfs_get_active_two() which
28728+ * prohibits maintaining the sysfs entries.
28729+ * here we acquire read lock after sysfs_get_active_two().
28730+ * on the other hand, the remount process may maintain the sysfs/aufs
28731+ * entries after acquiring write lock.
28732+ * it can cause a deadlock.
28733+ * simply we gave up processing read here.
28734+ */
28735+ err = -EBUSY;
28736+ if (unlikely(!si_noflush_read_trylock(sb)))
28737+ goto out;
1facf9fc 28738+
28739+ seq = au_seq(buf, PAGE_SIZE);
28740+ err = PTR_ERR(seq);
28741+ if (IS_ERR(seq))
1308ab2a 28742+ goto out_unlock;
1facf9fc 28743+
28744+ name = (void *)attr->name;
28745+ cattr = sysaufs_si_attrs;
28746+ while (*cattr) {
28747+ if (!strcmp(name, (*cattr)->name)) {
28748+ err = container_of(*cattr, struct sysaufs_si_attr, attr)
28749+ ->show(seq, sb);
28750+ goto out_seq;
28751+ }
28752+ cattr++;
28753+ }
28754+
392086de
AM
28755+ if (!strncmp(name, SysaufsBrid_PREFIX,
28756+ sizeof(SysaufsBrid_PREFIX) - 1)) {
28757+ idx = AuBrSysfs_BRID;
28758+ name += sizeof(SysaufsBrid_PREFIX) - 1;
28759+ } else if (!strncmp(name, SysaufsBr_PREFIX,
28760+ sizeof(SysaufsBr_PREFIX) - 1)) {
28761+ idx = AuBrSysfs_BR;
1facf9fc 28762+ name += sizeof(SysaufsBr_PREFIX) - 1;
392086de
AM
28763+ } else
28764+ BUG();
28765+
28766+ err = kstrtol(name, 10, &l);
28767+ if (!err) {
28768+ bend = au_sbend(sb);
28769+ if (l <= bend)
28770+ err = sysaufs_si_br(seq, sb, (aufs_bindex_t)l, idx);
28771+ else
28772+ err = -ENOENT;
1facf9fc 28773+ }
1facf9fc 28774+
4f0767ce 28775+out_seq:
1facf9fc 28776+ if (!err) {
28777+ err = seq->count;
28778+ /* sysfs limit */
28779+ if (unlikely(err == PAGE_SIZE))
28780+ err = -EFBIG;
28781+ }
28782+ kfree(seq);
4f0767ce 28783+out_unlock:
1facf9fc 28784+ si_read_unlock(sb);
4f0767ce 28785+out:
1facf9fc 28786+ return err;
28787+}
28788+
28789+/* ---------------------------------------------------------------------- */
28790+
076b876e
AM
28791+static int au_brinfo(struct super_block *sb, union aufs_brinfo __user *arg)
28792+{
28793+ int err;
28794+ int16_t brid;
28795+ aufs_bindex_t bindex, bend;
28796+ size_t sz;
28797+ char *buf;
28798+ struct seq_file *seq;
28799+ struct au_branch *br;
28800+
28801+ si_read_lock(sb, AuLock_FLUSH);
28802+ bend = au_sbend(sb);
28803+ err = bend + 1;
28804+ if (!arg)
28805+ goto out;
28806+
28807+ err = -ENOMEM;
28808+ buf = (void *)__get_free_page(GFP_NOFS);
28809+ if (unlikely(!buf))
28810+ goto out;
28811+
28812+ seq = au_seq(buf, PAGE_SIZE);
28813+ err = PTR_ERR(seq);
28814+ if (IS_ERR(seq))
28815+ goto out_buf;
28816+
28817+ sz = sizeof(*arg) - offsetof(union aufs_brinfo, path);
28818+ for (bindex = 0; bindex <= bend; bindex++, arg++) {
28819+ err = !access_ok(VERIFY_WRITE, arg, sizeof(*arg));
28820+ if (unlikely(err))
28821+ break;
28822+
28823+ br = au_sbr(sb, bindex);
28824+ brid = br->br_id;
28825+ BUILD_BUG_ON(sizeof(brid) != sizeof(arg->id));
28826+ err = __put_user(brid, &arg->id);
28827+ if (unlikely(err))
28828+ break;
28829+
28830+ BUILD_BUG_ON(sizeof(br->br_perm) != sizeof(arg->perm));
28831+ err = __put_user(br->br_perm, &arg->perm);
28832+ if (unlikely(err))
28833+ break;
28834+
ab036dbd
AM
28835+ err = au_seq_path(seq, &br->br_path);
28836+ if (unlikely(err))
28837+ break;
076b876e
AM
28838+ err = seq_putc(seq, '\0');
28839+ if (!err && seq->count <= sz) {
28840+ err = copy_to_user(arg->path, seq->buf, seq->count);
28841+ seq->count = 0;
28842+ if (unlikely(err))
28843+ break;
28844+ } else {
28845+ err = -E2BIG;
28846+ goto out_seq;
28847+ }
28848+ }
28849+ if (unlikely(err))
28850+ err = -EFAULT;
28851+
28852+out_seq:
28853+ kfree(seq);
28854+out_buf:
28855+ free_page((unsigned long)buf);
28856+out:
28857+ si_read_unlock(sb);
28858+ return err;
28859+}
28860+
28861+long au_brinfo_ioctl(struct file *file, unsigned long arg)
28862+{
2000de60 28863+ return au_brinfo(file->f_path.dentry->d_sb, (void __user *)arg);
076b876e
AM
28864+}
28865+
28866+#ifdef CONFIG_COMPAT
28867+long au_brinfo_compat_ioctl(struct file *file, unsigned long arg)
28868+{
2000de60 28869+ return au_brinfo(file->f_path.dentry->d_sb, compat_ptr(arg));
076b876e
AM
28870+}
28871+#endif
28872+
28873+/* ---------------------------------------------------------------------- */
28874+
1facf9fc 28875+void sysaufs_br_init(struct au_branch *br)
28876+{
392086de
AM
28877+ int i;
28878+ struct au_brsysfs *br_sysfs;
28879+ struct attribute *attr;
4a4d8108 28880+
392086de
AM
28881+ br_sysfs = br->br_sysfs;
28882+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
28883+ attr = &br_sysfs->attr;
28884+ sysfs_attr_init(attr);
28885+ attr->name = br_sysfs->name;
28886+ attr->mode = S_IRUGO;
28887+ br_sysfs++;
28888+ }
1facf9fc 28889+}
28890+
28891+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
28892+{
28893+ struct au_branch *br;
28894+ struct kobject *kobj;
392086de
AM
28895+ struct au_brsysfs *br_sysfs;
28896+ int i;
1facf9fc 28897+ aufs_bindex_t bend;
28898+
28899+ dbgaufs_brs_del(sb, bindex);
28900+
28901+ if (!sysaufs_brs)
28902+ return;
28903+
28904+ kobj = &au_sbi(sb)->si_kobj;
28905+ bend = au_sbend(sb);
28906+ for (; bindex <= bend; bindex++) {
28907+ br = au_sbr(sb, bindex);
392086de
AM
28908+ br_sysfs = br->br_sysfs;
28909+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
28910+ sysfs_remove_file(kobj, &br_sysfs->attr);
28911+ br_sysfs++;
28912+ }
1facf9fc 28913+ }
28914+}
28915+
28916+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
28917+{
392086de 28918+ int err, i;
1facf9fc 28919+ aufs_bindex_t bend;
28920+ struct kobject *kobj;
28921+ struct au_branch *br;
392086de 28922+ struct au_brsysfs *br_sysfs;
1facf9fc 28923+
28924+ dbgaufs_brs_add(sb, bindex);
28925+
28926+ if (!sysaufs_brs)
28927+ return;
28928+
28929+ kobj = &au_sbi(sb)->si_kobj;
28930+ bend = au_sbend(sb);
28931+ for (; bindex <= bend; bindex++) {
28932+ br = au_sbr(sb, bindex);
392086de
AM
28933+ br_sysfs = br->br_sysfs;
28934+ snprintf(br_sysfs[AuBrSysfs_BR].name, sizeof(br_sysfs->name),
28935+ SysaufsBr_PREFIX "%d", bindex);
28936+ snprintf(br_sysfs[AuBrSysfs_BRID].name, sizeof(br_sysfs->name),
28937+ SysaufsBrid_PREFIX "%d", bindex);
28938+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
28939+ err = sysfs_create_file(kobj, &br_sysfs->attr);
28940+ if (unlikely(err))
28941+ pr_warn("failed %s under sysfs(%d)\n",
28942+ br_sysfs->name, err);
28943+ br_sysfs++;
28944+ }
1facf9fc 28945+ }
28946+}
7f207e10
AM
28947diff -urN /usr/share/empty/fs/aufs/sysrq.c linux/fs/aufs/sysrq.c
28948--- /usr/share/empty/fs/aufs/sysrq.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 28949+++ linux/fs/aufs/sysrq.c 2015-12-10 18:46:31.223310574 +0100
076b876e 28950@@ -0,0 +1,157 @@
1facf9fc 28951+/*
2000de60 28952+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 28953+ *
28954+ * This program, aufs is free software; you can redistribute it and/or modify
28955+ * it under the terms of the GNU General Public License as published by
28956+ * the Free Software Foundation; either version 2 of the License, or
28957+ * (at your option) any later version.
dece6358
AM
28958+ *
28959+ * This program is distributed in the hope that it will be useful,
28960+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28961+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28962+ * GNU General Public License for more details.
28963+ *
28964+ * You should have received a copy of the GNU General Public License
523b37e3 28965+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 28966+ */
28967+
28968+/*
28969+ * magic sysrq handler
28970+ */
28971+
1facf9fc 28972+/* #include <linux/sysrq.h> */
027c5e7a 28973+#include <linux/writeback.h>
1facf9fc 28974+#include "aufs.h"
28975+
28976+/* ---------------------------------------------------------------------- */
28977+
28978+static void sysrq_sb(struct super_block *sb)
28979+{
28980+ char *plevel;
28981+ struct au_sbinfo *sbinfo;
28982+ struct file *file;
523b37e3
AM
28983+ struct au_sphlhead *files;
28984+ struct au_finfo *finfo;
1facf9fc 28985+
28986+ plevel = au_plevel;
28987+ au_plevel = KERN_WARNING;
1facf9fc 28988+
4a4d8108 28989+ /* since we define pr_fmt, call printk directly */
c06a8ce3
AM
28990+#define pr(str) printk(KERN_WARNING AUFS_NAME ": " str)
28991+
28992+ sbinfo = au_sbi(sb);
4a4d8108 28993+ printk(KERN_WARNING "si=%lx\n", sysaufs_si_id(sbinfo));
c06a8ce3 28994+ pr("superblock\n");
1facf9fc 28995+ au_dpri_sb(sb);
027c5e7a
AM
28996+
28997+#if 0
c06a8ce3 28998+ pr("root dentry\n");
1facf9fc 28999+ au_dpri_dentry(sb->s_root);
c06a8ce3 29000+ pr("root inode\n");
5527c038 29001+ au_dpri_inode(d_inode(sb->s_root));
027c5e7a
AM
29002+#endif
29003+
1facf9fc 29004+#if 0
027c5e7a
AM
29005+ do {
29006+ int err, i, j, ndentry;
29007+ struct au_dcsub_pages dpages;
29008+ struct au_dpage *dpage;
29009+
29010+ err = au_dpages_init(&dpages, GFP_ATOMIC);
29011+ if (unlikely(err))
29012+ break;
29013+ err = au_dcsub_pages(&dpages, sb->s_root, NULL, NULL);
29014+ if (!err)
29015+ for (i = 0; i < dpages.ndpage; i++) {
29016+ dpage = dpages.dpages + i;
29017+ ndentry = dpage->ndentry;
29018+ for (j = 0; j < ndentry; j++)
29019+ au_dpri_dentry(dpage->dentries[j]);
29020+ }
29021+ au_dpages_free(&dpages);
29022+ } while (0);
29023+#endif
29024+
29025+#if 1
29026+ {
29027+ struct inode *i;
076b876e 29028+
c06a8ce3 29029+ pr("isolated inode\n");
2cbb1c4b
JR
29030+ spin_lock(&inode_sb_list_lock);
29031+ list_for_each_entry(i, &sb->s_inodes, i_sb_list) {
29032+ spin_lock(&i->i_lock);
b4510431 29033+ if (1 || hlist_empty(&i->i_dentry))
027c5e7a 29034+ au_dpri_inode(i);
2cbb1c4b
JR
29035+ spin_unlock(&i->i_lock);
29036+ }
29037+ spin_unlock(&inode_sb_list_lock);
027c5e7a 29038+ }
1facf9fc 29039+#endif
c06a8ce3 29040+ pr("files\n");
523b37e3
AM
29041+ files = &au_sbi(sb)->si_files;
29042+ spin_lock(&files->spin);
29043+ hlist_for_each_entry(finfo, &files->head, fi_hlist) {
4a4d8108 29044+ umode_t mode;
076b876e 29045+
523b37e3 29046+ file = finfo->fi_file;
c06a8ce3 29047+ mode = file_inode(file)->i_mode;
38d290e6 29048+ if (!special_file(mode))
1facf9fc 29049+ au_dpri_file(file);
523b37e3
AM
29050+ }
29051+ spin_unlock(&files->spin);
c06a8ce3 29052+ pr("done\n");
1facf9fc 29053+
c06a8ce3 29054+#undef pr
1facf9fc 29055+ au_plevel = plevel;
1facf9fc 29056+}
29057+
29058+/* ---------------------------------------------------------------------- */
29059+
29060+/* module parameter */
29061+static char *aufs_sysrq_key = "a";
29062+module_param_named(sysrq, aufs_sysrq_key, charp, S_IRUGO);
29063+MODULE_PARM_DESC(sysrq, "MagicSysRq key for " AUFS_NAME);
29064+
0c5527e5 29065+static void au_sysrq(int key __maybe_unused)
1facf9fc 29066+{
1facf9fc 29067+ struct au_sbinfo *sbinfo;
29068+
027c5e7a 29069+ lockdep_off();
53392da6 29070+ au_sbilist_lock();
e49829fe 29071+ list_for_each_entry(sbinfo, &au_sbilist.head, si_list)
1facf9fc 29072+ sysrq_sb(sbinfo->si_sb);
53392da6 29073+ au_sbilist_unlock();
027c5e7a 29074+ lockdep_on();
1facf9fc 29075+}
29076+
29077+static struct sysrq_key_op au_sysrq_op = {
29078+ .handler = au_sysrq,
29079+ .help_msg = "Aufs",
29080+ .action_msg = "Aufs",
29081+ .enable_mask = SYSRQ_ENABLE_DUMP
29082+};
29083+
29084+/* ---------------------------------------------------------------------- */
29085+
29086+int __init au_sysrq_init(void)
29087+{
29088+ int err;
29089+ char key;
29090+
29091+ err = -1;
29092+ key = *aufs_sysrq_key;
29093+ if ('a' <= key && key <= 'z')
29094+ err = register_sysrq_key(key, &au_sysrq_op);
29095+ if (unlikely(err))
4a4d8108 29096+ pr_err("err %d, sysrq=%c\n", err, key);
1facf9fc 29097+ return err;
29098+}
29099+
29100+void au_sysrq_fin(void)
29101+{
29102+ int err;
076b876e 29103+
1facf9fc 29104+ err = unregister_sysrq_key(*aufs_sysrq_key, &au_sysrq_op);
29105+ if (unlikely(err))
4a4d8108 29106+ pr_err("err %d (ignored)\n", err);
1facf9fc 29107+}
7f207e10
AM
29108diff -urN /usr/share/empty/fs/aufs/vdir.c linux/fs/aufs/vdir.c
29109--- /usr/share/empty/fs/aufs/vdir.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 29110+++ linux/fs/aufs/vdir.c 2015-11-11 17:21:46.922197217 +0100
b912730e 29111@@ -0,0 +1,888 @@
1facf9fc 29112+/*
2000de60 29113+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 29114+ *
29115+ * This program, aufs is free software; you can redistribute it and/or modify
29116+ * it under the terms of the GNU General Public License as published by
29117+ * the Free Software Foundation; either version 2 of the License, or
29118+ * (at your option) any later version.
dece6358
AM
29119+ *
29120+ * This program is distributed in the hope that it will be useful,
29121+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29122+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29123+ * GNU General Public License for more details.
29124+ *
29125+ * You should have received a copy of the GNU General Public License
523b37e3 29126+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 29127+ */
29128+
29129+/*
29130+ * virtual or vertical directory
29131+ */
29132+
29133+#include "aufs.h"
29134+
dece6358 29135+static unsigned int calc_size(int nlen)
1facf9fc 29136+{
dece6358 29137+ return ALIGN(sizeof(struct au_vdir_de) + nlen, sizeof(ino_t));
1facf9fc 29138+}
29139+
29140+static int set_deblk_end(union au_vdir_deblk_p *p,
29141+ union au_vdir_deblk_p *deblk_end)
29142+{
29143+ if (calc_size(0) <= deblk_end->deblk - p->deblk) {
29144+ p->de->de_str.len = 0;
29145+ /* smp_mb(); */
29146+ return 0;
29147+ }
29148+ return -1; /* error */
29149+}
29150+
29151+/* returns true or false */
29152+static int is_deblk_end(union au_vdir_deblk_p *p,
29153+ union au_vdir_deblk_p *deblk_end)
29154+{
29155+ if (calc_size(0) <= deblk_end->deblk - p->deblk)
29156+ return !p->de->de_str.len;
29157+ return 1;
29158+}
29159+
29160+static unsigned char *last_deblk(struct au_vdir *vdir)
29161+{
29162+ return vdir->vd_deblk[vdir->vd_nblk - 1];
29163+}
29164+
29165+/* ---------------------------------------------------------------------- */
29166+
ab036dbd 29167+/* estimate the appropriate size for name hash table */
1308ab2a 29168+unsigned int au_rdhash_est(loff_t sz)
29169+{
29170+ unsigned int n;
29171+
29172+ n = UINT_MAX;
29173+ sz >>= 10;
29174+ if (sz < n)
29175+ n = sz;
29176+ if (sz < AUFS_RDHASH_DEF)
29177+ n = AUFS_RDHASH_DEF;
4a4d8108 29178+ /* pr_info("n %u\n", n); */
1308ab2a 29179+ return n;
29180+}
29181+
1facf9fc 29182+/*
29183+ * the allocated memory has to be freed by
dece6358 29184+ * au_nhash_wh_free() or au_nhash_de_free().
1facf9fc 29185+ */
dece6358 29186+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp)
1facf9fc 29187+{
1facf9fc 29188+ struct hlist_head *head;
dece6358 29189+ unsigned int u;
076b876e 29190+ size_t sz;
1facf9fc 29191+
076b876e
AM
29192+ sz = sizeof(*nhash->nh_head) * num_hash;
29193+ head = kmalloc(sz, gfp);
dece6358
AM
29194+ if (head) {
29195+ nhash->nh_num = num_hash;
29196+ nhash->nh_head = head;
29197+ for (u = 0; u < num_hash; u++)
1facf9fc 29198+ INIT_HLIST_HEAD(head++);
dece6358 29199+ return 0; /* success */
1facf9fc 29200+ }
1facf9fc 29201+
dece6358 29202+ return -ENOMEM;
1facf9fc 29203+}
29204+
dece6358
AM
29205+static void nhash_count(struct hlist_head *head)
29206+{
29207+#if 0
29208+ unsigned long n;
29209+ struct hlist_node *pos;
29210+
29211+ n = 0;
29212+ hlist_for_each(pos, head)
29213+ n++;
4a4d8108 29214+ pr_info("%lu\n", n);
dece6358
AM
29215+#endif
29216+}
29217+
29218+static void au_nhash_wh_do_free(struct hlist_head *head)
1facf9fc 29219+{
c06a8ce3
AM
29220+ struct au_vdir_wh *pos;
29221+ struct hlist_node *node;
1facf9fc 29222+
c06a8ce3
AM
29223+ hlist_for_each_entry_safe(pos, node, head, wh_hash)
29224+ kfree(pos);
1facf9fc 29225+}
29226+
dece6358 29227+static void au_nhash_de_do_free(struct hlist_head *head)
1facf9fc 29228+{
c06a8ce3
AM
29229+ struct au_vdir_dehstr *pos;
29230+ struct hlist_node *node;
1facf9fc 29231+
c06a8ce3
AM
29232+ hlist_for_each_entry_safe(pos, node, head, hash)
29233+ au_cache_free_vdir_dehstr(pos);
1facf9fc 29234+}
29235+
dece6358
AM
29236+static void au_nhash_do_free(struct au_nhash *nhash,
29237+ void (*free)(struct hlist_head *head))
1facf9fc 29238+{
1308ab2a 29239+ unsigned int n;
1facf9fc 29240+ struct hlist_head *head;
1facf9fc 29241+
dece6358 29242+ n = nhash->nh_num;
1308ab2a 29243+ if (!n)
29244+ return;
29245+
dece6358 29246+ head = nhash->nh_head;
1308ab2a 29247+ while (n-- > 0) {
dece6358
AM
29248+ nhash_count(head);
29249+ free(head++);
1facf9fc 29250+ }
dece6358 29251+ kfree(nhash->nh_head);
1facf9fc 29252+}
29253+
dece6358 29254+void au_nhash_wh_free(struct au_nhash *whlist)
1facf9fc 29255+{
dece6358
AM
29256+ au_nhash_do_free(whlist, au_nhash_wh_do_free);
29257+}
1facf9fc 29258+
dece6358
AM
29259+static void au_nhash_de_free(struct au_nhash *delist)
29260+{
29261+ au_nhash_do_free(delist, au_nhash_de_do_free);
1facf9fc 29262+}
29263+
29264+/* ---------------------------------------------------------------------- */
29265+
29266+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
29267+ int limit)
29268+{
29269+ int num;
29270+ unsigned int u, n;
29271+ struct hlist_head *head;
c06a8ce3 29272+ struct au_vdir_wh *pos;
1facf9fc 29273+
29274+ num = 0;
29275+ n = whlist->nh_num;
29276+ head = whlist->nh_head;
1308ab2a 29277+ for (u = 0; u < n; u++, head++)
c06a8ce3
AM
29278+ hlist_for_each_entry(pos, head, wh_hash)
29279+ if (pos->wh_bindex == btgt && ++num > limit)
1facf9fc 29280+ return 1;
1facf9fc 29281+ return 0;
29282+}
29283+
29284+static struct hlist_head *au_name_hash(struct au_nhash *nhash,
dece6358 29285+ unsigned char *name,
1facf9fc 29286+ unsigned int len)
29287+{
dece6358
AM
29288+ unsigned int v;
29289+ /* const unsigned int magic_bit = 12; */
29290+
1308ab2a 29291+ AuDebugOn(!nhash->nh_num || !nhash->nh_head);
29292+
dece6358
AM
29293+ v = 0;
29294+ while (len--)
29295+ v += *name++;
29296+ /* v = hash_long(v, magic_bit); */
29297+ v %= nhash->nh_num;
29298+ return nhash->nh_head + v;
29299+}
29300+
29301+static int au_nhash_test_name(struct au_vdir_destr *str, const char *name,
29302+ int nlen)
29303+{
29304+ return str->len == nlen && !memcmp(str->name, name, nlen);
1facf9fc 29305+}
29306+
29307+/* returns found or not */
dece6358 29308+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen)
1facf9fc 29309+{
29310+ struct hlist_head *head;
c06a8ce3 29311+ struct au_vdir_wh *pos;
1facf9fc 29312+ struct au_vdir_destr *str;
29313+
dece6358 29314+ head = au_name_hash(whlist, name, nlen);
c06a8ce3
AM
29315+ hlist_for_each_entry(pos, head, wh_hash) {
29316+ str = &pos->wh_str;
1facf9fc 29317+ AuDbg("%.*s\n", str->len, str->name);
dece6358
AM
29318+ if (au_nhash_test_name(str, name, nlen))
29319+ return 1;
29320+ }
29321+ return 0;
29322+}
29323+
29324+/* returns found(true) or not */
29325+static int test_known(struct au_nhash *delist, char *name, int nlen)
29326+{
29327+ struct hlist_head *head;
c06a8ce3 29328+ struct au_vdir_dehstr *pos;
dece6358
AM
29329+ struct au_vdir_destr *str;
29330+
29331+ head = au_name_hash(delist, name, nlen);
c06a8ce3
AM
29332+ hlist_for_each_entry(pos, head, hash) {
29333+ str = pos->str;
dece6358
AM
29334+ AuDbg("%.*s\n", str->len, str->name);
29335+ if (au_nhash_test_name(str, name, nlen))
1facf9fc 29336+ return 1;
29337+ }
29338+ return 0;
29339+}
29340+
dece6358
AM
29341+static void au_shwh_init_wh(struct au_vdir_wh *wh, ino_t ino,
29342+ unsigned char d_type)
29343+{
29344+#ifdef CONFIG_AUFS_SHWH
29345+ wh->wh_ino = ino;
29346+ wh->wh_type = d_type;
29347+#endif
29348+}
29349+
29350+/* ---------------------------------------------------------------------- */
29351+
29352+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
29353+ unsigned int d_type, aufs_bindex_t bindex,
29354+ unsigned char shwh)
1facf9fc 29355+{
29356+ int err;
29357+ struct au_vdir_destr *str;
29358+ struct au_vdir_wh *wh;
29359+
dece6358 29360+ AuDbg("%.*s\n", nlen, name);
1308ab2a 29361+ AuDebugOn(!whlist->nh_num || !whlist->nh_head);
29362+
1facf9fc 29363+ err = -ENOMEM;
dece6358 29364+ wh = kmalloc(sizeof(*wh) + nlen, GFP_NOFS);
1facf9fc 29365+ if (unlikely(!wh))
29366+ goto out;
29367+
29368+ err = 0;
29369+ wh->wh_bindex = bindex;
dece6358
AM
29370+ if (shwh)
29371+ au_shwh_init_wh(wh, ino, d_type);
1facf9fc 29372+ str = &wh->wh_str;
dece6358
AM
29373+ str->len = nlen;
29374+ memcpy(str->name, name, nlen);
29375+ hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, nlen));
1facf9fc 29376+ /* smp_mb(); */
29377+
4f0767ce 29378+out:
1facf9fc 29379+ return err;
29380+}
29381+
1facf9fc 29382+static int append_deblk(struct au_vdir *vdir)
29383+{
29384+ int err;
dece6358 29385+ unsigned long ul;
1facf9fc 29386+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
29387+ union au_vdir_deblk_p p, deblk_end;
29388+ unsigned char **o;
29389+
29390+ err = -ENOMEM;
dece6358
AM
29391+ o = krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1),
29392+ GFP_NOFS);
1facf9fc 29393+ if (unlikely(!o))
29394+ goto out;
29395+
29396+ vdir->vd_deblk = o;
29397+ p.deblk = kmalloc(deblk_sz, GFP_NOFS);
29398+ if (p.deblk) {
29399+ ul = vdir->vd_nblk++;
29400+ vdir->vd_deblk[ul] = p.deblk;
29401+ vdir->vd_last.ul = ul;
29402+ vdir->vd_last.p.deblk = p.deblk;
29403+ deblk_end.deblk = p.deblk + deblk_sz;
29404+ err = set_deblk_end(&p, &deblk_end);
29405+ }
29406+
4f0767ce 29407+out:
1facf9fc 29408+ return err;
29409+}
29410+
dece6358
AM
29411+static int append_de(struct au_vdir *vdir, char *name, int nlen, ino_t ino,
29412+ unsigned int d_type, struct au_nhash *delist)
29413+{
29414+ int err;
29415+ unsigned int sz;
29416+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
29417+ union au_vdir_deblk_p p, *room, deblk_end;
29418+ struct au_vdir_dehstr *dehstr;
29419+
29420+ p.deblk = last_deblk(vdir);
29421+ deblk_end.deblk = p.deblk + deblk_sz;
29422+ room = &vdir->vd_last.p;
29423+ AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk
29424+ || !is_deblk_end(room, &deblk_end));
29425+
29426+ sz = calc_size(nlen);
29427+ if (unlikely(sz > deblk_end.deblk - room->deblk)) {
29428+ err = append_deblk(vdir);
29429+ if (unlikely(err))
29430+ goto out;
29431+
29432+ p.deblk = last_deblk(vdir);
29433+ deblk_end.deblk = p.deblk + deblk_sz;
29434+ /* smp_mb(); */
29435+ AuDebugOn(room->deblk != p.deblk);
29436+ }
29437+
29438+ err = -ENOMEM;
4a4d8108 29439+ dehstr = au_cache_alloc_vdir_dehstr();
dece6358
AM
29440+ if (unlikely(!dehstr))
29441+ goto out;
29442+
29443+ dehstr->str = &room->de->de_str;
29444+ hlist_add_head(&dehstr->hash, au_name_hash(delist, name, nlen));
29445+ room->de->de_ino = ino;
29446+ room->de->de_type = d_type;
29447+ room->de->de_str.len = nlen;
29448+ memcpy(room->de->de_str.name, name, nlen);
29449+
29450+ err = 0;
29451+ room->deblk += sz;
29452+ if (unlikely(set_deblk_end(room, &deblk_end)))
29453+ err = append_deblk(vdir);
29454+ /* smp_mb(); */
29455+
4f0767ce 29456+out:
dece6358
AM
29457+ return err;
29458+}
29459+
29460+/* ---------------------------------------------------------------------- */
29461+
29462+void au_vdir_free(struct au_vdir *vdir)
29463+{
29464+ unsigned char **deblk;
29465+
29466+ deblk = vdir->vd_deblk;
29467+ while (vdir->vd_nblk--)
29468+ kfree(*deblk++);
29469+ kfree(vdir->vd_deblk);
29470+ au_cache_free_vdir(vdir);
29471+}
29472+
1308ab2a 29473+static struct au_vdir *alloc_vdir(struct file *file)
1facf9fc 29474+{
29475+ struct au_vdir *vdir;
1308ab2a 29476+ struct super_block *sb;
1facf9fc 29477+ int err;
29478+
2000de60 29479+ sb = file->f_path.dentry->d_sb;
dece6358
AM
29480+ SiMustAnyLock(sb);
29481+
1facf9fc 29482+ err = -ENOMEM;
29483+ vdir = au_cache_alloc_vdir();
29484+ if (unlikely(!vdir))
29485+ goto out;
29486+
29487+ vdir->vd_deblk = kzalloc(sizeof(*vdir->vd_deblk), GFP_NOFS);
29488+ if (unlikely(!vdir->vd_deblk))
29489+ goto out_free;
29490+
29491+ vdir->vd_deblk_sz = au_sbi(sb)->si_rdblk;
1308ab2a 29492+ if (!vdir->vd_deblk_sz) {
ab036dbd 29493+ /* estimate the appropriate size for deblk */
1308ab2a 29494+ vdir->vd_deblk_sz = au_dir_size(file, /*dentry*/NULL);
4a4d8108 29495+ /* pr_info("vd_deblk_sz %u\n", vdir->vd_deblk_sz); */
1308ab2a 29496+ }
1facf9fc 29497+ vdir->vd_nblk = 0;
29498+ vdir->vd_version = 0;
29499+ vdir->vd_jiffy = 0;
29500+ err = append_deblk(vdir);
29501+ if (!err)
29502+ return vdir; /* success */
29503+
29504+ kfree(vdir->vd_deblk);
29505+
4f0767ce 29506+out_free:
1facf9fc 29507+ au_cache_free_vdir(vdir);
4f0767ce 29508+out:
1facf9fc 29509+ vdir = ERR_PTR(err);
29510+ return vdir;
29511+}
29512+
29513+static int reinit_vdir(struct au_vdir *vdir)
29514+{
29515+ int err;
29516+ union au_vdir_deblk_p p, deblk_end;
29517+
29518+ while (vdir->vd_nblk > 1) {
29519+ kfree(vdir->vd_deblk[vdir->vd_nblk - 1]);
29520+ /* vdir->vd_deblk[vdir->vd_nblk - 1] = NULL; */
29521+ vdir->vd_nblk--;
29522+ }
29523+ p.deblk = vdir->vd_deblk[0];
29524+ deblk_end.deblk = p.deblk + vdir->vd_deblk_sz;
29525+ err = set_deblk_end(&p, &deblk_end);
29526+ /* keep vd_dblk_sz */
29527+ vdir->vd_last.ul = 0;
29528+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
29529+ vdir->vd_version = 0;
29530+ vdir->vd_jiffy = 0;
29531+ /* smp_mb(); */
29532+ return err;
29533+}
29534+
29535+/* ---------------------------------------------------------------------- */
29536+
1facf9fc 29537+#define AuFillVdir_CALLED 1
29538+#define AuFillVdir_WHABLE (1 << 1)
dece6358 29539+#define AuFillVdir_SHWH (1 << 2)
1facf9fc 29540+#define au_ftest_fillvdir(flags, name) ((flags) & AuFillVdir_##name)
7f207e10
AM
29541+#define au_fset_fillvdir(flags, name) \
29542+ do { (flags) |= AuFillVdir_##name; } while (0)
29543+#define au_fclr_fillvdir(flags, name) \
29544+ do { (flags) &= ~AuFillVdir_##name; } while (0)
1facf9fc 29545+
dece6358
AM
29546+#ifndef CONFIG_AUFS_SHWH
29547+#undef AuFillVdir_SHWH
29548+#define AuFillVdir_SHWH 0
29549+#endif
29550+
1facf9fc 29551+struct fillvdir_arg {
392086de 29552+ struct dir_context ctx;
1facf9fc 29553+ struct file *file;
29554+ struct au_vdir *vdir;
dece6358
AM
29555+ struct au_nhash delist;
29556+ struct au_nhash whlist;
1facf9fc 29557+ aufs_bindex_t bindex;
29558+ unsigned int flags;
29559+ int err;
29560+};
29561+
392086de 29562+static int fillvdir(struct dir_context *ctx, const char *__name, int nlen,
1facf9fc 29563+ loff_t offset __maybe_unused, u64 h_ino,
29564+ unsigned int d_type)
29565+{
392086de 29566+ struct fillvdir_arg *arg = container_of(ctx, struct fillvdir_arg, ctx);
1facf9fc 29567+ char *name = (void *)__name;
29568+ struct super_block *sb;
1facf9fc 29569+ ino_t ino;
dece6358 29570+ const unsigned char shwh = !!au_ftest_fillvdir(arg->flags, SHWH);
1facf9fc 29571+
1facf9fc 29572+ arg->err = 0;
2000de60 29573+ sb = arg->file->f_path.dentry->d_sb;
1facf9fc 29574+ au_fset_fillvdir(arg->flags, CALLED);
29575+ /* smp_mb(); */
dece6358 29576+ if (nlen <= AUFS_WH_PFX_LEN
1facf9fc 29577+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
dece6358
AM
29578+ if (test_known(&arg->delist, name, nlen)
29579+ || au_nhash_test_known_wh(&arg->whlist, name, nlen))
29580+ goto out; /* already exists or whiteouted */
1facf9fc 29581+
dece6358 29582+ arg->err = au_ino(sb, arg->bindex, h_ino, d_type, &ino);
4a4d8108
AM
29583+ if (!arg->err) {
29584+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
29585+ d_type = DT_UNKNOWN;
dece6358
AM
29586+ arg->err = append_de(arg->vdir, name, nlen, ino,
29587+ d_type, &arg->delist);
4a4d8108 29588+ }
1facf9fc 29589+ } else if (au_ftest_fillvdir(arg->flags, WHABLE)) {
29590+ name += AUFS_WH_PFX_LEN;
dece6358
AM
29591+ nlen -= AUFS_WH_PFX_LEN;
29592+ if (au_nhash_test_known_wh(&arg->whlist, name, nlen))
29593+ goto out; /* already whiteouted */
1facf9fc 29594+
dece6358
AM
29595+ if (shwh)
29596+ arg->err = au_wh_ino(sb, arg->bindex, h_ino, d_type,
29597+ &ino);
4a4d8108
AM
29598+ if (!arg->err) {
29599+ if (nlen <= AUFS_MAX_NAMELEN + AUFS_WH_PFX_LEN)
29600+ d_type = DT_UNKNOWN;
1facf9fc 29601+ arg->err = au_nhash_append_wh
dece6358
AM
29602+ (&arg->whlist, name, nlen, ino, d_type,
29603+ arg->bindex, shwh);
4a4d8108 29604+ }
1facf9fc 29605+ }
29606+
4f0767ce 29607+out:
1facf9fc 29608+ if (!arg->err)
29609+ arg->vdir->vd_jiffy = jiffies;
29610+ /* smp_mb(); */
29611+ AuTraceErr(arg->err);
29612+ return arg->err;
29613+}
29614+
dece6358
AM
29615+static int au_handle_shwh(struct super_block *sb, struct au_vdir *vdir,
29616+ struct au_nhash *whlist, struct au_nhash *delist)
29617+{
29618+#ifdef CONFIG_AUFS_SHWH
29619+ int err;
29620+ unsigned int nh, u;
29621+ struct hlist_head *head;
c06a8ce3
AM
29622+ struct au_vdir_wh *pos;
29623+ struct hlist_node *n;
dece6358
AM
29624+ char *p, *o;
29625+ struct au_vdir_destr *destr;
29626+
29627+ AuDebugOn(!au_opt_test(au_mntflags(sb), SHWH));
29628+
29629+ err = -ENOMEM;
537831f9 29630+ o = p = (void *)__get_free_page(GFP_NOFS);
dece6358
AM
29631+ if (unlikely(!p))
29632+ goto out;
29633+
29634+ err = 0;
29635+ nh = whlist->nh_num;
29636+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
29637+ p += AUFS_WH_PFX_LEN;
29638+ for (u = 0; u < nh; u++) {
29639+ head = whlist->nh_head + u;
c06a8ce3
AM
29640+ hlist_for_each_entry_safe(pos, n, head, wh_hash) {
29641+ destr = &pos->wh_str;
dece6358
AM
29642+ memcpy(p, destr->name, destr->len);
29643+ err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN,
c06a8ce3 29644+ pos->wh_ino, pos->wh_type, delist);
dece6358
AM
29645+ if (unlikely(err))
29646+ break;
29647+ }
29648+ }
29649+
537831f9 29650+ free_page((unsigned long)o);
dece6358 29651+
4f0767ce 29652+out:
dece6358
AM
29653+ AuTraceErr(err);
29654+ return err;
29655+#else
29656+ return 0;
29657+#endif
29658+}
29659+
1facf9fc 29660+static int au_do_read_vdir(struct fillvdir_arg *arg)
29661+{
29662+ int err;
dece6358 29663+ unsigned int rdhash;
1facf9fc 29664+ loff_t offset;
dece6358
AM
29665+ aufs_bindex_t bend, bindex, bstart;
29666+ unsigned char shwh;
1facf9fc 29667+ struct file *hf, *file;
29668+ struct super_block *sb;
29669+
1facf9fc 29670+ file = arg->file;
2000de60 29671+ sb = file->f_path.dentry->d_sb;
dece6358
AM
29672+ SiMustAnyLock(sb);
29673+
29674+ rdhash = au_sbi(sb)->si_rdhash;
1308ab2a 29675+ if (!rdhash)
29676+ rdhash = au_rdhash_est(au_dir_size(file, /*dentry*/NULL));
dece6358
AM
29677+ err = au_nhash_alloc(&arg->delist, rdhash, GFP_NOFS);
29678+ if (unlikely(err))
1facf9fc 29679+ goto out;
dece6358
AM
29680+ err = au_nhash_alloc(&arg->whlist, rdhash, GFP_NOFS);
29681+ if (unlikely(err))
1facf9fc 29682+ goto out_delist;
29683+
29684+ err = 0;
29685+ arg->flags = 0;
dece6358
AM
29686+ shwh = 0;
29687+ if (au_opt_test(au_mntflags(sb), SHWH)) {
29688+ shwh = 1;
29689+ au_fset_fillvdir(arg->flags, SHWH);
29690+ }
29691+ bstart = au_fbstart(file);
4a4d8108 29692+ bend = au_fbend_dir(file);
dece6358 29693+ for (bindex = bstart; !err && bindex <= bend; bindex++) {
4a4d8108 29694+ hf = au_hf_dir(file, bindex);
1facf9fc 29695+ if (!hf)
29696+ continue;
29697+
29698+ offset = vfsub_llseek(hf, 0, SEEK_SET);
29699+ err = offset;
29700+ if (unlikely(offset))
29701+ break;
29702+
29703+ arg->bindex = bindex;
29704+ au_fclr_fillvdir(arg->flags, WHABLE);
dece6358
AM
29705+ if (shwh
29706+ || (bindex != bend
29707+ && au_br_whable(au_sbr_perm(sb, bindex))))
1facf9fc 29708+ au_fset_fillvdir(arg->flags, WHABLE);
29709+ do {
29710+ arg->err = 0;
29711+ au_fclr_fillvdir(arg->flags, CALLED);
29712+ /* smp_mb(); */
392086de 29713+ err = vfsub_iterate_dir(hf, &arg->ctx);
1facf9fc 29714+ if (err >= 0)
29715+ err = arg->err;
29716+ } while (!err && au_ftest_fillvdir(arg->flags, CALLED));
392086de
AM
29717+
29718+ /*
29719+ * dir_relax() may be good for concurrency, but aufs should not
29720+ * use it since it will cause a lockdep problem.
29721+ */
1facf9fc 29722+ }
dece6358
AM
29723+
29724+ if (!err && shwh)
29725+ err = au_handle_shwh(sb, arg->vdir, &arg->whlist, &arg->delist);
29726+
29727+ au_nhash_wh_free(&arg->whlist);
1facf9fc 29728+
4f0767ce 29729+out_delist:
dece6358 29730+ au_nhash_de_free(&arg->delist);
4f0767ce 29731+out:
1facf9fc 29732+ return err;
29733+}
29734+
29735+static int read_vdir(struct file *file, int may_read)
29736+{
29737+ int err;
29738+ unsigned long expire;
29739+ unsigned char do_read;
392086de
AM
29740+ struct fillvdir_arg arg = {
29741+ .ctx = {
2000de60 29742+ .actor = fillvdir
392086de
AM
29743+ }
29744+ };
1facf9fc 29745+ struct inode *inode;
29746+ struct au_vdir *vdir, *allocated;
29747+
29748+ err = 0;
c06a8ce3 29749+ inode = file_inode(file);
1facf9fc 29750+ IMustLock(inode);
dece6358
AM
29751+ SiMustAnyLock(inode->i_sb);
29752+
1facf9fc 29753+ allocated = NULL;
29754+ do_read = 0;
29755+ expire = au_sbi(inode->i_sb)->si_rdcache;
29756+ vdir = au_ivdir(inode);
29757+ if (!vdir) {
29758+ do_read = 1;
1308ab2a 29759+ vdir = alloc_vdir(file);
1facf9fc 29760+ err = PTR_ERR(vdir);
29761+ if (IS_ERR(vdir))
29762+ goto out;
29763+ err = 0;
29764+ allocated = vdir;
29765+ } else if (may_read
29766+ && (inode->i_version != vdir->vd_version
29767+ || time_after(jiffies, vdir->vd_jiffy + expire))) {
29768+ do_read = 1;
29769+ err = reinit_vdir(vdir);
29770+ if (unlikely(err))
29771+ goto out;
29772+ }
29773+
29774+ if (!do_read)
29775+ return 0; /* success */
29776+
29777+ arg.file = file;
29778+ arg.vdir = vdir;
29779+ err = au_do_read_vdir(&arg);
29780+ if (!err) {
392086de 29781+ /* file->f_pos = 0; */ /* todo: ctx->pos? */
1facf9fc 29782+ vdir->vd_version = inode->i_version;
29783+ vdir->vd_last.ul = 0;
29784+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
29785+ if (allocated)
29786+ au_set_ivdir(inode, allocated);
29787+ } else if (allocated)
29788+ au_vdir_free(allocated);
29789+
4f0767ce 29790+out:
1facf9fc 29791+ return err;
29792+}
29793+
29794+static int copy_vdir(struct au_vdir *tgt, struct au_vdir *src)
29795+{
29796+ int err, rerr;
29797+ unsigned long ul, n;
29798+ const unsigned int deblk_sz = src->vd_deblk_sz;
29799+
29800+ AuDebugOn(tgt->vd_nblk != 1);
29801+
29802+ err = -ENOMEM;
29803+ if (tgt->vd_nblk < src->vd_nblk) {
29804+ unsigned char **p;
29805+
dece6358
AM
29806+ p = krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk,
29807+ GFP_NOFS);
1facf9fc 29808+ if (unlikely(!p))
29809+ goto out;
29810+ tgt->vd_deblk = p;
29811+ }
29812+
1308ab2a 29813+ if (tgt->vd_deblk_sz != deblk_sz) {
29814+ unsigned char *p;
29815+
29816+ tgt->vd_deblk_sz = deblk_sz;
29817+ p = krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS);
29818+ if (unlikely(!p))
29819+ goto out;
29820+ tgt->vd_deblk[0] = p;
29821+ }
1facf9fc 29822+ memcpy(tgt->vd_deblk[0], src->vd_deblk[0], deblk_sz);
1facf9fc 29823+ tgt->vd_version = src->vd_version;
29824+ tgt->vd_jiffy = src->vd_jiffy;
29825+
29826+ n = src->vd_nblk;
29827+ for (ul = 1; ul < n; ul++) {
dece6358
AM
29828+ tgt->vd_deblk[ul] = kmemdup(src->vd_deblk[ul], deblk_sz,
29829+ GFP_NOFS);
29830+ if (unlikely(!tgt->vd_deblk[ul]))
1facf9fc 29831+ goto out;
1308ab2a 29832+ tgt->vd_nblk++;
1facf9fc 29833+ }
1308ab2a 29834+ tgt->vd_nblk = n;
29835+ tgt->vd_last.ul = tgt->vd_last.ul;
29836+ tgt->vd_last.p.deblk = tgt->vd_deblk[tgt->vd_last.ul];
29837+ tgt->vd_last.p.deblk += src->vd_last.p.deblk
29838+ - src->vd_deblk[src->vd_last.ul];
1facf9fc 29839+ /* smp_mb(); */
29840+ return 0; /* success */
29841+
4f0767ce 29842+out:
1facf9fc 29843+ rerr = reinit_vdir(tgt);
29844+ BUG_ON(rerr);
29845+ return err;
29846+}
29847+
29848+int au_vdir_init(struct file *file)
29849+{
29850+ int err;
29851+ struct inode *inode;
29852+ struct au_vdir *vdir_cache, *allocated;
29853+
392086de 29854+ /* test file->f_pos here instead of ctx->pos */
1facf9fc 29855+ err = read_vdir(file, !file->f_pos);
29856+ if (unlikely(err))
29857+ goto out;
29858+
29859+ allocated = NULL;
29860+ vdir_cache = au_fvdir_cache(file);
29861+ if (!vdir_cache) {
1308ab2a 29862+ vdir_cache = alloc_vdir(file);
1facf9fc 29863+ err = PTR_ERR(vdir_cache);
29864+ if (IS_ERR(vdir_cache))
29865+ goto out;
29866+ allocated = vdir_cache;
29867+ } else if (!file->f_pos && vdir_cache->vd_version != file->f_version) {
392086de 29868+ /* test file->f_pos here instead of ctx->pos */
1facf9fc 29869+ err = reinit_vdir(vdir_cache);
29870+ if (unlikely(err))
29871+ goto out;
29872+ } else
29873+ return 0; /* success */
29874+
c06a8ce3 29875+ inode = file_inode(file);
1facf9fc 29876+ err = copy_vdir(vdir_cache, au_ivdir(inode));
29877+ if (!err) {
29878+ file->f_version = inode->i_version;
29879+ if (allocated)
29880+ au_set_fvdir_cache(file, allocated);
29881+ } else if (allocated)
29882+ au_vdir_free(allocated);
29883+
4f0767ce 29884+out:
1facf9fc 29885+ return err;
29886+}
29887+
29888+static loff_t calc_offset(struct au_vdir *vdir)
29889+{
29890+ loff_t offset;
29891+ union au_vdir_deblk_p p;
29892+
29893+ p.deblk = vdir->vd_deblk[vdir->vd_last.ul];
29894+ offset = vdir->vd_last.p.deblk - p.deblk;
29895+ offset += vdir->vd_deblk_sz * vdir->vd_last.ul;
29896+ return offset;
29897+}
29898+
29899+/* returns true or false */
392086de 29900+static int seek_vdir(struct file *file, struct dir_context *ctx)
1facf9fc 29901+{
29902+ int valid;
29903+ unsigned int deblk_sz;
29904+ unsigned long ul, n;
29905+ loff_t offset;
29906+ union au_vdir_deblk_p p, deblk_end;
29907+ struct au_vdir *vdir_cache;
29908+
29909+ valid = 1;
29910+ vdir_cache = au_fvdir_cache(file);
29911+ offset = calc_offset(vdir_cache);
29912+ AuDbg("offset %lld\n", offset);
392086de 29913+ if (ctx->pos == offset)
1facf9fc 29914+ goto out;
29915+
29916+ vdir_cache->vd_last.ul = 0;
29917+ vdir_cache->vd_last.p.deblk = vdir_cache->vd_deblk[0];
392086de 29918+ if (!ctx->pos)
1facf9fc 29919+ goto out;
29920+
29921+ valid = 0;
29922+ deblk_sz = vdir_cache->vd_deblk_sz;
392086de 29923+ ul = div64_u64(ctx->pos, deblk_sz);
1facf9fc 29924+ AuDbg("ul %lu\n", ul);
29925+ if (ul >= vdir_cache->vd_nblk)
29926+ goto out;
29927+
29928+ n = vdir_cache->vd_nblk;
29929+ for (; ul < n; ul++) {
29930+ p.deblk = vdir_cache->vd_deblk[ul];
29931+ deblk_end.deblk = p.deblk + deblk_sz;
29932+ offset = ul;
29933+ offset *= deblk_sz;
392086de 29934+ while (!is_deblk_end(&p, &deblk_end) && offset < ctx->pos) {
1facf9fc 29935+ unsigned int l;
29936+
29937+ l = calc_size(p.de->de_str.len);
29938+ offset += l;
29939+ p.deblk += l;
29940+ }
29941+ if (!is_deblk_end(&p, &deblk_end)) {
29942+ valid = 1;
29943+ vdir_cache->vd_last.ul = ul;
29944+ vdir_cache->vd_last.p = p;
29945+ break;
29946+ }
29947+ }
29948+
4f0767ce 29949+out:
1facf9fc 29950+ /* smp_mb(); */
29951+ AuTraceErr(!valid);
29952+ return valid;
29953+}
29954+
392086de 29955+int au_vdir_fill_de(struct file *file, struct dir_context *ctx)
1facf9fc 29956+{
1facf9fc 29957+ unsigned int l, deblk_sz;
29958+ union au_vdir_deblk_p deblk_end;
29959+ struct au_vdir *vdir_cache;
29960+ struct au_vdir_de *de;
29961+
29962+ vdir_cache = au_fvdir_cache(file);
392086de 29963+ if (!seek_vdir(file, ctx))
1facf9fc 29964+ return 0;
29965+
29966+ deblk_sz = vdir_cache->vd_deblk_sz;
29967+ while (1) {
29968+ deblk_end.deblk = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
29969+ deblk_end.deblk += deblk_sz;
29970+ while (!is_deblk_end(&vdir_cache->vd_last.p, &deblk_end)) {
29971+ de = vdir_cache->vd_last.p.de;
29972+ AuDbg("%.*s, off%lld, i%lu, dt%d\n",
392086de 29973+ de->de_str.len, de->de_str.name, ctx->pos,
1facf9fc 29974+ (unsigned long)de->de_ino, de->de_type);
392086de
AM
29975+ if (unlikely(!dir_emit(ctx, de->de_str.name,
29976+ de->de_str.len, de->de_ino,
29977+ de->de_type))) {
1facf9fc 29978+ /* todo: ignore the error caused by udba? */
29979+ /* return err; */
29980+ return 0;
29981+ }
29982+
29983+ l = calc_size(de->de_str.len);
29984+ vdir_cache->vd_last.p.deblk += l;
392086de 29985+ ctx->pos += l;
1facf9fc 29986+ }
29987+ if (vdir_cache->vd_last.ul < vdir_cache->vd_nblk - 1) {
29988+ vdir_cache->vd_last.ul++;
29989+ vdir_cache->vd_last.p.deblk
29990+ = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
392086de 29991+ ctx->pos = deblk_sz * vdir_cache->vd_last.ul;
1facf9fc 29992+ continue;
29993+ }
29994+ break;
29995+ }
29996+
29997+ /* smp_mb(); */
29998+ return 0;
29999+}
7f207e10
AM
30000diff -urN /usr/share/empty/fs/aufs/vfsub.c linux/fs/aufs/vfsub.c
30001--- /usr/share/empty/fs/aufs/vfsub.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 30002+++ linux/fs/aufs/vfsub.c 2015-09-24 10:47:58.258053165 +0200
5527c038 30003@@ -0,0 +1,848 @@
1facf9fc 30004+/*
2000de60 30005+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 30006+ *
30007+ * This program, aufs is free software; you can redistribute it and/or modify
30008+ * it under the terms of the GNU General Public License as published by
30009+ * the Free Software Foundation; either version 2 of the License, or
30010+ * (at your option) any later version.
dece6358
AM
30011+ *
30012+ * This program is distributed in the hope that it will be useful,
30013+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
30014+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30015+ * GNU General Public License for more details.
30016+ *
30017+ * You should have received a copy of the GNU General Public License
523b37e3 30018+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 30019+ */
30020+
30021+/*
30022+ * sub-routines for VFS
30023+ */
30024+
dece6358
AM
30025+#include <linux/namei.h>
30026+#include <linux/security.h>
30027+#include <linux/splice.h>
1facf9fc 30028+#include "aufs.h"
30029+
30030+int vfsub_update_h_iattr(struct path *h_path, int *did)
30031+{
30032+ int err;
30033+ struct kstat st;
30034+ struct super_block *h_sb;
30035+
30036+ /* for remote fs, leave work for its getattr or d_revalidate */
30037+ /* for bad i_attr fs, handle them in aufs_getattr() */
30038+ /* still some fs may acquire i_mutex. we need to skip them */
30039+ err = 0;
30040+ if (!did)
30041+ did = &err;
30042+ h_sb = h_path->dentry->d_sb;
30043+ *did = (!au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb));
30044+ if (*did)
c06a8ce3 30045+ err = vfs_getattr(h_path, &st);
1facf9fc 30046+
30047+ return err;
30048+}
30049+
30050+/* ---------------------------------------------------------------------- */
30051+
4a4d8108 30052+struct file *vfsub_dentry_open(struct path *path, int flags)
1308ab2a 30053+{
30054+ struct file *file;
30055+
b4510431 30056+ file = dentry_open(path, flags /* | __FMODE_NONOTIFY */,
7f207e10 30057+ current_cred());
2cbb1c4b
JR
30058+ if (!IS_ERR_OR_NULL(file)
30059+ && (file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
5527c038 30060+ i_readcount_inc(d_inode(path->dentry));
4a4d8108 30061+
1308ab2a 30062+ return file;
30063+}
30064+
1facf9fc 30065+struct file *vfsub_filp_open(const char *path, int oflags, int mode)
30066+{
30067+ struct file *file;
30068+
2cbb1c4b 30069+ lockdep_off();
7f207e10 30070+ file = filp_open(path,
2cbb1c4b 30071+ oflags /* | __FMODE_NONOTIFY */,
7f207e10 30072+ mode);
2cbb1c4b 30073+ lockdep_on();
1facf9fc 30074+ if (IS_ERR(file))
30075+ goto out;
30076+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
30077+
4f0767ce 30078+out:
1facf9fc 30079+ return file;
30080+}
30081+
b912730e
AM
30082+/*
30083+ * Ideally this function should call VFS:do_last() in order to keep all its
30084+ * checkings. But it is very hard for aufs to regenerate several VFS internal
30085+ * structure such as nameidata. This is a second (or third) best approach.
30086+ * cf. linux/fs/namei.c:do_last(), lookup_open() and atomic_open().
30087+ */
30088+int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
30089+ struct vfsub_aopen_args *args, struct au_branch *br)
30090+{
30091+ int err;
30092+ struct file *file = args->file;
30093+ /* copied from linux/fs/namei.c:atomic_open() */
30094+ struct dentry *const DENTRY_NOT_SET = (void *)-1UL;
30095+
30096+ IMustLock(dir);
30097+ AuDebugOn(!dir->i_op->atomic_open);
30098+
30099+ err = au_br_test_oflag(args->open_flag, br);
30100+ if (unlikely(err))
30101+ goto out;
30102+
30103+ args->file->f_path.dentry = DENTRY_NOT_SET;
30104+ args->file->f_path.mnt = au_br_mnt(br);
30105+ err = dir->i_op->atomic_open(dir, dentry, file, args->open_flag,
30106+ args->create_mode, args->opened);
30107+ if (err >= 0) {
30108+ /* some filesystems don't set FILE_CREATED while succeeded? */
30109+ if (*args->opened & FILE_CREATED)
30110+ fsnotify_create(dir, dentry);
30111+ } else
30112+ goto out;
30113+
30114+
30115+ if (!err) {
30116+ /* todo: call VFS:may_open() here */
30117+ err = open_check_o_direct(file);
30118+ /* todo: ima_file_check() too? */
30119+ if (!err && (args->open_flag & __FMODE_EXEC))
30120+ err = deny_write_access(file);
30121+ if (unlikely(err))
30122+ /* note that the file is created and still opened */
30123+ goto out;
30124+ }
30125+
30126+ atomic_inc(&br->br_count);
30127+ fsnotify_open(file);
30128+
30129+out:
30130+ return err;
30131+}
30132+
1facf9fc 30133+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path)
30134+{
30135+ int err;
30136+
1facf9fc 30137+ err = kern_path(name, flags, path);
5527c038 30138+ if (!err && d_is_positive(path->dentry))
1facf9fc 30139+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
30140+ return err;
30141+}
30142+
30143+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
30144+ int len)
30145+{
30146+ struct path path = {
30147+ .mnt = NULL
30148+ };
30149+
1308ab2a 30150+ /* VFS checks it too, but by WARN_ON_ONCE() */
5527c038 30151+ IMustLock(d_inode(parent));
1facf9fc 30152+
30153+ path.dentry = lookup_one_len(name, parent, len);
30154+ if (IS_ERR(path.dentry))
30155+ goto out;
5527c038 30156+ if (d_is_positive(path.dentry))
1facf9fc 30157+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
30158+
4f0767ce 30159+out:
4a4d8108 30160+ AuTraceErrPtr(path.dentry);
1facf9fc 30161+ return path.dentry;
30162+}
30163+
b4510431 30164+void vfsub_call_lkup_one(void *args)
2cbb1c4b 30165+{
b4510431
AM
30166+ struct vfsub_lkup_one_args *a = args;
30167+ *a->errp = vfsub_lkup_one(a->name, a->parent);
2cbb1c4b
JR
30168+}
30169+
1facf9fc 30170+/* ---------------------------------------------------------------------- */
30171+
30172+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
30173+ struct dentry *d2, struct au_hinode *hdir2)
30174+{
30175+ struct dentry *d;
30176+
2cbb1c4b 30177+ lockdep_off();
1facf9fc 30178+ d = lock_rename(d1, d2);
2cbb1c4b 30179+ lockdep_on();
4a4d8108 30180+ au_hn_suspend(hdir1);
1facf9fc 30181+ if (hdir1 != hdir2)
4a4d8108 30182+ au_hn_suspend(hdir2);
1facf9fc 30183+
30184+ return d;
30185+}
30186+
30187+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
30188+ struct dentry *d2, struct au_hinode *hdir2)
30189+{
4a4d8108 30190+ au_hn_resume(hdir1);
1facf9fc 30191+ if (hdir1 != hdir2)
4a4d8108 30192+ au_hn_resume(hdir2);
2cbb1c4b 30193+ lockdep_off();
1facf9fc 30194+ unlock_rename(d1, d2);
2cbb1c4b 30195+ lockdep_on();
1facf9fc 30196+}
30197+
30198+/* ---------------------------------------------------------------------- */
30199+
b4510431 30200+int vfsub_create(struct inode *dir, struct path *path, int mode, bool want_excl)
1facf9fc 30201+{
30202+ int err;
30203+ struct dentry *d;
30204+
30205+ IMustLock(dir);
30206+
30207+ d = path->dentry;
30208+ path->dentry = d->d_parent;
b752ccd1 30209+ err = security_path_mknod(path, d, mode, 0);
1facf9fc 30210+ path->dentry = d;
30211+ if (unlikely(err))
30212+ goto out;
30213+
c1595e42 30214+ lockdep_off();
b4510431 30215+ err = vfs_create(dir, path->dentry, mode, want_excl);
c1595e42 30216+ lockdep_on();
1facf9fc 30217+ if (!err) {
30218+ struct path tmp = *path;
30219+ int did;
30220+
30221+ vfsub_update_h_iattr(&tmp, &did);
30222+ if (did) {
30223+ tmp.dentry = path->dentry->d_parent;
30224+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30225+ }
30226+ /*ignore*/
30227+ }
30228+
4f0767ce 30229+out:
1facf9fc 30230+ return err;
30231+}
30232+
30233+int vfsub_symlink(struct inode *dir, struct path *path, const char *symname)
30234+{
30235+ int err;
30236+ struct dentry *d;
30237+
30238+ IMustLock(dir);
30239+
30240+ d = path->dentry;
30241+ path->dentry = d->d_parent;
b752ccd1 30242+ err = security_path_symlink(path, d, symname);
1facf9fc 30243+ path->dentry = d;
30244+ if (unlikely(err))
30245+ goto out;
30246+
c1595e42 30247+ lockdep_off();
1facf9fc 30248+ err = vfs_symlink(dir, path->dentry, symname);
c1595e42 30249+ lockdep_on();
1facf9fc 30250+ if (!err) {
30251+ struct path tmp = *path;
30252+ int did;
30253+
30254+ vfsub_update_h_iattr(&tmp, &did);
30255+ if (did) {
30256+ tmp.dentry = path->dentry->d_parent;
30257+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30258+ }
30259+ /*ignore*/
30260+ }
30261+
4f0767ce 30262+out:
1facf9fc 30263+ return err;
30264+}
30265+
30266+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev)
30267+{
30268+ int err;
30269+ struct dentry *d;
30270+
30271+ IMustLock(dir);
30272+
30273+ d = path->dentry;
30274+ path->dentry = d->d_parent;
027c5e7a 30275+ err = security_path_mknod(path, d, mode, new_encode_dev(dev));
1facf9fc 30276+ path->dentry = d;
30277+ if (unlikely(err))
30278+ goto out;
30279+
c1595e42 30280+ lockdep_off();
1facf9fc 30281+ err = vfs_mknod(dir, path->dentry, mode, dev);
c1595e42 30282+ lockdep_on();
1facf9fc 30283+ if (!err) {
30284+ struct path tmp = *path;
30285+ int did;
30286+
30287+ vfsub_update_h_iattr(&tmp, &did);
30288+ if (did) {
30289+ tmp.dentry = path->dentry->d_parent;
30290+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30291+ }
30292+ /*ignore*/
30293+ }
30294+
4f0767ce 30295+out:
1facf9fc 30296+ return err;
30297+}
30298+
30299+static int au_test_nlink(struct inode *inode)
30300+{
30301+ const unsigned int link_max = UINT_MAX >> 1; /* rough margin */
30302+
30303+ if (!au_test_fs_no_limit_nlink(inode->i_sb)
30304+ || inode->i_nlink < link_max)
30305+ return 0;
30306+ return -EMLINK;
30307+}
30308+
523b37e3
AM
30309+int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path,
30310+ struct inode **delegated_inode)
1facf9fc 30311+{
30312+ int err;
30313+ struct dentry *d;
30314+
30315+ IMustLock(dir);
30316+
5527c038 30317+ err = au_test_nlink(d_inode(src_dentry));
1facf9fc 30318+ if (unlikely(err))
30319+ return err;
30320+
b4510431 30321+ /* we don't call may_linkat() */
1facf9fc 30322+ d = path->dentry;
30323+ path->dentry = d->d_parent;
b752ccd1 30324+ err = security_path_link(src_dentry, path, d);
1facf9fc 30325+ path->dentry = d;
30326+ if (unlikely(err))
30327+ goto out;
30328+
2cbb1c4b 30329+ lockdep_off();
523b37e3 30330+ err = vfs_link(src_dentry, dir, path->dentry, delegated_inode);
2cbb1c4b 30331+ lockdep_on();
1facf9fc 30332+ if (!err) {
30333+ struct path tmp = *path;
30334+ int did;
30335+
30336+ /* fuse has different memory inode for the same inumber */
30337+ vfsub_update_h_iattr(&tmp, &did);
30338+ if (did) {
30339+ tmp.dentry = path->dentry->d_parent;
30340+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30341+ tmp.dentry = src_dentry;
30342+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30343+ }
30344+ /*ignore*/
30345+ }
30346+
4f0767ce 30347+out:
1facf9fc 30348+ return err;
30349+}
30350+
30351+int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry,
523b37e3
AM
30352+ struct inode *dir, struct path *path,
30353+ struct inode **delegated_inode)
1facf9fc 30354+{
30355+ int err;
30356+ struct path tmp = {
30357+ .mnt = path->mnt
30358+ };
30359+ struct dentry *d;
30360+
30361+ IMustLock(dir);
30362+ IMustLock(src_dir);
30363+
30364+ d = path->dentry;
30365+ path->dentry = d->d_parent;
30366+ tmp.dentry = src_dentry->d_parent;
38d290e6 30367+ err = security_path_rename(&tmp, src_dentry, path, d, /*flags*/0);
1facf9fc 30368+ path->dentry = d;
30369+ if (unlikely(err))
30370+ goto out;
30371+
2cbb1c4b 30372+ lockdep_off();
523b37e3 30373+ err = vfs_rename(src_dir, src_dentry, dir, path->dentry,
38d290e6 30374+ delegated_inode, /*flags*/0);
2cbb1c4b 30375+ lockdep_on();
1facf9fc 30376+ if (!err) {
30377+ int did;
30378+
30379+ tmp.dentry = d->d_parent;
30380+ vfsub_update_h_iattr(&tmp, &did);
30381+ if (did) {
30382+ tmp.dentry = src_dentry;
30383+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30384+ tmp.dentry = src_dentry->d_parent;
30385+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30386+ }
30387+ /*ignore*/
30388+ }
30389+
4f0767ce 30390+out:
1facf9fc 30391+ return err;
30392+}
30393+
30394+int vfsub_mkdir(struct inode *dir, struct path *path, int mode)
30395+{
30396+ int err;
30397+ struct dentry *d;
30398+
30399+ IMustLock(dir);
30400+
30401+ d = path->dentry;
30402+ path->dentry = d->d_parent;
b752ccd1 30403+ err = security_path_mkdir(path, d, mode);
1facf9fc 30404+ path->dentry = d;
30405+ if (unlikely(err))
30406+ goto out;
30407+
c1595e42 30408+ lockdep_off();
1facf9fc 30409+ err = vfs_mkdir(dir, path->dentry, mode);
c1595e42 30410+ lockdep_on();
1facf9fc 30411+ if (!err) {
30412+ struct path tmp = *path;
30413+ int did;
30414+
30415+ vfsub_update_h_iattr(&tmp, &did);
30416+ if (did) {
30417+ tmp.dentry = path->dentry->d_parent;
30418+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30419+ }
30420+ /*ignore*/
30421+ }
30422+
4f0767ce 30423+out:
1facf9fc 30424+ return err;
30425+}
30426+
30427+int vfsub_rmdir(struct inode *dir, struct path *path)
30428+{
30429+ int err;
30430+ struct dentry *d;
30431+
30432+ IMustLock(dir);
30433+
30434+ d = path->dentry;
30435+ path->dentry = d->d_parent;
b752ccd1 30436+ err = security_path_rmdir(path, d);
1facf9fc 30437+ path->dentry = d;
30438+ if (unlikely(err))
30439+ goto out;
30440+
2cbb1c4b 30441+ lockdep_off();
1facf9fc 30442+ err = vfs_rmdir(dir, path->dentry);
2cbb1c4b 30443+ lockdep_on();
1facf9fc 30444+ if (!err) {
30445+ struct path tmp = {
30446+ .dentry = path->dentry->d_parent,
30447+ .mnt = path->mnt
30448+ };
30449+
30450+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
30451+ }
30452+
4f0767ce 30453+out:
1facf9fc 30454+ return err;
30455+}
30456+
30457+/* ---------------------------------------------------------------------- */
30458+
9dbd164d 30459+/* todo: support mmap_sem? */
1facf9fc 30460+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
30461+ loff_t *ppos)
30462+{
30463+ ssize_t err;
30464+
2cbb1c4b 30465+ lockdep_off();
1facf9fc 30466+ err = vfs_read(file, ubuf, count, ppos);
2cbb1c4b 30467+ lockdep_on();
1facf9fc 30468+ if (err >= 0)
30469+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
30470+ return err;
30471+}
30472+
30473+/* todo: kernel_read()? */
30474+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
30475+ loff_t *ppos)
30476+{
30477+ ssize_t err;
30478+ mm_segment_t oldfs;
b752ccd1
AM
30479+ union {
30480+ void *k;
30481+ char __user *u;
30482+ } buf;
1facf9fc 30483+
b752ccd1 30484+ buf.k = kbuf;
1facf9fc 30485+ oldfs = get_fs();
30486+ set_fs(KERNEL_DS);
b752ccd1 30487+ err = vfsub_read_u(file, buf.u, count, ppos);
1facf9fc 30488+ set_fs(oldfs);
30489+ return err;
30490+}
30491+
30492+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
30493+ loff_t *ppos)
30494+{
30495+ ssize_t err;
30496+
2cbb1c4b 30497+ lockdep_off();
1facf9fc 30498+ err = vfs_write(file, ubuf, count, ppos);
2cbb1c4b 30499+ lockdep_on();
1facf9fc 30500+ if (err >= 0)
30501+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
30502+ return err;
30503+}
30504+
30505+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos)
30506+{
30507+ ssize_t err;
30508+ mm_segment_t oldfs;
b752ccd1
AM
30509+ union {
30510+ void *k;
30511+ const char __user *u;
30512+ } buf;
1facf9fc 30513+
b752ccd1 30514+ buf.k = kbuf;
1facf9fc 30515+ oldfs = get_fs();
30516+ set_fs(KERNEL_DS);
b752ccd1 30517+ err = vfsub_write_u(file, buf.u, count, ppos);
1facf9fc 30518+ set_fs(oldfs);
30519+ return err;
30520+}
30521+
4a4d8108
AM
30522+int vfsub_flush(struct file *file, fl_owner_t id)
30523+{
30524+ int err;
30525+
30526+ err = 0;
523b37e3 30527+ if (file->f_op->flush) {
2000de60 30528+ if (!au_test_nfs(file->f_path.dentry->d_sb))
2cbb1c4b
JR
30529+ err = file->f_op->flush(file, id);
30530+ else {
30531+ lockdep_off();
30532+ err = file->f_op->flush(file, id);
30533+ lockdep_on();
30534+ }
4a4d8108
AM
30535+ if (!err)
30536+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL);
30537+ /*ignore*/
30538+ }
30539+ return err;
30540+}
30541+
392086de 30542+int vfsub_iterate_dir(struct file *file, struct dir_context *ctx)
1facf9fc 30543+{
30544+ int err;
30545+
523b37e3 30546+ AuDbg("%pD, ctx{%pf, %llu}\n", file, ctx->actor, ctx->pos);
392086de 30547+
2cbb1c4b 30548+ lockdep_off();
392086de 30549+ err = iterate_dir(file, ctx);
2cbb1c4b 30550+ lockdep_on();
1facf9fc 30551+ if (err >= 0)
30552+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
30553+ return err;
30554+}
30555+
30556+long vfsub_splice_to(struct file *in, loff_t *ppos,
30557+ struct pipe_inode_info *pipe, size_t len,
30558+ unsigned int flags)
30559+{
30560+ long err;
30561+
2cbb1c4b 30562+ lockdep_off();
0fc653ad 30563+ err = do_splice_to(in, ppos, pipe, len, flags);
2cbb1c4b 30564+ lockdep_on();
4a4d8108 30565+ file_accessed(in);
1facf9fc 30566+ if (err >= 0)
30567+ vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/
30568+ return err;
30569+}
30570+
30571+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
30572+ loff_t *ppos, size_t len, unsigned int flags)
30573+{
30574+ long err;
30575+
2cbb1c4b 30576+ lockdep_off();
0fc653ad 30577+ err = do_splice_from(pipe, out, ppos, len, flags);
2cbb1c4b 30578+ lockdep_on();
1facf9fc 30579+ if (err >= 0)
30580+ vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/
30581+ return err;
30582+}
30583+
53392da6
AM
30584+int vfsub_fsync(struct file *file, struct path *path, int datasync)
30585+{
30586+ int err;
30587+
30588+ /* file can be NULL */
30589+ lockdep_off();
30590+ err = vfs_fsync(file, datasync);
30591+ lockdep_on();
30592+ if (!err) {
30593+ if (!path) {
30594+ AuDebugOn(!file);
30595+ path = &file->f_path;
30596+ }
30597+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
30598+ }
30599+ return err;
30600+}
30601+
1facf9fc 30602+/* cf. open.c:do_sys_truncate() and do_sys_ftruncate() */
30603+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
30604+ struct file *h_file)
30605+{
30606+ int err;
30607+ struct inode *h_inode;
c06a8ce3 30608+ struct super_block *h_sb;
1facf9fc 30609+
1facf9fc 30610+ if (!h_file) {
c06a8ce3
AM
30611+ err = vfsub_truncate(h_path, length);
30612+ goto out;
1facf9fc 30613+ }
30614+
5527c038 30615+ h_inode = d_inode(h_path->dentry);
c06a8ce3
AM
30616+ h_sb = h_inode->i_sb;
30617+ lockdep_off();
30618+ sb_start_write(h_sb);
30619+ lockdep_on();
1facf9fc 30620+ err = locks_verify_truncate(h_inode, h_file, length);
30621+ if (!err)
953406b4 30622+ err = security_path_truncate(h_path);
2cbb1c4b
JR
30623+ if (!err) {
30624+ lockdep_off();
1facf9fc 30625+ err = do_truncate(h_path->dentry, length, attr, h_file);
2cbb1c4b
JR
30626+ lockdep_on();
30627+ }
c06a8ce3
AM
30628+ lockdep_off();
30629+ sb_end_write(h_sb);
30630+ lockdep_on();
1facf9fc 30631+
4f0767ce 30632+out:
1facf9fc 30633+ return err;
30634+}
30635+
30636+/* ---------------------------------------------------------------------- */
30637+
30638+struct au_vfsub_mkdir_args {
30639+ int *errp;
30640+ struct inode *dir;
30641+ struct path *path;
30642+ int mode;
30643+};
30644+
30645+static void au_call_vfsub_mkdir(void *args)
30646+{
30647+ struct au_vfsub_mkdir_args *a = args;
30648+ *a->errp = vfsub_mkdir(a->dir, a->path, a->mode);
30649+}
30650+
30651+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode)
30652+{
30653+ int err, do_sio, wkq_err;
30654+
30655+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
c1595e42
JR
30656+ if (!do_sio) {
30657+ lockdep_off();
1facf9fc 30658+ err = vfsub_mkdir(dir, path, mode);
c1595e42
JR
30659+ lockdep_on();
30660+ } else {
1facf9fc 30661+ struct au_vfsub_mkdir_args args = {
30662+ .errp = &err,
30663+ .dir = dir,
30664+ .path = path,
30665+ .mode = mode
30666+ };
30667+ wkq_err = au_wkq_wait(au_call_vfsub_mkdir, &args);
30668+ if (unlikely(wkq_err))
30669+ err = wkq_err;
30670+ }
30671+
30672+ return err;
30673+}
30674+
30675+struct au_vfsub_rmdir_args {
30676+ int *errp;
30677+ struct inode *dir;
30678+ struct path *path;
30679+};
30680+
30681+static void au_call_vfsub_rmdir(void *args)
30682+{
30683+ struct au_vfsub_rmdir_args *a = args;
30684+ *a->errp = vfsub_rmdir(a->dir, a->path);
30685+}
30686+
30687+int vfsub_sio_rmdir(struct inode *dir, struct path *path)
30688+{
30689+ int err, do_sio, wkq_err;
30690+
30691+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
c1595e42
JR
30692+ if (!do_sio) {
30693+ lockdep_off();
1facf9fc 30694+ err = vfsub_rmdir(dir, path);
c1595e42
JR
30695+ lockdep_on();
30696+ } else {
1facf9fc 30697+ struct au_vfsub_rmdir_args args = {
30698+ .errp = &err,
30699+ .dir = dir,
30700+ .path = path
30701+ };
30702+ wkq_err = au_wkq_wait(au_call_vfsub_rmdir, &args);
30703+ if (unlikely(wkq_err))
30704+ err = wkq_err;
30705+ }
30706+
30707+ return err;
30708+}
30709+
30710+/* ---------------------------------------------------------------------- */
30711+
30712+struct notify_change_args {
30713+ int *errp;
30714+ struct path *path;
30715+ struct iattr *ia;
523b37e3 30716+ struct inode **delegated_inode;
1facf9fc 30717+};
30718+
30719+static void call_notify_change(void *args)
30720+{
30721+ struct notify_change_args *a = args;
30722+ struct inode *h_inode;
30723+
5527c038 30724+ h_inode = d_inode(a->path->dentry);
1facf9fc 30725+ IMustLock(h_inode);
30726+
30727+ *a->errp = -EPERM;
30728+ if (!IS_IMMUTABLE(h_inode) && !IS_APPEND(h_inode)) {
c1595e42 30729+ lockdep_off();
523b37e3
AM
30730+ *a->errp = notify_change(a->path->dentry, a->ia,
30731+ a->delegated_inode);
c1595e42 30732+ lockdep_on();
1facf9fc 30733+ if (!*a->errp)
30734+ vfsub_update_h_iattr(a->path, /*did*/NULL); /*ignore*/
30735+ }
30736+ AuTraceErr(*a->errp);
30737+}
30738+
523b37e3
AM
30739+int vfsub_notify_change(struct path *path, struct iattr *ia,
30740+ struct inode **delegated_inode)
1facf9fc 30741+{
30742+ int err;
30743+ struct notify_change_args args = {
523b37e3
AM
30744+ .errp = &err,
30745+ .path = path,
30746+ .ia = ia,
30747+ .delegated_inode = delegated_inode
1facf9fc 30748+ };
30749+
30750+ call_notify_change(&args);
30751+
30752+ return err;
30753+}
30754+
523b37e3
AM
30755+int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
30756+ struct inode **delegated_inode)
1facf9fc 30757+{
30758+ int err, wkq_err;
30759+ struct notify_change_args args = {
523b37e3
AM
30760+ .errp = &err,
30761+ .path = path,
30762+ .ia = ia,
30763+ .delegated_inode = delegated_inode
1facf9fc 30764+ };
30765+
30766+ wkq_err = au_wkq_wait(call_notify_change, &args);
30767+ if (unlikely(wkq_err))
30768+ err = wkq_err;
30769+
30770+ return err;
30771+}
30772+
30773+/* ---------------------------------------------------------------------- */
30774+
30775+struct unlink_args {
30776+ int *errp;
30777+ struct inode *dir;
30778+ struct path *path;
523b37e3 30779+ struct inode **delegated_inode;
1facf9fc 30780+};
30781+
30782+static void call_unlink(void *args)
30783+{
30784+ struct unlink_args *a = args;
30785+ struct dentry *d = a->path->dentry;
30786+ struct inode *h_inode;
30787+ const int stop_sillyrename = (au_test_nfs(d->d_sb)
c1595e42 30788+ && au_dcount(d) == 1);
1facf9fc 30789+
30790+ IMustLock(a->dir);
30791+
30792+ a->path->dentry = d->d_parent;
30793+ *a->errp = security_path_unlink(a->path, d);
30794+ a->path->dentry = d;
30795+ if (unlikely(*a->errp))
30796+ return;
30797+
30798+ if (!stop_sillyrename)
30799+ dget(d);
5527c038
JR
30800+ h_inode = NULL;
30801+ if (d_is_positive(d)) {
30802+ h_inode = d_inode(d);
027c5e7a 30803+ ihold(h_inode);
5527c038 30804+ }
1facf9fc 30805+
2cbb1c4b 30806+ lockdep_off();
523b37e3 30807+ *a->errp = vfs_unlink(a->dir, d, a->delegated_inode);
2cbb1c4b 30808+ lockdep_on();
1facf9fc 30809+ if (!*a->errp) {
30810+ struct path tmp = {
30811+ .dentry = d->d_parent,
30812+ .mnt = a->path->mnt
30813+ };
30814+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
30815+ }
30816+
30817+ if (!stop_sillyrename)
30818+ dput(d);
30819+ if (h_inode)
30820+ iput(h_inode);
30821+
30822+ AuTraceErr(*a->errp);
30823+}
30824+
30825+/*
30826+ * @dir: must be locked.
30827+ * @path: target; path->dentry is the entry to be unlinked.
30828+ */
523b37e3
AM
30829+int vfsub_unlink(struct inode *dir, struct path *path,
30830+ struct inode **delegated_inode, int force)
1facf9fc 30831+{
30832+ int err;
30833+ struct unlink_args args = {
523b37e3
AM
30834+ .errp = &err,
30835+ .dir = dir,
30836+ .path = path,
30837+ .delegated_inode = delegated_inode
1facf9fc 30838+ };
30839+
30840+ if (!force)
30841+ call_unlink(&args);
30842+ else {
30843+ int wkq_err;
30844+
30845+ wkq_err = au_wkq_wait(call_unlink, &args);
30846+ if (unlikely(wkq_err))
30847+ err = wkq_err;
30848+ }
30849+
30850+ return err;
30851+}
7f207e10
AM
30852diff -urN /usr/share/empty/fs/aufs/vfsub.h linux/fs/aufs/vfsub.h
30853--- /usr/share/empty/fs/aufs/vfsub.h 1970-01-01 01:00:00.000000000 +0100
ab036dbd 30854+++ linux/fs/aufs/vfsub.h 2015-12-10 18:46:31.223310574 +0100
b912730e 30855@@ -0,0 +1,286 @@
1facf9fc 30856+/*
2000de60 30857+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 30858+ *
30859+ * This program, aufs is free software; you can redistribute it and/or modify
30860+ * it under the terms of the GNU General Public License as published by
30861+ * the Free Software Foundation; either version 2 of the License, or
30862+ * (at your option) any later version.
dece6358
AM
30863+ *
30864+ * This program is distributed in the hope that it will be useful,
30865+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
30866+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30867+ * GNU General Public License for more details.
30868+ *
30869+ * You should have received a copy of the GNU General Public License
523b37e3 30870+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 30871+ */
30872+
30873+/*
30874+ * sub-routines for VFS
30875+ */
30876+
30877+#ifndef __AUFS_VFSUB_H__
30878+#define __AUFS_VFSUB_H__
30879+
30880+#ifdef __KERNEL__
30881+
30882+#include <linux/fs.h>
b4510431 30883+#include <linux/mount.h>
c1595e42 30884+#include <linux/xattr.h>
7f207e10 30885+#include "debug.h"
1facf9fc 30886+
7f207e10 30887+/* copied from linux/fs/internal.h */
2cbb1c4b 30888+/* todo: BAD approach!! */
c06a8ce3 30889+extern void __mnt_drop_write(struct vfsmount *);
2cbb1c4b 30890+extern spinlock_t inode_sb_list_lock;
b912730e 30891+extern int open_check_o_direct(struct file *f);
7f207e10
AM
30892+
30893+/* ---------------------------------------------------------------------- */
1facf9fc 30894+
30895+/* lock subclass for lower inode */
30896+/* default MAX_LOCKDEP_SUBCLASSES(8) is not enough */
30897+/* reduce? gave up. */
30898+enum {
c1595e42 30899+ AuLsc_I_Begin = I_MUTEX_PARENT2, /* 5 */
1facf9fc 30900+ AuLsc_I_PARENT, /* lower inode, parent first */
30901+ AuLsc_I_PARENT2, /* copyup dirs */
dece6358 30902+ AuLsc_I_PARENT3, /* copyup wh */
1facf9fc 30903+ AuLsc_I_CHILD,
30904+ AuLsc_I_CHILD2,
30905+ AuLsc_I_End
30906+};
30907+
30908+/* for easier debugging, keep these as macros, not inline functions */
30909+#define MtxMustLock(mtx) AuDebugOn(!mutex_is_locked(mtx))
30910+#define IMustLock(i) MtxMustLock(&(i)->i_mutex)
30911+
30912+/* ---------------------------------------------------------------------- */
30913+
7f207e10
AM
30914+static inline void vfsub_drop_nlink(struct inode *inode)
30915+{
30916+ AuDebugOn(!inode->i_nlink);
30917+ drop_nlink(inode);
30918+}
30919+
027c5e7a
AM
30920+static inline void vfsub_dead_dir(struct inode *inode)
30921+{
30922+ AuDebugOn(!S_ISDIR(inode->i_mode));
30923+ inode->i_flags |= S_DEAD;
30924+ clear_nlink(inode);
30925+}
30926+
392086de
AM
30927+static inline int vfsub_native_ro(struct inode *inode)
30928+{
30929+ return (inode->i_sb->s_flags & MS_RDONLY)
30930+ || IS_RDONLY(inode)
30931+ /* || IS_APPEND(inode) */
30932+ || IS_IMMUTABLE(inode);
30933+}
30934+
7f207e10
AM
30935+/* ---------------------------------------------------------------------- */
30936+
30937+int vfsub_update_h_iattr(struct path *h_path, int *did);
30938+struct file *vfsub_dentry_open(struct path *path, int flags);
30939+struct file *vfsub_filp_open(const char *path, int oflags, int mode);
b912730e
AM
30940+struct vfsub_aopen_args {
30941+ struct file *file;
30942+ unsigned int open_flag;
30943+ umode_t create_mode;
30944+ int *opened;
30945+};
30946+struct au_branch;
30947+int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
30948+ struct vfsub_aopen_args *args, struct au_branch *br);
1facf9fc 30949+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path);
b4510431 30950+
1facf9fc 30951+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
30952+ int len);
b4510431
AM
30953+
30954+struct vfsub_lkup_one_args {
30955+ struct dentry **errp;
30956+ struct qstr *name;
30957+ struct dentry *parent;
30958+};
30959+
30960+static inline struct dentry *vfsub_lkup_one(struct qstr *name,
30961+ struct dentry *parent)
30962+{
30963+ return vfsub_lookup_one_len(name->name, parent, name->len);
30964+}
30965+
30966+void vfsub_call_lkup_one(void *args);
30967+
30968+/* ---------------------------------------------------------------------- */
30969+
30970+static inline int vfsub_mnt_want_write(struct vfsmount *mnt)
30971+{
30972+ int err;
076b876e 30973+
b4510431
AM
30974+ lockdep_off();
30975+ err = mnt_want_write(mnt);
30976+ lockdep_on();
30977+ return err;
30978+}
30979+
30980+static inline void vfsub_mnt_drop_write(struct vfsmount *mnt)
30981+{
30982+ lockdep_off();
30983+ mnt_drop_write(mnt);
30984+ lockdep_on();
30985+}
1facf9fc 30986+
7e9cd9fe 30987+#if 0 /* reserved */
c06a8ce3
AM
30988+static inline void vfsub_mnt_drop_write_file(struct file *file)
30989+{
30990+ lockdep_off();
30991+ mnt_drop_write_file(file);
30992+ lockdep_on();
30993+}
7e9cd9fe 30994+#endif
c06a8ce3 30995+
1facf9fc 30996+/* ---------------------------------------------------------------------- */
30997+
30998+struct au_hinode;
30999+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
31000+ struct dentry *d2, struct au_hinode *hdir2);
31001+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
31002+ struct dentry *d2, struct au_hinode *hdir2);
31003+
537831f9
AM
31004+int vfsub_create(struct inode *dir, struct path *path, int mode,
31005+ bool want_excl);
1facf9fc 31006+int vfsub_symlink(struct inode *dir, struct path *path,
31007+ const char *symname);
31008+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev);
31009+int vfsub_link(struct dentry *src_dentry, struct inode *dir,
523b37e3 31010+ struct path *path, struct inode **delegated_inode);
1facf9fc 31011+int vfsub_rename(struct inode *src_hdir, struct dentry *src_dentry,
523b37e3
AM
31012+ struct inode *hdir, struct path *path,
31013+ struct inode **delegated_inode);
1facf9fc 31014+int vfsub_mkdir(struct inode *dir, struct path *path, int mode);
31015+int vfsub_rmdir(struct inode *dir, struct path *path);
31016+
31017+/* ---------------------------------------------------------------------- */
31018+
31019+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
31020+ loff_t *ppos);
31021+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
31022+ loff_t *ppos);
31023+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
31024+ loff_t *ppos);
31025+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count,
31026+ loff_t *ppos);
4a4d8108 31027+int vfsub_flush(struct file *file, fl_owner_t id);
392086de
AM
31028+int vfsub_iterate_dir(struct file *file, struct dir_context *ctx);
31029+
c06a8ce3
AM
31030+static inline loff_t vfsub_f_size_read(struct file *file)
31031+{
31032+ return i_size_read(file_inode(file));
31033+}
31034+
4a4d8108
AM
31035+static inline unsigned int vfsub_file_flags(struct file *file)
31036+{
31037+ unsigned int flags;
31038+
31039+ spin_lock(&file->f_lock);
31040+ flags = file->f_flags;
31041+ spin_unlock(&file->f_lock);
31042+
31043+ return flags;
31044+}
1308ab2a 31045+
7e9cd9fe 31046+#if 0 /* reserved */
1facf9fc 31047+static inline void vfsub_file_accessed(struct file *h_file)
31048+{
31049+ file_accessed(h_file);
31050+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); /*ignore*/
31051+}
7e9cd9fe 31052+#endif
1facf9fc 31053+
31054+static inline void vfsub_touch_atime(struct vfsmount *h_mnt,
31055+ struct dentry *h_dentry)
31056+{
31057+ struct path h_path = {
31058+ .dentry = h_dentry,
31059+ .mnt = h_mnt
31060+ };
92d182d2 31061+ touch_atime(&h_path);
1facf9fc 31062+ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
31063+}
31064+
0c3ec466
AM
31065+static inline int vfsub_update_time(struct inode *h_inode, struct timespec *ts,
31066+ int flags)
31067+{
7e9cd9fe 31068+ return generic_update_time(h_inode, ts, flags);
0c3ec466
AM
31069+ /* no vfsub_update_h_iattr() since we don't have struct path */
31070+}
31071+
4a4d8108
AM
31072+long vfsub_splice_to(struct file *in, loff_t *ppos,
31073+ struct pipe_inode_info *pipe, size_t len,
31074+ unsigned int flags);
31075+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
31076+ loff_t *ppos, size_t len, unsigned int flags);
c06a8ce3
AM
31077+
31078+static inline long vfsub_truncate(struct path *path, loff_t length)
31079+{
31080+ long err;
076b876e 31081+
c06a8ce3
AM
31082+ lockdep_off();
31083+ err = vfs_truncate(path, length);
31084+ lockdep_on();
31085+ return err;
31086+}
31087+
4a4d8108
AM
31088+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
31089+ struct file *h_file);
53392da6 31090+int vfsub_fsync(struct file *file, struct path *path, int datasync);
4a4d8108 31091+
1facf9fc 31092+/* ---------------------------------------------------------------------- */
31093+
31094+static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin)
31095+{
31096+ loff_t err;
31097+
2cbb1c4b 31098+ lockdep_off();
1facf9fc 31099+ err = vfs_llseek(file, offset, origin);
2cbb1c4b 31100+ lockdep_on();
1facf9fc 31101+ return err;
31102+}
31103+
31104+/* ---------------------------------------------------------------------- */
31105+
4a4d8108
AM
31106+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode);
31107+int vfsub_sio_rmdir(struct inode *dir, struct path *path);
523b37e3
AM
31108+int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
31109+ struct inode **delegated_inode);
31110+int vfsub_notify_change(struct path *path, struct iattr *ia,
31111+ struct inode **delegated_inode);
31112+int vfsub_unlink(struct inode *dir, struct path *path,
31113+ struct inode **delegated_inode, int force);
4a4d8108 31114+
c1595e42
JR
31115+/* ---------------------------------------------------------------------- */
31116+
31117+static inline int vfsub_setxattr(struct dentry *dentry, const char *name,
31118+ const void *value, size_t size, int flags)
31119+{
31120+ int err;
31121+
31122+ lockdep_off();
31123+ err = vfs_setxattr(dentry, name, value, size, flags);
31124+ lockdep_on();
31125+
31126+ return err;
31127+}
31128+
31129+static inline int vfsub_removexattr(struct dentry *dentry, const char *name)
31130+{
31131+ int err;
31132+
31133+ lockdep_off();
31134+ err = vfs_removexattr(dentry, name);
31135+ lockdep_on();
31136+
31137+ return err;
31138+}
31139+
1facf9fc 31140+#endif /* __KERNEL__ */
31141+#endif /* __AUFS_VFSUB_H__ */
7f207e10
AM
31142diff -urN /usr/share/empty/fs/aufs/wbr_policy.c linux/fs/aufs/wbr_policy.c
31143--- /usr/share/empty/fs/aufs/wbr_policy.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 31144+++ linux/fs/aufs/wbr_policy.c 2015-09-24 10:47:58.258053165 +0200
076b876e 31145@@ -0,0 +1,765 @@
1facf9fc 31146+/*
2000de60 31147+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 31148+ *
31149+ * This program, aufs is free software; you can redistribute it and/or modify
31150+ * it under the terms of the GNU General Public License as published by
31151+ * the Free Software Foundation; either version 2 of the License, or
31152+ * (at your option) any later version.
dece6358
AM
31153+ *
31154+ * This program is distributed in the hope that it will be useful,
31155+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
31156+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31157+ * GNU General Public License for more details.
31158+ *
31159+ * You should have received a copy of the GNU General Public License
523b37e3 31160+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 31161+ */
31162+
31163+/*
31164+ * policies for selecting one among multiple writable branches
31165+ */
31166+
31167+#include <linux/statfs.h>
31168+#include "aufs.h"
31169+
31170+/* subset of cpup_attr() */
31171+static noinline_for_stack
31172+int au_cpdown_attr(struct path *h_path, struct dentry *h_src)
31173+{
31174+ int err, sbits;
31175+ struct iattr ia;
31176+ struct inode *h_isrc;
31177+
5527c038 31178+ h_isrc = d_inode(h_src);
1facf9fc 31179+ ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID;
31180+ ia.ia_mode = h_isrc->i_mode;
31181+ ia.ia_uid = h_isrc->i_uid;
31182+ ia.ia_gid = h_isrc->i_gid;
31183+ sbits = !!(ia.ia_mode & (S_ISUID | S_ISGID));
5527c038 31184+ au_cpup_attr_flags(d_inode(h_path->dentry), h_isrc->i_flags);
523b37e3
AM
31185+ /* no delegation since it is just created */
31186+ err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
1facf9fc 31187+
31188+ /* is this nfs only? */
31189+ if (!err && sbits && au_test_nfs(h_path->dentry->d_sb)) {
31190+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
31191+ ia.ia_mode = h_isrc->i_mode;
523b37e3 31192+ err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
1facf9fc 31193+ }
31194+
31195+ return err;
31196+}
31197+
31198+#define AuCpdown_PARENT_OPQ 1
31199+#define AuCpdown_WHED (1 << 1)
31200+#define AuCpdown_MADE_DIR (1 << 2)
31201+#define AuCpdown_DIROPQ (1 << 3)
31202+#define au_ftest_cpdown(flags, name) ((flags) & AuCpdown_##name)
7f207e10
AM
31203+#define au_fset_cpdown(flags, name) \
31204+ do { (flags) |= AuCpdown_##name; } while (0)
31205+#define au_fclr_cpdown(flags, name) \
31206+ do { (flags) &= ~AuCpdown_##name; } while (0)
1facf9fc 31207+
1facf9fc 31208+static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst,
c2b27bf2 31209+ unsigned int *flags)
1facf9fc 31210+{
31211+ int err;
31212+ struct dentry *opq_dentry;
31213+
31214+ opq_dentry = au_diropq_create(dentry, bdst);
31215+ err = PTR_ERR(opq_dentry);
31216+ if (IS_ERR(opq_dentry))
31217+ goto out;
31218+ dput(opq_dentry);
c2b27bf2 31219+ au_fset_cpdown(*flags, DIROPQ);
1facf9fc 31220+
4f0767ce 31221+out:
1facf9fc 31222+ return err;
31223+}
31224+
31225+static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent,
31226+ struct inode *dir, aufs_bindex_t bdst)
31227+{
31228+ int err;
31229+ struct path h_path;
31230+ struct au_branch *br;
31231+
31232+ br = au_sbr(dentry->d_sb, bdst);
31233+ h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
31234+ err = PTR_ERR(h_path.dentry);
31235+ if (IS_ERR(h_path.dentry))
31236+ goto out;
31237+
31238+ err = 0;
5527c038 31239+ if (d_is_positive(h_path.dentry)) {
86dc4139 31240+ h_path.mnt = au_br_mnt(br);
1facf9fc 31241+ err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path,
31242+ dentry);
31243+ }
31244+ dput(h_path.dentry);
31245+
4f0767ce 31246+out:
1facf9fc 31247+ return err;
31248+}
31249+
31250+static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 31251+ struct au_pin *pin,
1facf9fc 31252+ struct dentry *h_parent, void *arg)
31253+{
31254+ int err, rerr;
4a4d8108 31255+ aufs_bindex_t bopq, bstart;
1facf9fc 31256+ struct path h_path;
31257+ struct dentry *parent;
31258+ struct inode *h_dir, *h_inode, *inode, *dir;
c2b27bf2 31259+ unsigned int *flags = arg;
1facf9fc 31260+
31261+ bstart = au_dbstart(dentry);
31262+ /* dentry is di-locked */
31263+ parent = dget_parent(dentry);
5527c038
JR
31264+ dir = d_inode(parent);
31265+ h_dir = d_inode(h_parent);
1facf9fc 31266+ AuDebugOn(h_dir != au_h_iptr(dir, bdst));
31267+ IMustLock(h_dir);
31268+
86dc4139 31269+ err = au_lkup_neg(dentry, bdst, /*wh*/0);
1facf9fc 31270+ if (unlikely(err < 0))
31271+ goto out;
31272+ h_path.dentry = au_h_dptr(dentry, bdst);
31273+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst);
31274+ err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path,
31275+ S_IRWXU | S_IRUGO | S_IXUGO);
31276+ if (unlikely(err))
31277+ goto out_put;
c2b27bf2 31278+ au_fset_cpdown(*flags, MADE_DIR);
1facf9fc 31279+
1facf9fc 31280+ bopq = au_dbdiropq(dentry);
c2b27bf2
AM
31281+ au_fclr_cpdown(*flags, WHED);
31282+ au_fclr_cpdown(*flags, DIROPQ);
1facf9fc 31283+ if (au_dbwh(dentry) == bdst)
c2b27bf2
AM
31284+ au_fset_cpdown(*flags, WHED);
31285+ if (!au_ftest_cpdown(*flags, PARENT_OPQ) && bopq <= bdst)
31286+ au_fset_cpdown(*flags, PARENT_OPQ);
5527c038 31287+ h_inode = d_inode(h_path.dentry);
1facf9fc 31288+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
c2b27bf2
AM
31289+ if (au_ftest_cpdown(*flags, WHED)) {
31290+ err = au_cpdown_dir_opq(dentry, bdst, flags);
1facf9fc 31291+ if (unlikely(err)) {
31292+ mutex_unlock(&h_inode->i_mutex);
31293+ goto out_dir;
31294+ }
31295+ }
31296+
31297+ err = au_cpdown_attr(&h_path, au_h_dptr(dentry, bstart));
31298+ mutex_unlock(&h_inode->i_mutex);
31299+ if (unlikely(err))
31300+ goto out_opq;
31301+
c2b27bf2 31302+ if (au_ftest_cpdown(*flags, WHED)) {
1facf9fc 31303+ err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst);
31304+ if (unlikely(err))
31305+ goto out_opq;
31306+ }
31307+
5527c038 31308+ inode = d_inode(dentry);
1facf9fc 31309+ if (au_ibend(inode) < bdst)
31310+ au_set_ibend(inode, bdst);
31311+ au_set_h_iptr(inode, bdst, au_igrab(h_inode),
31312+ au_hi_flags(inode, /*isdir*/1));
076b876e 31313+ au_fhsm_wrote(dentry->d_sb, bdst, /*force*/0);
1facf9fc 31314+ goto out; /* success */
31315+
31316+ /* revert */
4f0767ce 31317+out_opq:
c2b27bf2 31318+ if (au_ftest_cpdown(*flags, DIROPQ)) {
1facf9fc 31319+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
31320+ rerr = au_diropq_remove(dentry, bdst);
31321+ mutex_unlock(&h_inode->i_mutex);
31322+ if (unlikely(rerr)) {
523b37e3
AM
31323+ AuIOErr("failed removing diropq for %pd b%d (%d)\n",
31324+ dentry, bdst, rerr);
1facf9fc 31325+ err = -EIO;
31326+ goto out;
31327+ }
31328+ }
4f0767ce 31329+out_dir:
c2b27bf2 31330+ if (au_ftest_cpdown(*flags, MADE_DIR)) {
1facf9fc 31331+ rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path);
31332+ if (unlikely(rerr)) {
523b37e3
AM
31333+ AuIOErr("failed removing %pd b%d (%d)\n",
31334+ dentry, bdst, rerr);
1facf9fc 31335+ err = -EIO;
31336+ }
31337+ }
4f0767ce 31338+out_put:
1facf9fc 31339+ au_set_h_dptr(dentry, bdst, NULL);
31340+ if (au_dbend(dentry) == bdst)
31341+ au_update_dbend(dentry);
4f0767ce 31342+out:
1facf9fc 31343+ dput(parent);
31344+ return err;
31345+}
31346+
31347+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst)
31348+{
31349+ int err;
c2b27bf2 31350+ unsigned int flags;
1facf9fc 31351+
c2b27bf2
AM
31352+ flags = 0;
31353+ err = au_cp_dirs(dentry, bdst, au_cpdown_dir, &flags);
1facf9fc 31354+
31355+ return err;
31356+}
31357+
31358+/* ---------------------------------------------------------------------- */
31359+
31360+/* policies for create */
31361+
c2b27bf2 31362+int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex)
4a4d8108
AM
31363+{
31364+ int err, i, j, ndentry;
31365+ aufs_bindex_t bopq;
31366+ struct au_dcsub_pages dpages;
31367+ struct au_dpage *dpage;
31368+ struct dentry **dentries, *parent, *d;
31369+
31370+ err = au_dpages_init(&dpages, GFP_NOFS);
31371+ if (unlikely(err))
31372+ goto out;
31373+ parent = dget_parent(dentry);
027c5e7a 31374+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/0);
4a4d8108
AM
31375+ if (unlikely(err))
31376+ goto out_free;
31377+
31378+ err = bindex;
31379+ for (i = 0; i < dpages.ndpage; i++) {
31380+ dpage = dpages.dpages + i;
31381+ dentries = dpage->dentries;
31382+ ndentry = dpage->ndentry;
31383+ for (j = 0; j < ndentry; j++) {
31384+ d = dentries[j];
31385+ di_read_lock_parent2(d, !AuLock_IR);
31386+ bopq = au_dbdiropq(d);
31387+ di_read_unlock(d, !AuLock_IR);
31388+ if (bopq >= 0 && bopq < err)
31389+ err = bopq;
31390+ }
31391+ }
31392+
31393+out_free:
31394+ dput(parent);
31395+ au_dpages_free(&dpages);
31396+out:
31397+ return err;
31398+}
31399+
1facf9fc 31400+static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex)
31401+{
31402+ for (; bindex >= 0; bindex--)
31403+ if (!au_br_rdonly(au_sbr(sb, bindex)))
31404+ return bindex;
31405+ return -EROFS;
31406+}
31407+
31408+/* top down parent */
392086de
AM
31409+static int au_wbr_create_tdp(struct dentry *dentry,
31410+ unsigned int flags __maybe_unused)
1facf9fc 31411+{
31412+ int err;
31413+ aufs_bindex_t bstart, bindex;
31414+ struct super_block *sb;
31415+ struct dentry *parent, *h_parent;
31416+
31417+ sb = dentry->d_sb;
31418+ bstart = au_dbstart(dentry);
31419+ err = bstart;
31420+ if (!au_br_rdonly(au_sbr(sb, bstart)))
31421+ goto out;
31422+
31423+ err = -EROFS;
31424+ parent = dget_parent(dentry);
31425+ for (bindex = au_dbstart(parent); bindex < bstart; bindex++) {
31426+ h_parent = au_h_dptr(parent, bindex);
5527c038 31427+ if (!h_parent || d_is_negative(h_parent))
1facf9fc 31428+ continue;
31429+
31430+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
31431+ err = bindex;
31432+ break;
31433+ }
31434+ }
31435+ dput(parent);
31436+
31437+ /* bottom up here */
4a4d8108 31438+ if (unlikely(err < 0)) {
1facf9fc 31439+ err = au_wbr_bu(sb, bstart - 1);
4a4d8108
AM
31440+ if (err >= 0)
31441+ err = au_wbr_nonopq(dentry, err);
31442+ }
1facf9fc 31443+
4f0767ce 31444+out:
1facf9fc 31445+ AuDbg("b%d\n", err);
31446+ return err;
31447+}
31448+
31449+/* ---------------------------------------------------------------------- */
31450+
31451+/* an exception for the policy other than tdp */
31452+static int au_wbr_create_exp(struct dentry *dentry)
31453+{
31454+ int err;
31455+ aufs_bindex_t bwh, bdiropq;
31456+ struct dentry *parent;
31457+
31458+ err = -1;
31459+ bwh = au_dbwh(dentry);
31460+ parent = dget_parent(dentry);
31461+ bdiropq = au_dbdiropq(parent);
31462+ if (bwh >= 0) {
31463+ if (bdiropq >= 0)
31464+ err = min(bdiropq, bwh);
31465+ else
31466+ err = bwh;
31467+ AuDbg("%d\n", err);
31468+ } else if (bdiropq >= 0) {
31469+ err = bdiropq;
31470+ AuDbg("%d\n", err);
31471+ }
31472+ dput(parent);
31473+
4a4d8108
AM
31474+ if (err >= 0)
31475+ err = au_wbr_nonopq(dentry, err);
31476+
1facf9fc 31477+ if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err)))
31478+ err = -1;
31479+
31480+ AuDbg("%d\n", err);
31481+ return err;
31482+}
31483+
31484+/* ---------------------------------------------------------------------- */
31485+
31486+/* round robin */
31487+static int au_wbr_create_init_rr(struct super_block *sb)
31488+{
31489+ int err;
31490+
31491+ err = au_wbr_bu(sb, au_sbend(sb));
31492+ atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */
dece6358 31493+ /* smp_mb(); */
1facf9fc 31494+
31495+ AuDbg("b%d\n", err);
31496+ return err;
31497+}
31498+
392086de 31499+static int au_wbr_create_rr(struct dentry *dentry, unsigned int flags)
1facf9fc 31500+{
31501+ int err, nbr;
31502+ unsigned int u;
31503+ aufs_bindex_t bindex, bend;
31504+ struct super_block *sb;
31505+ atomic_t *next;
31506+
31507+ err = au_wbr_create_exp(dentry);
31508+ if (err >= 0)
31509+ goto out;
31510+
31511+ sb = dentry->d_sb;
31512+ next = &au_sbi(sb)->si_wbr_rr_next;
31513+ bend = au_sbend(sb);
31514+ nbr = bend + 1;
31515+ for (bindex = 0; bindex <= bend; bindex++) {
392086de 31516+ if (!au_ftest_wbr(flags, DIR)) {
1facf9fc 31517+ err = atomic_dec_return(next) + 1;
31518+ /* modulo for 0 is meaningless */
31519+ if (unlikely(!err))
31520+ err = atomic_dec_return(next) + 1;
31521+ } else
31522+ err = atomic_read(next);
31523+ AuDbg("%d\n", err);
31524+ u = err;
31525+ err = u % nbr;
31526+ AuDbg("%d\n", err);
31527+ if (!au_br_rdonly(au_sbr(sb, err)))
31528+ break;
31529+ err = -EROFS;
31530+ }
31531+
4a4d8108
AM
31532+ if (err >= 0)
31533+ err = au_wbr_nonopq(dentry, err);
31534+
4f0767ce 31535+out:
1facf9fc 31536+ AuDbg("%d\n", err);
31537+ return err;
31538+}
31539+
31540+/* ---------------------------------------------------------------------- */
31541+
31542+/* most free space */
392086de 31543+static void au_mfs(struct dentry *dentry, struct dentry *parent)
1facf9fc 31544+{
31545+ struct super_block *sb;
31546+ struct au_branch *br;
31547+ struct au_wbr_mfs *mfs;
392086de 31548+ struct dentry *h_parent;
1facf9fc 31549+ aufs_bindex_t bindex, bend;
31550+ int err;
31551+ unsigned long long b, bavail;
7f207e10 31552+ struct path h_path;
1facf9fc 31553+ /* reduce the stack usage */
31554+ struct kstatfs *st;
31555+
31556+ st = kmalloc(sizeof(*st), GFP_NOFS);
31557+ if (unlikely(!st)) {
31558+ AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM);
31559+ return;
31560+ }
31561+
31562+ bavail = 0;
31563+ sb = dentry->d_sb;
31564+ mfs = &au_sbi(sb)->si_wbr_mfs;
dece6358 31565+ MtxMustLock(&mfs->mfs_lock);
1facf9fc 31566+ mfs->mfs_bindex = -EROFS;
31567+ mfs->mfsrr_bytes = 0;
392086de
AM
31568+ if (!parent) {
31569+ bindex = 0;
31570+ bend = au_sbend(sb);
31571+ } else {
31572+ bindex = au_dbstart(parent);
31573+ bend = au_dbtaildir(parent);
31574+ }
31575+
31576+ for (; bindex <= bend; bindex++) {
31577+ if (parent) {
31578+ h_parent = au_h_dptr(parent, bindex);
5527c038 31579+ if (!h_parent || d_is_negative(h_parent))
392086de
AM
31580+ continue;
31581+ }
1facf9fc 31582+ br = au_sbr(sb, bindex);
31583+ if (au_br_rdonly(br))
31584+ continue;
31585+
31586+ /* sb->s_root for NFS is unreliable */
86dc4139 31587+ h_path.mnt = au_br_mnt(br);
7f207e10
AM
31588+ h_path.dentry = h_path.mnt->mnt_root;
31589+ err = vfs_statfs(&h_path, st);
1facf9fc 31590+ if (unlikely(err)) {
31591+ AuWarn1("failed statfs, b%d, %d\n", bindex, err);
31592+ continue;
31593+ }
31594+
31595+ /* when the available size is equal, select the lower one */
31596+ BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail)
31597+ || sizeof(b) < sizeof(st->f_bsize));
31598+ b = st->f_bavail * st->f_bsize;
31599+ br->br_wbr->wbr_bytes = b;
31600+ if (b >= bavail) {
31601+ bavail = b;
31602+ mfs->mfs_bindex = bindex;
31603+ mfs->mfs_jiffy = jiffies;
31604+ }
31605+ }
31606+
31607+ mfs->mfsrr_bytes = bavail;
31608+ AuDbg("b%d\n", mfs->mfs_bindex);
31609+ kfree(st);
31610+}
31611+
392086de 31612+static int au_wbr_create_mfs(struct dentry *dentry, unsigned int flags)
1facf9fc 31613+{
31614+ int err;
392086de 31615+ struct dentry *parent;
1facf9fc 31616+ struct super_block *sb;
31617+ struct au_wbr_mfs *mfs;
31618+
31619+ err = au_wbr_create_exp(dentry);
31620+ if (err >= 0)
31621+ goto out;
31622+
31623+ sb = dentry->d_sb;
392086de
AM
31624+ parent = NULL;
31625+ if (au_ftest_wbr(flags, PARENT))
31626+ parent = dget_parent(dentry);
1facf9fc 31627+ mfs = &au_sbi(sb)->si_wbr_mfs;
31628+ mutex_lock(&mfs->mfs_lock);
31629+ if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
31630+ || mfs->mfs_bindex < 0
31631+ || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex)))
392086de 31632+ au_mfs(dentry, parent);
1facf9fc 31633+ mutex_unlock(&mfs->mfs_lock);
31634+ err = mfs->mfs_bindex;
392086de 31635+ dput(parent);
1facf9fc 31636+
4a4d8108
AM
31637+ if (err >= 0)
31638+ err = au_wbr_nonopq(dentry, err);
31639+
4f0767ce 31640+out:
1facf9fc 31641+ AuDbg("b%d\n", err);
31642+ return err;
31643+}
31644+
31645+static int au_wbr_create_init_mfs(struct super_block *sb)
31646+{
31647+ struct au_wbr_mfs *mfs;
31648+
31649+ mfs = &au_sbi(sb)->si_wbr_mfs;
31650+ mutex_init(&mfs->mfs_lock);
31651+ mfs->mfs_jiffy = 0;
31652+ mfs->mfs_bindex = -EROFS;
31653+
31654+ return 0;
31655+}
31656+
31657+static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused)
31658+{
31659+ mutex_destroy(&au_sbi(sb)->si_wbr_mfs.mfs_lock);
31660+ return 0;
31661+}
31662+
31663+/* ---------------------------------------------------------------------- */
31664+
31665+/* most free space and then round robin */
392086de 31666+static int au_wbr_create_mfsrr(struct dentry *dentry, unsigned int flags)
1facf9fc 31667+{
31668+ int err;
31669+ struct au_wbr_mfs *mfs;
31670+
392086de 31671+ err = au_wbr_create_mfs(dentry, flags);
1facf9fc 31672+ if (err >= 0) {
31673+ mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs;
dece6358 31674+ mutex_lock(&mfs->mfs_lock);
1facf9fc 31675+ if (mfs->mfsrr_bytes < mfs->mfsrr_watermark)
392086de 31676+ err = au_wbr_create_rr(dentry, flags);
dece6358 31677+ mutex_unlock(&mfs->mfs_lock);
1facf9fc 31678+ }
31679+
31680+ AuDbg("b%d\n", err);
31681+ return err;
31682+}
31683+
31684+static int au_wbr_create_init_mfsrr(struct super_block *sb)
31685+{
31686+ int err;
31687+
31688+ au_wbr_create_init_mfs(sb); /* ignore */
31689+ err = au_wbr_create_init_rr(sb);
31690+
31691+ return err;
31692+}
31693+
31694+/* ---------------------------------------------------------------------- */
31695+
31696+/* top down parent and most free space */
392086de 31697+static int au_wbr_create_pmfs(struct dentry *dentry, unsigned int flags)
1facf9fc 31698+{
31699+ int err, e2;
31700+ unsigned long long b;
31701+ aufs_bindex_t bindex, bstart, bend;
31702+ struct super_block *sb;
31703+ struct dentry *parent, *h_parent;
31704+ struct au_branch *br;
31705+
392086de 31706+ err = au_wbr_create_tdp(dentry, flags);
1facf9fc 31707+ if (unlikely(err < 0))
31708+ goto out;
31709+ parent = dget_parent(dentry);
31710+ bstart = au_dbstart(parent);
31711+ bend = au_dbtaildir(parent);
31712+ if (bstart == bend)
31713+ goto out_parent; /* success */
31714+
392086de 31715+ e2 = au_wbr_create_mfs(dentry, flags);
1facf9fc 31716+ if (e2 < 0)
31717+ goto out_parent; /* success */
31718+
31719+ /* when the available size is equal, select upper one */
31720+ sb = dentry->d_sb;
31721+ br = au_sbr(sb, err);
31722+ b = br->br_wbr->wbr_bytes;
31723+ AuDbg("b%d, %llu\n", err, b);
31724+
31725+ for (bindex = bstart; bindex <= bend; bindex++) {
31726+ h_parent = au_h_dptr(parent, bindex);
5527c038 31727+ if (!h_parent || d_is_negative(h_parent))
1facf9fc 31728+ continue;
31729+
31730+ br = au_sbr(sb, bindex);
31731+ if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) {
31732+ b = br->br_wbr->wbr_bytes;
31733+ err = bindex;
31734+ AuDbg("b%d, %llu\n", err, b);
31735+ }
31736+ }
31737+
4a4d8108
AM
31738+ if (err >= 0)
31739+ err = au_wbr_nonopq(dentry, err);
31740+
4f0767ce 31741+out_parent:
1facf9fc 31742+ dput(parent);
4f0767ce 31743+out:
1facf9fc 31744+ AuDbg("b%d\n", err);
31745+ return err;
31746+}
31747+
31748+/* ---------------------------------------------------------------------- */
31749+
392086de
AM
31750+/*
31751+ * - top down parent
31752+ * - most free space with parent
31753+ * - most free space round-robin regardless of parent
31754+ */
31755+static int au_wbr_create_pmfsrr(struct dentry *dentry, unsigned int flags)
31756+{
31757+ int err;
31758+ unsigned long long watermark;
31759+ struct super_block *sb;
31760+ struct au_branch *br;
31761+ struct au_wbr_mfs *mfs;
31762+
31763+ err = au_wbr_create_pmfs(dentry, flags | AuWbr_PARENT);
31764+ if (unlikely(err < 0))
31765+ goto out;
31766+
31767+ sb = dentry->d_sb;
31768+ br = au_sbr(sb, err);
31769+ mfs = &au_sbi(sb)->si_wbr_mfs;
31770+ mutex_lock(&mfs->mfs_lock);
31771+ watermark = mfs->mfsrr_watermark;
31772+ mutex_unlock(&mfs->mfs_lock);
31773+ if (br->br_wbr->wbr_bytes < watermark)
31774+ /* regardless of the parent dir */
31775+ err = au_wbr_create_mfsrr(dentry, flags);
31776+
31777+out:
31778+ AuDbg("b%d\n", err);
31779+ return err;
31780+}
31781+
31782+/* ---------------------------------------------------------------------- */
31783+
1facf9fc 31784+/* policies for copyup */
31785+
31786+/* top down parent */
31787+static int au_wbr_copyup_tdp(struct dentry *dentry)
31788+{
392086de 31789+ return au_wbr_create_tdp(dentry, /*flags, anything is ok*/0);
1facf9fc 31790+}
31791+
31792+/* bottom up parent */
31793+static int au_wbr_copyup_bup(struct dentry *dentry)
31794+{
31795+ int err;
31796+ aufs_bindex_t bindex, bstart;
31797+ struct dentry *parent, *h_parent;
31798+ struct super_block *sb;
31799+
31800+ err = -EROFS;
31801+ sb = dentry->d_sb;
31802+ parent = dget_parent(dentry);
31803+ bstart = au_dbstart(parent);
31804+ for (bindex = au_dbstart(dentry); bindex >= bstart; bindex--) {
31805+ h_parent = au_h_dptr(parent, bindex);
5527c038 31806+ if (!h_parent || d_is_negative(h_parent))
1facf9fc 31807+ continue;
31808+
31809+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
31810+ err = bindex;
31811+ break;
31812+ }
31813+ }
31814+ dput(parent);
31815+
31816+ /* bottom up here */
31817+ if (unlikely(err < 0))
31818+ err = au_wbr_bu(sb, bstart - 1);
31819+
31820+ AuDbg("b%d\n", err);
31821+ return err;
31822+}
31823+
31824+/* bottom up */
076b876e 31825+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t bstart)
1facf9fc 31826+{
31827+ int err;
31828+
4a4d8108
AM
31829+ err = au_wbr_bu(dentry->d_sb, bstart);
31830+ AuDbg("b%d\n", err);
31831+ if (err > bstart)
31832+ err = au_wbr_nonopq(dentry, err);
1facf9fc 31833+
31834+ AuDbg("b%d\n", err);
31835+ return err;
31836+}
31837+
076b876e
AM
31838+static int au_wbr_copyup_bu(struct dentry *dentry)
31839+{
31840+ int err;
31841+ aufs_bindex_t bstart;
31842+
31843+ bstart = au_dbstart(dentry);
31844+ err = au_wbr_do_copyup_bu(dentry, bstart);
31845+ return err;
31846+}
31847+
1facf9fc 31848+/* ---------------------------------------------------------------------- */
31849+
31850+struct au_wbr_copyup_operations au_wbr_copyup_ops[] = {
31851+ [AuWbrCopyup_TDP] = {
31852+ .copyup = au_wbr_copyup_tdp
31853+ },
31854+ [AuWbrCopyup_BUP] = {
31855+ .copyup = au_wbr_copyup_bup
31856+ },
31857+ [AuWbrCopyup_BU] = {
31858+ .copyup = au_wbr_copyup_bu
31859+ }
31860+};
31861+
31862+struct au_wbr_create_operations au_wbr_create_ops[] = {
31863+ [AuWbrCreate_TDP] = {
31864+ .create = au_wbr_create_tdp
31865+ },
31866+ [AuWbrCreate_RR] = {
31867+ .create = au_wbr_create_rr,
31868+ .init = au_wbr_create_init_rr
31869+ },
31870+ [AuWbrCreate_MFS] = {
31871+ .create = au_wbr_create_mfs,
31872+ .init = au_wbr_create_init_mfs,
31873+ .fin = au_wbr_create_fin_mfs
31874+ },
31875+ [AuWbrCreate_MFSV] = {
31876+ .create = au_wbr_create_mfs,
31877+ .init = au_wbr_create_init_mfs,
31878+ .fin = au_wbr_create_fin_mfs
31879+ },
31880+ [AuWbrCreate_MFSRR] = {
31881+ .create = au_wbr_create_mfsrr,
31882+ .init = au_wbr_create_init_mfsrr,
31883+ .fin = au_wbr_create_fin_mfs
31884+ },
31885+ [AuWbrCreate_MFSRRV] = {
31886+ .create = au_wbr_create_mfsrr,
31887+ .init = au_wbr_create_init_mfsrr,
31888+ .fin = au_wbr_create_fin_mfs
31889+ },
31890+ [AuWbrCreate_PMFS] = {
31891+ .create = au_wbr_create_pmfs,
31892+ .init = au_wbr_create_init_mfs,
31893+ .fin = au_wbr_create_fin_mfs
31894+ },
31895+ [AuWbrCreate_PMFSV] = {
31896+ .create = au_wbr_create_pmfs,
31897+ .init = au_wbr_create_init_mfs,
31898+ .fin = au_wbr_create_fin_mfs
392086de
AM
31899+ },
31900+ [AuWbrCreate_PMFSRR] = {
31901+ .create = au_wbr_create_pmfsrr,
31902+ .init = au_wbr_create_init_mfsrr,
31903+ .fin = au_wbr_create_fin_mfs
31904+ },
31905+ [AuWbrCreate_PMFSRRV] = {
31906+ .create = au_wbr_create_pmfsrr,
31907+ .init = au_wbr_create_init_mfsrr,
31908+ .fin = au_wbr_create_fin_mfs
1facf9fc 31909+ }
31910+};
7f207e10
AM
31911diff -urN /usr/share/empty/fs/aufs/whout.c linux/fs/aufs/whout.c
31912--- /usr/share/empty/fs/aufs/whout.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 31913+++ linux/fs/aufs/whout.c 2015-09-24 10:47:58.258053165 +0200
5527c038 31914@@ -0,0 +1,1063 @@
1facf9fc 31915+/*
2000de60 31916+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 31917+ *
31918+ * This program, aufs is free software; you can redistribute it and/or modify
31919+ * it under the terms of the GNU General Public License as published by
31920+ * the Free Software Foundation; either version 2 of the License, or
31921+ * (at your option) any later version.
dece6358
AM
31922+ *
31923+ * This program is distributed in the hope that it will be useful,
31924+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
31925+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31926+ * GNU General Public License for more details.
31927+ *
31928+ * You should have received a copy of the GNU General Public License
523b37e3 31929+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 31930+ */
31931+
31932+/*
31933+ * whiteout for logical deletion and opaque directory
31934+ */
31935+
1facf9fc 31936+#include "aufs.h"
31937+
31938+#define WH_MASK S_IRUGO
31939+
31940+/*
31941+ * If a directory contains this file, then it is opaque. We start with the
31942+ * .wh. flag so that it is blocked by lookup.
31943+ */
0c3ec466
AM
31944+static struct qstr diropq_name = QSTR_INIT(AUFS_WH_DIROPQ,
31945+ sizeof(AUFS_WH_DIROPQ) - 1);
1facf9fc 31946+
31947+/*
31948+ * generate whiteout name, which is NOT terminated by NUL.
31949+ * @name: original d_name.name
31950+ * @len: original d_name.len
31951+ * @wh: whiteout qstr
31952+ * returns zero when succeeds, otherwise error.
31953+ * on success, wh->name should be freed by kfree().
31954+ */
31955+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name)
31956+{
31957+ char *p;
31958+
31959+ if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN))
31960+ return -ENAMETOOLONG;
31961+
31962+ wh->len = name->len + AUFS_WH_PFX_LEN;
31963+ p = kmalloc(wh->len, GFP_NOFS);
31964+ wh->name = p;
31965+ if (p) {
31966+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
31967+ memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len);
31968+ /* smp_mb(); */
31969+ return 0;
31970+ }
31971+ return -ENOMEM;
31972+}
31973+
31974+/* ---------------------------------------------------------------------- */
31975+
31976+/*
31977+ * test if the @wh_name exists under @h_parent.
31978+ * @try_sio specifies the necessity of super-io.
31979+ */
076b876e 31980+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio)
1facf9fc 31981+{
31982+ int err;
31983+ struct dentry *wh_dentry;
1facf9fc 31984+
1facf9fc 31985+ if (!try_sio)
b4510431 31986+ wh_dentry = vfsub_lkup_one(wh_name, h_parent);
1facf9fc 31987+ else
076b876e 31988+ wh_dentry = au_sio_lkup_one(wh_name, h_parent);
1facf9fc 31989+ err = PTR_ERR(wh_dentry);
2000de60
JR
31990+ if (IS_ERR(wh_dentry)) {
31991+ if (err == -ENAMETOOLONG)
31992+ err = 0;
1facf9fc 31993+ goto out;
2000de60 31994+ }
1facf9fc 31995+
31996+ err = 0;
5527c038 31997+ if (d_is_negative(wh_dentry))
1facf9fc 31998+ goto out_wh; /* success */
31999+
32000+ err = 1;
7e9cd9fe 32001+ if (d_is_reg(wh_dentry))
1facf9fc 32002+ goto out_wh; /* success */
32003+
32004+ err = -EIO;
523b37e3 32005+ AuIOErr("%pd Invalid whiteout entry type 0%o.\n",
5527c038 32006+ wh_dentry, d_inode(wh_dentry)->i_mode);
1facf9fc 32007+
4f0767ce 32008+out_wh:
1facf9fc 32009+ dput(wh_dentry);
4f0767ce 32010+out:
1facf9fc 32011+ return err;
32012+}
32013+
32014+/*
32015+ * test if the @h_dentry sets opaque or not.
32016+ */
076b876e 32017+int au_diropq_test(struct dentry *h_dentry)
1facf9fc 32018+{
32019+ int err;
32020+ struct inode *h_dir;
32021+
5527c038 32022+ h_dir = d_inode(h_dentry);
076b876e 32023+ err = au_wh_test(h_dentry, &diropq_name,
1facf9fc 32024+ au_test_h_perm_sio(h_dir, MAY_EXEC));
32025+ return err;
32026+}
32027+
32028+/*
32029+ * returns a negative dentry whose name is unique and temporary.
32030+ */
32031+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
32032+ struct qstr *prefix)
32033+{
1facf9fc 32034+ struct dentry *dentry;
32035+ int i;
027c5e7a 32036+ char defname[NAME_MAX - AUFS_MAX_NAMELEN + DNAME_INLINE_LEN + 1],
4a4d8108 32037+ *name, *p;
027c5e7a 32038+ /* strict atomic_t is unnecessary here */
1facf9fc 32039+ static unsigned short cnt;
32040+ struct qstr qs;
32041+
4a4d8108
AM
32042+ BUILD_BUG_ON(sizeof(cnt) * 2 > AUFS_WH_TMP_LEN);
32043+
1facf9fc 32044+ name = defname;
027c5e7a
AM
32045+ qs.len = sizeof(defname) - DNAME_INLINE_LEN + prefix->len - 1;
32046+ if (unlikely(prefix->len > DNAME_INLINE_LEN)) {
1facf9fc 32047+ dentry = ERR_PTR(-ENAMETOOLONG);
4a4d8108 32048+ if (unlikely(qs.len > NAME_MAX))
1facf9fc 32049+ goto out;
32050+ dentry = ERR_PTR(-ENOMEM);
32051+ name = kmalloc(qs.len + 1, GFP_NOFS);
32052+ if (unlikely(!name))
32053+ goto out;
32054+ }
32055+
32056+ /* doubly whiteout-ed */
32057+ memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2);
32058+ p = name + AUFS_WH_PFX_LEN * 2;
32059+ memcpy(p, prefix->name, prefix->len);
32060+ p += prefix->len;
32061+ *p++ = '.';
4a4d8108 32062+ AuDebugOn(name + qs.len + 1 - p <= AUFS_WH_TMP_LEN);
1facf9fc 32063+
32064+ qs.name = name;
32065+ for (i = 0; i < 3; i++) {
b752ccd1 32066+ sprintf(p, "%.*x", AUFS_WH_TMP_LEN, cnt++);
076b876e 32067+ dentry = au_sio_lkup_one(&qs, h_parent);
5527c038 32068+ if (IS_ERR(dentry) || d_is_negative(dentry))
1facf9fc 32069+ goto out_name;
32070+ dput(dentry);
32071+ }
0c3ec466 32072+ /* pr_warn("could not get random name\n"); */
1facf9fc 32073+ dentry = ERR_PTR(-EEXIST);
32074+ AuDbg("%.*s\n", AuLNPair(&qs));
32075+ BUG();
32076+
4f0767ce 32077+out_name:
1facf9fc 32078+ if (name != defname)
32079+ kfree(name);
4f0767ce 32080+out:
4a4d8108 32081+ AuTraceErrPtr(dentry);
1facf9fc 32082+ return dentry;
1facf9fc 32083+}
32084+
32085+/*
32086+ * rename the @h_dentry on @br to the whiteouted temporary name.
32087+ */
32088+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br)
32089+{
32090+ int err;
32091+ struct path h_path = {
86dc4139 32092+ .mnt = au_br_mnt(br)
1facf9fc 32093+ };
523b37e3 32094+ struct inode *h_dir, *delegated;
1facf9fc 32095+ struct dentry *h_parent;
32096+
32097+ h_parent = h_dentry->d_parent; /* dir inode is locked */
5527c038 32098+ h_dir = d_inode(h_parent);
1facf9fc 32099+ IMustLock(h_dir);
32100+
32101+ h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name);
32102+ err = PTR_ERR(h_path.dentry);
32103+ if (IS_ERR(h_path.dentry))
32104+ goto out;
32105+
32106+ /* under the same dir, no need to lock_rename() */
523b37e3
AM
32107+ delegated = NULL;
32108+ err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path, &delegated);
1facf9fc 32109+ AuTraceErr(err);
523b37e3
AM
32110+ if (unlikely(err == -EWOULDBLOCK)) {
32111+ pr_warn("cannot retry for NFSv4 delegation"
32112+ " for an internal rename\n");
32113+ iput(delegated);
32114+ }
1facf9fc 32115+ dput(h_path.dentry);
32116+
4f0767ce 32117+out:
4a4d8108 32118+ AuTraceErr(err);
1facf9fc 32119+ return err;
32120+}
32121+
32122+/* ---------------------------------------------------------------------- */
32123+/*
32124+ * functions for removing a whiteout
32125+ */
32126+
32127+static int do_unlink_wh(struct inode *h_dir, struct path *h_path)
32128+{
523b37e3
AM
32129+ int err, force;
32130+ struct inode *delegated;
1facf9fc 32131+
32132+ /*
32133+ * forces superio when the dir has a sticky bit.
32134+ * this may be a violation of unix fs semantics.
32135+ */
32136+ force = (h_dir->i_mode & S_ISVTX)
5527c038 32137+ && !uid_eq(current_fsuid(), d_inode(h_path->dentry)->i_uid);
523b37e3
AM
32138+ delegated = NULL;
32139+ err = vfsub_unlink(h_dir, h_path, &delegated, force);
32140+ if (unlikely(err == -EWOULDBLOCK)) {
32141+ pr_warn("cannot retry for NFSv4 delegation"
32142+ " for an internal unlink\n");
32143+ iput(delegated);
32144+ }
32145+ return err;
1facf9fc 32146+}
32147+
32148+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
32149+ struct dentry *dentry)
32150+{
32151+ int err;
32152+
32153+ err = do_unlink_wh(h_dir, h_path);
32154+ if (!err && dentry)
32155+ au_set_dbwh(dentry, -1);
32156+
32157+ return err;
32158+}
32159+
32160+static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh,
32161+ struct au_branch *br)
32162+{
32163+ int err;
32164+ struct path h_path = {
86dc4139 32165+ .mnt = au_br_mnt(br)
1facf9fc 32166+ };
32167+
32168+ err = 0;
b4510431 32169+ h_path.dentry = vfsub_lkup_one(wh, h_parent);
1facf9fc 32170+ if (IS_ERR(h_path.dentry))
32171+ err = PTR_ERR(h_path.dentry);
32172+ else {
5527c038
JR
32173+ if (d_is_reg(h_path.dentry))
32174+ err = do_unlink_wh(d_inode(h_parent), &h_path);
1facf9fc 32175+ dput(h_path.dentry);
32176+ }
32177+
32178+ return err;
32179+}
32180+
32181+/* ---------------------------------------------------------------------- */
32182+/*
32183+ * initialize/clean whiteout for a branch
32184+ */
32185+
32186+static void au_wh_clean(struct inode *h_dir, struct path *whpath,
32187+ const int isdir)
32188+{
32189+ int err;
523b37e3 32190+ struct inode *delegated;
1facf9fc 32191+
5527c038 32192+ if (d_is_negative(whpath->dentry))
1facf9fc 32193+ return;
32194+
86dc4139
AM
32195+ if (isdir)
32196+ err = vfsub_rmdir(h_dir, whpath);
523b37e3
AM
32197+ else {
32198+ delegated = NULL;
32199+ err = vfsub_unlink(h_dir, whpath, &delegated, /*force*/0);
32200+ if (unlikely(err == -EWOULDBLOCK)) {
32201+ pr_warn("cannot retry for NFSv4 delegation"
32202+ " for an internal unlink\n");
32203+ iput(delegated);
32204+ }
32205+ }
1facf9fc 32206+ if (unlikely(err))
523b37e3
AM
32207+ pr_warn("failed removing %pd (%d), ignored.\n",
32208+ whpath->dentry, err);
1facf9fc 32209+}
32210+
32211+static int test_linkable(struct dentry *h_root)
32212+{
5527c038 32213+ struct inode *h_dir = d_inode(h_root);
1facf9fc 32214+
32215+ if (h_dir->i_op->link)
32216+ return 0;
32217+
523b37e3
AM
32218+ pr_err("%pd (%s) doesn't support link(2), use noplink and rw+nolwh\n",
32219+ h_root, au_sbtype(h_root->d_sb));
1facf9fc 32220+ return -ENOSYS;
32221+}
32222+
32223+/* todo: should this mkdir be done in /sbin/mount.aufs helper? */
32224+static int au_whdir(struct inode *h_dir, struct path *path)
32225+{
32226+ int err;
32227+
32228+ err = -EEXIST;
5527c038 32229+ if (d_is_negative(path->dentry)) {
1facf9fc 32230+ int mode = S_IRWXU;
32231+
32232+ if (au_test_nfs(path->dentry->d_sb))
32233+ mode |= S_IXUGO;
86dc4139 32234+ err = vfsub_mkdir(h_dir, path, mode);
2000de60 32235+ } else if (d_is_dir(path->dentry))
1facf9fc 32236+ err = 0;
32237+ else
523b37e3 32238+ pr_err("unknown %pd exists\n", path->dentry);
1facf9fc 32239+
32240+ return err;
32241+}
32242+
32243+struct au_wh_base {
32244+ const struct qstr *name;
32245+ struct dentry *dentry;
32246+};
32247+
32248+static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[],
32249+ struct path *h_path)
32250+{
32251+ h_path->dentry = base[AuBrWh_BASE].dentry;
32252+ au_wh_clean(h_dir, h_path, /*isdir*/0);
32253+ h_path->dentry = base[AuBrWh_PLINK].dentry;
32254+ au_wh_clean(h_dir, h_path, /*isdir*/1);
32255+ h_path->dentry = base[AuBrWh_ORPH].dentry;
32256+ au_wh_clean(h_dir, h_path, /*isdir*/1);
32257+}
32258+
32259+/*
32260+ * returns tri-state,
c1595e42 32261+ * minus: error, caller should print the message
1facf9fc 32262+ * zero: success
c1595e42 32263+ * plus: error, caller should NOT print the message
1facf9fc 32264+ */
32265+static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr,
32266+ int do_plink, struct au_wh_base base[],
32267+ struct path *h_path)
32268+{
32269+ int err;
32270+ struct inode *h_dir;
32271+
5527c038 32272+ h_dir = d_inode(h_root);
1facf9fc 32273+ h_path->dentry = base[AuBrWh_BASE].dentry;
32274+ au_wh_clean(h_dir, h_path, /*isdir*/0);
32275+ h_path->dentry = base[AuBrWh_PLINK].dentry;
32276+ if (do_plink) {
32277+ err = test_linkable(h_root);
32278+ if (unlikely(err)) {
32279+ err = 1;
32280+ goto out;
32281+ }
32282+
32283+ err = au_whdir(h_dir, h_path);
32284+ if (unlikely(err))
32285+ goto out;
32286+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
32287+ } else
32288+ au_wh_clean(h_dir, h_path, /*isdir*/1);
32289+ h_path->dentry = base[AuBrWh_ORPH].dentry;
32290+ err = au_whdir(h_dir, h_path);
32291+ if (unlikely(err))
32292+ goto out;
32293+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
32294+
4f0767ce 32295+out:
1facf9fc 32296+ return err;
32297+}
32298+
32299+/*
32300+ * for the moment, aufs supports the branch filesystem which does not support
32301+ * link(2). testing on FAT which does not support i_op->setattr() fully either,
32302+ * copyup failed. finally, such filesystem will not be used as the writable
32303+ * branch.
32304+ *
32305+ * returns tri-state, see above.
32306+ */
32307+static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr,
32308+ int do_plink, struct au_wh_base base[],
32309+ struct path *h_path)
32310+{
32311+ int err;
32312+ struct inode *h_dir;
32313+
1308ab2a 32314+ WbrWhMustWriteLock(wbr);
32315+
1facf9fc 32316+ err = test_linkable(h_root);
32317+ if (unlikely(err)) {
32318+ err = 1;
32319+ goto out;
32320+ }
32321+
32322+ /*
32323+ * todo: should this create be done in /sbin/mount.aufs helper?
32324+ */
32325+ err = -EEXIST;
5527c038
JR
32326+ h_dir = d_inode(h_root);
32327+ if (d_is_negative(base[AuBrWh_BASE].dentry)) {
86dc4139
AM
32328+ h_path->dentry = base[AuBrWh_BASE].dentry;
32329+ err = vfsub_create(h_dir, h_path, WH_MASK, /*want_excl*/true);
7e9cd9fe 32330+ } else if (d_is_reg(base[AuBrWh_BASE].dentry))
1facf9fc 32331+ err = 0;
32332+ else
523b37e3 32333+ pr_err("unknown %pd2 exists\n", base[AuBrWh_BASE].dentry);
1facf9fc 32334+ if (unlikely(err))
32335+ goto out;
32336+
32337+ h_path->dentry = base[AuBrWh_PLINK].dentry;
32338+ if (do_plink) {
32339+ err = au_whdir(h_dir, h_path);
32340+ if (unlikely(err))
32341+ goto out;
32342+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
32343+ } else
32344+ au_wh_clean(h_dir, h_path, /*isdir*/1);
32345+ wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry);
32346+
32347+ h_path->dentry = base[AuBrWh_ORPH].dentry;
32348+ err = au_whdir(h_dir, h_path);
32349+ if (unlikely(err))
32350+ goto out;
32351+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
32352+
4f0767ce 32353+out:
1facf9fc 32354+ return err;
32355+}
32356+
32357+/*
32358+ * initialize the whiteout base file/dir for @br.
32359+ */
86dc4139 32360+int au_wh_init(struct au_branch *br, struct super_block *sb)
1facf9fc 32361+{
32362+ int err, i;
32363+ const unsigned char do_plink
32364+ = !!au_opt_test(au_mntflags(sb), PLINK);
1facf9fc 32365+ struct inode *h_dir;
86dc4139
AM
32366+ struct path path = br->br_path;
32367+ struct dentry *h_root = path.dentry;
1facf9fc 32368+ struct au_wbr *wbr = br->br_wbr;
32369+ static const struct qstr base_name[] = {
0c3ec466
AM
32370+ [AuBrWh_BASE] = QSTR_INIT(AUFS_BASE_NAME,
32371+ sizeof(AUFS_BASE_NAME) - 1),
32372+ [AuBrWh_PLINK] = QSTR_INIT(AUFS_PLINKDIR_NAME,
32373+ sizeof(AUFS_PLINKDIR_NAME) - 1),
32374+ [AuBrWh_ORPH] = QSTR_INIT(AUFS_ORPHDIR_NAME,
32375+ sizeof(AUFS_ORPHDIR_NAME) - 1)
1facf9fc 32376+ };
32377+ struct au_wh_base base[] = {
32378+ [AuBrWh_BASE] = {
32379+ .name = base_name + AuBrWh_BASE,
32380+ .dentry = NULL
32381+ },
32382+ [AuBrWh_PLINK] = {
32383+ .name = base_name + AuBrWh_PLINK,
32384+ .dentry = NULL
32385+ },
32386+ [AuBrWh_ORPH] = {
32387+ .name = base_name + AuBrWh_ORPH,
32388+ .dentry = NULL
32389+ }
32390+ };
32391+
1308ab2a 32392+ if (wbr)
32393+ WbrWhMustWriteLock(wbr);
1facf9fc 32394+
1facf9fc 32395+ for (i = 0; i < AuBrWh_Last; i++) {
32396+ /* doubly whiteouted */
32397+ struct dentry *d;
32398+
32399+ d = au_wh_lkup(h_root, (void *)base[i].name, br);
32400+ err = PTR_ERR(d);
32401+ if (IS_ERR(d))
32402+ goto out;
32403+
32404+ base[i].dentry = d;
32405+ AuDebugOn(wbr
32406+ && wbr->wbr_wh[i]
32407+ && wbr->wbr_wh[i] != base[i].dentry);
32408+ }
32409+
32410+ if (wbr)
32411+ for (i = 0; i < AuBrWh_Last; i++) {
32412+ dput(wbr->wbr_wh[i]);
32413+ wbr->wbr_wh[i] = NULL;
32414+ }
32415+
32416+ err = 0;
1e00d052 32417+ if (!au_br_writable(br->br_perm)) {
5527c038 32418+ h_dir = d_inode(h_root);
1facf9fc 32419+ au_wh_init_ro(h_dir, base, &path);
1e00d052 32420+ } else if (!au_br_wh_linkable(br->br_perm)) {
1facf9fc 32421+ err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path);
32422+ if (err > 0)
32423+ goto out;
32424+ else if (err)
32425+ goto out_err;
1e00d052 32426+ } else {
1facf9fc 32427+ err = au_wh_init_rw(h_root, wbr, do_plink, base, &path);
32428+ if (err > 0)
32429+ goto out;
32430+ else if (err)
32431+ goto out_err;
1facf9fc 32432+ }
32433+ goto out; /* success */
32434+
4f0767ce 32435+out_err:
523b37e3
AM
32436+ pr_err("an error(%d) on the writable branch %pd(%s)\n",
32437+ err, h_root, au_sbtype(h_root->d_sb));
4f0767ce 32438+out:
1facf9fc 32439+ for (i = 0; i < AuBrWh_Last; i++)
32440+ dput(base[i].dentry);
32441+ return err;
32442+}
32443+
32444+/* ---------------------------------------------------------------------- */
32445+/*
32446+ * whiteouts are all hard-linked usually.
32447+ * when its link count reaches a ceiling, we create a new whiteout base
32448+ * asynchronously.
32449+ */
32450+
32451+struct reinit_br_wh {
32452+ struct super_block *sb;
32453+ struct au_branch *br;
32454+};
32455+
32456+static void reinit_br_wh(void *arg)
32457+{
32458+ int err;
32459+ aufs_bindex_t bindex;
32460+ struct path h_path;
32461+ struct reinit_br_wh *a = arg;
32462+ struct au_wbr *wbr;
523b37e3 32463+ struct inode *dir, *delegated;
1facf9fc 32464+ struct dentry *h_root;
32465+ struct au_hinode *hdir;
32466+
32467+ err = 0;
32468+ wbr = a->br->br_wbr;
32469+ /* big aufs lock */
32470+ si_noflush_write_lock(a->sb);
32471+ if (!au_br_writable(a->br->br_perm))
32472+ goto out;
32473+ bindex = au_br_index(a->sb, a->br->br_id);
32474+ if (unlikely(bindex < 0))
32475+ goto out;
32476+
1308ab2a 32477+ di_read_lock_parent(a->sb->s_root, AuLock_IR);
5527c038 32478+ dir = d_inode(a->sb->s_root);
1facf9fc 32479+ hdir = au_hi(dir, bindex);
32480+ h_root = au_h_dptr(a->sb->s_root, bindex);
86dc4139 32481+ AuDebugOn(h_root != au_br_dentry(a->br));
1facf9fc 32482+
4a4d8108 32483+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 32484+ wbr_wh_write_lock(wbr);
32485+ err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode,
32486+ h_root, a->br);
32487+ if (!err) {
86dc4139
AM
32488+ h_path.dentry = wbr->wbr_whbase;
32489+ h_path.mnt = au_br_mnt(a->br);
523b37e3
AM
32490+ delegated = NULL;
32491+ err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated,
32492+ /*force*/0);
32493+ if (unlikely(err == -EWOULDBLOCK)) {
32494+ pr_warn("cannot retry for NFSv4 delegation"
32495+ " for an internal unlink\n");
32496+ iput(delegated);
32497+ }
1facf9fc 32498+ } else {
523b37e3 32499+ pr_warn("%pd is moved, ignored\n", wbr->wbr_whbase);
1facf9fc 32500+ err = 0;
32501+ }
32502+ dput(wbr->wbr_whbase);
32503+ wbr->wbr_whbase = NULL;
32504+ if (!err)
86dc4139 32505+ err = au_wh_init(a->br, a->sb);
1facf9fc 32506+ wbr_wh_write_unlock(wbr);
4a4d8108 32507+ au_hn_imtx_unlock(hdir);
1308ab2a 32508+ di_read_unlock(a->sb->s_root, AuLock_IR);
076b876e
AM
32509+ if (!err)
32510+ au_fhsm_wrote(a->sb, bindex, /*force*/0);
1facf9fc 32511+
4f0767ce 32512+out:
1facf9fc 32513+ if (wbr)
32514+ atomic_dec(&wbr->wbr_wh_running);
32515+ atomic_dec(&a->br->br_count);
1facf9fc 32516+ si_write_unlock(a->sb);
027c5e7a 32517+ au_nwt_done(&au_sbi(a->sb)->si_nowait);
1facf9fc 32518+ kfree(arg);
32519+ if (unlikely(err))
32520+ AuIOErr("err %d\n", err);
32521+}
32522+
32523+static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br)
32524+{
32525+ int do_dec, wkq_err;
32526+ struct reinit_br_wh *arg;
32527+
32528+ do_dec = 1;
32529+ if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1)
32530+ goto out;
32531+
32532+ /* ignore ENOMEM */
32533+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
32534+ if (arg) {
32535+ /*
32536+ * dec(wh_running), kfree(arg) and dec(br_count)
32537+ * in reinit function
32538+ */
32539+ arg->sb = sb;
32540+ arg->br = br;
32541+ atomic_inc(&br->br_count);
53392da6 32542+ wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb, /*flags*/0);
1facf9fc 32543+ if (unlikely(wkq_err)) {
32544+ atomic_dec(&br->br_wbr->wbr_wh_running);
32545+ atomic_dec(&br->br_count);
32546+ kfree(arg);
32547+ }
32548+ do_dec = 0;
32549+ }
32550+
4f0767ce 32551+out:
1facf9fc 32552+ if (do_dec)
32553+ atomic_dec(&br->br_wbr->wbr_wh_running);
32554+}
32555+
32556+/* ---------------------------------------------------------------------- */
32557+
32558+/*
32559+ * create the whiteout @wh.
32560+ */
32561+static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex,
32562+ struct dentry *wh)
32563+{
32564+ int err;
32565+ struct path h_path = {
32566+ .dentry = wh
32567+ };
32568+ struct au_branch *br;
32569+ struct au_wbr *wbr;
32570+ struct dentry *h_parent;
523b37e3 32571+ struct inode *h_dir, *delegated;
1facf9fc 32572+
32573+ h_parent = wh->d_parent; /* dir inode is locked */
5527c038 32574+ h_dir = d_inode(h_parent);
1facf9fc 32575+ IMustLock(h_dir);
32576+
32577+ br = au_sbr(sb, bindex);
86dc4139 32578+ h_path.mnt = au_br_mnt(br);
1facf9fc 32579+ wbr = br->br_wbr;
32580+ wbr_wh_read_lock(wbr);
32581+ if (wbr->wbr_whbase) {
523b37e3
AM
32582+ delegated = NULL;
32583+ err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path, &delegated);
32584+ if (unlikely(err == -EWOULDBLOCK)) {
32585+ pr_warn("cannot retry for NFSv4 delegation"
32586+ " for an internal link\n");
32587+ iput(delegated);
32588+ }
1facf9fc 32589+ if (!err || err != -EMLINK)
32590+ goto out;
32591+
32592+ /* link count full. re-initialize br_whbase. */
32593+ kick_reinit_br_wh(sb, br);
32594+ }
32595+
32596+ /* return this error in this context */
b4510431 32597+ err = vfsub_create(h_dir, &h_path, WH_MASK, /*want_excl*/true);
076b876e
AM
32598+ if (!err)
32599+ au_fhsm_wrote(sb, bindex, /*force*/0);
1facf9fc 32600+
4f0767ce 32601+out:
1facf9fc 32602+ wbr_wh_read_unlock(wbr);
32603+ return err;
32604+}
32605+
32606+/* ---------------------------------------------------------------------- */
32607+
32608+/*
32609+ * create or remove the diropq.
32610+ */
32611+static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex,
32612+ unsigned int flags)
32613+{
32614+ struct dentry *opq_dentry, *h_dentry;
32615+ struct super_block *sb;
32616+ struct au_branch *br;
32617+ int err;
32618+
32619+ sb = dentry->d_sb;
32620+ br = au_sbr(sb, bindex);
32621+ h_dentry = au_h_dptr(dentry, bindex);
b4510431 32622+ opq_dentry = vfsub_lkup_one(&diropq_name, h_dentry);
1facf9fc 32623+ if (IS_ERR(opq_dentry))
32624+ goto out;
32625+
32626+ if (au_ftest_diropq(flags, CREATE)) {
32627+ err = link_or_create_wh(sb, bindex, opq_dentry);
32628+ if (!err) {
32629+ au_set_dbdiropq(dentry, bindex);
32630+ goto out; /* success */
32631+ }
32632+ } else {
32633+ struct path tmp = {
32634+ .dentry = opq_dentry,
86dc4139 32635+ .mnt = au_br_mnt(br)
1facf9fc 32636+ };
5527c038 32637+ err = do_unlink_wh(au_h_iptr(d_inode(dentry), bindex), &tmp);
1facf9fc 32638+ if (!err)
32639+ au_set_dbdiropq(dentry, -1);
32640+ }
32641+ dput(opq_dentry);
32642+ opq_dentry = ERR_PTR(err);
32643+
4f0767ce 32644+out:
1facf9fc 32645+ return opq_dentry;
32646+}
32647+
32648+struct do_diropq_args {
32649+ struct dentry **errp;
32650+ struct dentry *dentry;
32651+ aufs_bindex_t bindex;
32652+ unsigned int flags;
32653+};
32654+
32655+static void call_do_diropq(void *args)
32656+{
32657+ struct do_diropq_args *a = args;
32658+ *a->errp = do_diropq(a->dentry, a->bindex, a->flags);
32659+}
32660+
32661+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
32662+ unsigned int flags)
32663+{
32664+ struct dentry *diropq, *h_dentry;
32665+
32666+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 32667+ if (!au_test_h_perm_sio(d_inode(h_dentry), MAY_EXEC | MAY_WRITE))
1facf9fc 32668+ diropq = do_diropq(dentry, bindex, flags);
32669+ else {
32670+ int wkq_err;
32671+ struct do_diropq_args args = {
32672+ .errp = &diropq,
32673+ .dentry = dentry,
32674+ .bindex = bindex,
32675+ .flags = flags
32676+ };
32677+
32678+ wkq_err = au_wkq_wait(call_do_diropq, &args);
32679+ if (unlikely(wkq_err))
32680+ diropq = ERR_PTR(wkq_err);
32681+ }
32682+
32683+ return diropq;
32684+}
32685+
32686+/* ---------------------------------------------------------------------- */
32687+
32688+/*
32689+ * lookup whiteout dentry.
32690+ * @h_parent: lower parent dentry which must exist and be locked
32691+ * @base_name: name of dentry which will be whiteouted
32692+ * returns dentry for whiteout.
32693+ */
32694+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
32695+ struct au_branch *br)
32696+{
32697+ int err;
32698+ struct qstr wh_name;
32699+ struct dentry *wh_dentry;
32700+
32701+ err = au_wh_name_alloc(&wh_name, base_name);
32702+ wh_dentry = ERR_PTR(err);
32703+ if (!err) {
b4510431 32704+ wh_dentry = vfsub_lkup_one(&wh_name, h_parent);
1facf9fc 32705+ kfree(wh_name.name);
32706+ }
32707+ return wh_dentry;
32708+}
32709+
32710+/*
32711+ * link/create a whiteout for @dentry on @bindex.
32712+ */
32713+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
32714+ struct dentry *h_parent)
32715+{
32716+ struct dentry *wh_dentry;
32717+ struct super_block *sb;
32718+ int err;
32719+
32720+ sb = dentry->d_sb;
32721+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex));
5527c038 32722+ if (!IS_ERR(wh_dentry) && d_is_negative(wh_dentry)) {
1facf9fc 32723+ err = link_or_create_wh(sb, bindex, wh_dentry);
076b876e 32724+ if (!err) {
1facf9fc 32725+ au_set_dbwh(dentry, bindex);
076b876e
AM
32726+ au_fhsm_wrote(sb, bindex, /*force*/0);
32727+ } else {
1facf9fc 32728+ dput(wh_dentry);
32729+ wh_dentry = ERR_PTR(err);
32730+ }
32731+ }
32732+
32733+ return wh_dentry;
32734+}
32735+
32736+/* ---------------------------------------------------------------------- */
32737+
32738+/* Delete all whiteouts in this directory on branch bindex. */
32739+static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist,
32740+ aufs_bindex_t bindex, struct au_branch *br)
32741+{
32742+ int err;
32743+ unsigned long ul, n;
32744+ struct qstr wh_name;
32745+ char *p;
32746+ struct hlist_head *head;
c06a8ce3 32747+ struct au_vdir_wh *pos;
1facf9fc 32748+ struct au_vdir_destr *str;
32749+
32750+ err = -ENOMEM;
537831f9 32751+ p = (void *)__get_free_page(GFP_NOFS);
1facf9fc 32752+ wh_name.name = p;
32753+ if (unlikely(!wh_name.name))
32754+ goto out;
32755+
32756+ err = 0;
32757+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
32758+ p += AUFS_WH_PFX_LEN;
32759+ n = whlist->nh_num;
32760+ head = whlist->nh_head;
32761+ for (ul = 0; !err && ul < n; ul++, head++) {
c06a8ce3
AM
32762+ hlist_for_each_entry(pos, head, wh_hash) {
32763+ if (pos->wh_bindex != bindex)
1facf9fc 32764+ continue;
32765+
c06a8ce3 32766+ str = &pos->wh_str;
1facf9fc 32767+ if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) {
32768+ memcpy(p, str->name, str->len);
32769+ wh_name.len = AUFS_WH_PFX_LEN + str->len;
32770+ err = unlink_wh_name(h_dentry, &wh_name, br);
32771+ if (!err)
32772+ continue;
32773+ break;
32774+ }
32775+ AuIOErr("whiteout name too long %.*s\n",
32776+ str->len, str->name);
32777+ err = -EIO;
32778+ break;
32779+ }
32780+ }
537831f9 32781+ free_page((unsigned long)wh_name.name);
1facf9fc 32782+
4f0767ce 32783+out:
1facf9fc 32784+ return err;
32785+}
32786+
32787+struct del_wh_children_args {
32788+ int *errp;
32789+ struct dentry *h_dentry;
1308ab2a 32790+ struct au_nhash *whlist;
1facf9fc 32791+ aufs_bindex_t bindex;
32792+ struct au_branch *br;
32793+};
32794+
32795+static void call_del_wh_children(void *args)
32796+{
32797+ struct del_wh_children_args *a = args;
1308ab2a 32798+ *a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br);
1facf9fc 32799+}
32800+
32801+/* ---------------------------------------------------------------------- */
32802+
32803+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp)
32804+{
32805+ struct au_whtmp_rmdir *whtmp;
dece6358 32806+ int err;
1308ab2a 32807+ unsigned int rdhash;
dece6358
AM
32808+
32809+ SiMustAnyLock(sb);
1facf9fc 32810+
32811+ whtmp = kmalloc(sizeof(*whtmp), gfp);
dece6358
AM
32812+ if (unlikely(!whtmp)) {
32813+ whtmp = ERR_PTR(-ENOMEM);
1facf9fc 32814+ goto out;
dece6358 32815+ }
1facf9fc 32816+
32817+ whtmp->dir = NULL;
027c5e7a 32818+ whtmp->br = NULL;
1facf9fc 32819+ whtmp->wh_dentry = NULL;
1308ab2a 32820+ /* no estimation for dir size */
32821+ rdhash = au_sbi(sb)->si_rdhash;
32822+ if (!rdhash)
32823+ rdhash = AUFS_RDHASH_DEF;
32824+ err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp);
32825+ if (unlikely(err)) {
32826+ kfree(whtmp);
32827+ whtmp = ERR_PTR(err);
32828+ }
dece6358 32829+
4f0767ce 32830+out:
dece6358 32831+ return whtmp;
1facf9fc 32832+}
32833+
32834+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp)
32835+{
027c5e7a
AM
32836+ if (whtmp->br)
32837+ atomic_dec(&whtmp->br->br_count);
1facf9fc 32838+ dput(whtmp->wh_dentry);
32839+ iput(whtmp->dir);
dece6358 32840+ au_nhash_wh_free(&whtmp->whlist);
1facf9fc 32841+ kfree(whtmp);
32842+}
32843+
32844+/*
32845+ * rmdir the whiteouted temporary named dir @h_dentry.
32846+ * @whlist: whiteouted children.
32847+ */
32848+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
32849+ struct dentry *wh_dentry, struct au_nhash *whlist)
32850+{
32851+ int err;
2000de60 32852+ unsigned int h_nlink;
1facf9fc 32853+ struct path h_tmp;
32854+ struct inode *wh_inode, *h_dir;
32855+ struct au_branch *br;
32856+
5527c038 32857+ h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */
1facf9fc 32858+ IMustLock(h_dir);
32859+
32860+ br = au_sbr(dir->i_sb, bindex);
5527c038 32861+ wh_inode = d_inode(wh_dentry);
1facf9fc 32862+ mutex_lock_nested(&wh_inode->i_mutex, AuLsc_I_CHILD);
32863+
32864+ /*
32865+ * someone else might change some whiteouts while we were sleeping.
32866+ * it means this whlist may have an obsoleted entry.
32867+ */
32868+ if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE))
32869+ err = del_wh_children(wh_dentry, whlist, bindex, br);
32870+ else {
32871+ int wkq_err;
32872+ struct del_wh_children_args args = {
32873+ .errp = &err,
32874+ .h_dentry = wh_dentry,
1308ab2a 32875+ .whlist = whlist,
1facf9fc 32876+ .bindex = bindex,
32877+ .br = br
32878+ };
32879+
32880+ wkq_err = au_wkq_wait(call_del_wh_children, &args);
32881+ if (unlikely(wkq_err))
32882+ err = wkq_err;
32883+ }
32884+ mutex_unlock(&wh_inode->i_mutex);
32885+
32886+ if (!err) {
32887+ h_tmp.dentry = wh_dentry;
86dc4139 32888+ h_tmp.mnt = au_br_mnt(br);
2000de60 32889+ h_nlink = h_dir->i_nlink;
1facf9fc 32890+ err = vfsub_rmdir(h_dir, &h_tmp);
2000de60
JR
32891+ /* some filesystems don't change the parent nlink in some cases */
32892+ h_nlink -= h_dir->i_nlink;
1facf9fc 32893+ }
32894+
32895+ if (!err) {
32896+ if (au_ibstart(dir) == bindex) {
7f207e10 32897+ /* todo: dir->i_mutex is necessary */
1facf9fc 32898+ au_cpup_attr_timesizes(dir);
2000de60
JR
32899+ if (h_nlink)
32900+ vfsub_drop_nlink(dir);
1facf9fc 32901+ }
32902+ return 0; /* success */
32903+ }
32904+
523b37e3 32905+ pr_warn("failed removing %pd(%d), ignored\n", wh_dentry, err);
1facf9fc 32906+ return err;
32907+}
32908+
32909+static void call_rmdir_whtmp(void *args)
32910+{
32911+ int err;
e49829fe 32912+ aufs_bindex_t bindex;
1facf9fc 32913+ struct au_whtmp_rmdir *a = args;
32914+ struct super_block *sb;
32915+ struct dentry *h_parent;
32916+ struct inode *h_dir;
1facf9fc 32917+ struct au_hinode *hdir;
32918+
32919+ /* rmdir by nfsd may cause deadlock with this i_mutex */
32920+ /* mutex_lock(&a->dir->i_mutex); */
e49829fe 32921+ err = -EROFS;
1facf9fc 32922+ sb = a->dir->i_sb;
e49829fe
JR
32923+ si_read_lock(sb, !AuLock_FLUSH);
32924+ if (!au_br_writable(a->br->br_perm))
32925+ goto out;
32926+ bindex = au_br_index(sb, a->br->br_id);
32927+ if (unlikely(bindex < 0))
1facf9fc 32928+ goto out;
32929+
32930+ err = -EIO;
1facf9fc 32931+ ii_write_lock_parent(a->dir);
32932+ h_parent = dget_parent(a->wh_dentry);
5527c038 32933+ h_dir = d_inode(h_parent);
e49829fe 32934+ hdir = au_hi(a->dir, bindex);
86dc4139
AM
32935+ err = vfsub_mnt_want_write(au_br_mnt(a->br));
32936+ if (unlikely(err))
32937+ goto out_mnt;
4a4d8108 32938+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
e49829fe
JR
32939+ err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent,
32940+ a->br);
86dc4139
AM
32941+ if (!err)
32942+ err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry, &a->whlist);
4a4d8108 32943+ au_hn_imtx_unlock(hdir);
86dc4139
AM
32944+ vfsub_mnt_drop_write(au_br_mnt(a->br));
32945+
32946+out_mnt:
1facf9fc 32947+ dput(h_parent);
32948+ ii_write_unlock(a->dir);
4f0767ce 32949+out:
1facf9fc 32950+ /* mutex_unlock(&a->dir->i_mutex); */
1facf9fc 32951+ au_whtmp_rmdir_free(a);
027c5e7a
AM
32952+ si_read_unlock(sb);
32953+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 32954+ if (unlikely(err))
32955+ AuIOErr("err %d\n", err);
32956+}
32957+
32958+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
32959+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args)
32960+{
32961+ int wkq_err;
e49829fe 32962+ struct super_block *sb;
1facf9fc 32963+
32964+ IMustLock(dir);
32965+
32966+ /* all post-processing will be done in call_rmdir_whtmp(). */
e49829fe 32967+ sb = dir->i_sb;
1facf9fc 32968+ args->dir = au_igrab(dir);
e49829fe
JR
32969+ args->br = au_sbr(sb, bindex);
32970+ atomic_inc(&args->br->br_count);
1facf9fc 32971+ args->wh_dentry = dget(wh_dentry);
53392da6 32972+ wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, sb, /*flags*/0);
1facf9fc 32973+ if (unlikely(wkq_err)) {
523b37e3 32974+ pr_warn("rmdir error %pd (%d), ignored\n", wh_dentry, wkq_err);
1facf9fc 32975+ au_whtmp_rmdir_free(args);
32976+ }
32977+}
7f207e10
AM
32978diff -urN /usr/share/empty/fs/aufs/whout.h linux/fs/aufs/whout.h
32979--- /usr/share/empty/fs/aufs/whout.h 1970-01-01 01:00:00.000000000 +0100
ab036dbd 32980+++ linux/fs/aufs/whout.h 2015-09-24 10:47:58.258053165 +0200
076b876e 32981@@ -0,0 +1,85 @@
1facf9fc 32982+/*
2000de60 32983+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 32984+ *
32985+ * This program, aufs is free software; you can redistribute it and/or modify
32986+ * it under the terms of the GNU General Public License as published by
32987+ * the Free Software Foundation; either version 2 of the License, or
32988+ * (at your option) any later version.
dece6358
AM
32989+ *
32990+ * This program is distributed in the hope that it will be useful,
32991+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
32992+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32993+ * GNU General Public License for more details.
32994+ *
32995+ * You should have received a copy of the GNU General Public License
523b37e3 32996+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 32997+ */
32998+
32999+/*
33000+ * whiteout for logical deletion and opaque directory
33001+ */
33002+
33003+#ifndef __AUFS_WHOUT_H__
33004+#define __AUFS_WHOUT_H__
33005+
33006+#ifdef __KERNEL__
33007+
1facf9fc 33008+#include "dir.h"
33009+
33010+/* whout.c */
33011+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name);
076b876e
AM
33012+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio);
33013+int au_diropq_test(struct dentry *h_dentry);
7e9cd9fe 33014+struct au_branch;
1facf9fc 33015+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
33016+ struct qstr *prefix);
33017+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br);
33018+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
33019+ struct dentry *dentry);
86dc4139 33020+int au_wh_init(struct au_branch *br, struct super_block *sb);
1facf9fc 33021+
33022+/* diropq flags */
33023+#define AuDiropq_CREATE 1
33024+#define au_ftest_diropq(flags, name) ((flags) & AuDiropq_##name)
7f207e10
AM
33025+#define au_fset_diropq(flags, name) \
33026+ do { (flags) |= AuDiropq_##name; } while (0)
33027+#define au_fclr_diropq(flags, name) \
33028+ do { (flags) &= ~AuDiropq_##name; } while (0)
1facf9fc 33029+
33030+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
33031+ unsigned int flags);
33032+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
33033+ struct au_branch *br);
33034+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
33035+ struct dentry *h_parent);
33036+
33037+/* real rmdir for the whiteout-ed dir */
33038+struct au_whtmp_rmdir {
33039+ struct inode *dir;
e49829fe 33040+ struct au_branch *br;
1facf9fc 33041+ struct dentry *wh_dentry;
dece6358 33042+ struct au_nhash whlist;
1facf9fc 33043+};
33044+
33045+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp);
33046+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp);
33047+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
33048+ struct dentry *wh_dentry, struct au_nhash *whlist);
33049+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
33050+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args);
33051+
33052+/* ---------------------------------------------------------------------- */
33053+
33054+static inline struct dentry *au_diropq_create(struct dentry *dentry,
33055+ aufs_bindex_t bindex)
33056+{
33057+ return au_diropq_sio(dentry, bindex, AuDiropq_CREATE);
33058+}
33059+
33060+static inline int au_diropq_remove(struct dentry *dentry, aufs_bindex_t bindex)
33061+{
33062+ return PTR_ERR(au_diropq_sio(dentry, bindex, !AuDiropq_CREATE));
33063+}
33064+
33065+#endif /* __KERNEL__ */
33066+#endif /* __AUFS_WHOUT_H__ */
7f207e10
AM
33067diff -urN /usr/share/empty/fs/aufs/wkq.c linux/fs/aufs/wkq.c
33068--- /usr/share/empty/fs/aufs/wkq.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 33069+++ linux/fs/aufs/wkq.c 2015-09-24 10:47:58.258053165 +0200
38d290e6 33070@@ -0,0 +1,213 @@
1facf9fc 33071+/*
2000de60 33072+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 33073+ *
33074+ * This program, aufs is free software; you can redistribute it and/or modify
33075+ * it under the terms of the GNU General Public License as published by
33076+ * the Free Software Foundation; either version 2 of the License, or
33077+ * (at your option) any later version.
dece6358
AM
33078+ *
33079+ * This program is distributed in the hope that it will be useful,
33080+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33081+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33082+ * GNU General Public License for more details.
33083+ *
33084+ * You should have received a copy of the GNU General Public License
523b37e3 33085+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 33086+ */
33087+
33088+/*
33089+ * workqueue for asynchronous/super-io operations
33090+ * todo: try new credential scheme
33091+ */
33092+
dece6358 33093+#include <linux/module.h>
1facf9fc 33094+#include "aufs.h"
33095+
9dbd164d 33096+/* internal workqueue named AUFS_WKQ_NAME */
b752ccd1 33097+
9dbd164d 33098+static struct workqueue_struct *au_wkq;
1facf9fc 33099+
33100+struct au_wkinfo {
33101+ struct work_struct wk;
7f207e10 33102+ struct kobject *kobj;
1facf9fc 33103+
33104+ unsigned int flags; /* see wkq.h */
33105+
33106+ au_wkq_func_t func;
33107+ void *args;
33108+
1facf9fc 33109+ struct completion *comp;
33110+};
33111+
33112+/* ---------------------------------------------------------------------- */
33113+
1facf9fc 33114+static void wkq_func(struct work_struct *wk)
33115+{
33116+ struct au_wkinfo *wkinfo = container_of(wk, struct au_wkinfo, wk);
33117+
2dfbb274 33118+ AuDebugOn(!uid_eq(current_fsuid(), GLOBAL_ROOT_UID));
7f207e10
AM
33119+ AuDebugOn(rlimit(RLIMIT_FSIZE) != RLIM_INFINITY);
33120+
1facf9fc 33121+ wkinfo->func(wkinfo->args);
1facf9fc 33122+ if (au_ftest_wkq(wkinfo->flags, WAIT))
33123+ complete(wkinfo->comp);
33124+ else {
7f207e10 33125+ kobject_put(wkinfo->kobj);
9dbd164d 33126+ module_put(THIS_MODULE); /* todo: ?? */
1facf9fc 33127+ kfree(wkinfo);
33128+ }
33129+}
33130+
33131+/*
33132+ * Since struct completion is large, try allocating it dynamically.
33133+ */
c2b27bf2 33134+#if 1 /* defined(CONFIG_4KSTACKS) || defined(AuTest4KSTACKS) */
1facf9fc 33135+#define AuWkqCompDeclare(name) struct completion *comp = NULL
33136+
33137+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
33138+{
33139+ *comp = kmalloc(sizeof(**comp), GFP_NOFS);
33140+ if (*comp) {
33141+ init_completion(*comp);
33142+ wkinfo->comp = *comp;
33143+ return 0;
33144+ }
33145+ return -ENOMEM;
33146+}
33147+
33148+static void au_wkq_comp_free(struct completion *comp)
33149+{
33150+ kfree(comp);
33151+}
33152+
33153+#else
33154+
33155+/* no braces */
33156+#define AuWkqCompDeclare(name) \
33157+ DECLARE_COMPLETION_ONSTACK(_ ## name); \
33158+ struct completion *comp = &_ ## name
33159+
33160+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
33161+{
33162+ wkinfo->comp = *comp;
33163+ return 0;
33164+}
33165+
33166+static void au_wkq_comp_free(struct completion *comp __maybe_unused)
33167+{
33168+ /* empty */
33169+}
33170+#endif /* 4KSTACKS */
33171+
53392da6 33172+static void au_wkq_run(struct au_wkinfo *wkinfo)
1facf9fc 33173+{
53392da6
AM
33174+ if (au_ftest_wkq(wkinfo->flags, NEST)) {
33175+ if (au_wkq_test()) {
38d290e6
JR
33176+ AuWarn1("wkq from wkq, unless silly-rename on NFS,"
33177+ " due to a dead dir by UDBA?\n");
53392da6
AM
33178+ AuDebugOn(au_ftest_wkq(wkinfo->flags, WAIT));
33179+ }
33180+ } else
33181+ au_dbg_verify_kthread();
33182+
33183+ if (au_ftest_wkq(wkinfo->flags, WAIT)) {
a1f66529 33184+ INIT_WORK_ONSTACK(&wkinfo->wk, wkq_func);
9dbd164d 33185+ queue_work(au_wkq, &wkinfo->wk);
4a4d8108
AM
33186+ } else {
33187+ INIT_WORK(&wkinfo->wk, wkq_func);
33188+ schedule_work(&wkinfo->wk);
33189+ }
1facf9fc 33190+}
33191+
7f207e10
AM
33192+/*
33193+ * Be careful. It is easy to make deadlock happen.
33194+ * processA: lock, wkq and wait
33195+ * processB: wkq and wait, lock in wkq
33196+ * --> deadlock
33197+ */
b752ccd1 33198+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args)
1facf9fc 33199+{
33200+ int err;
33201+ AuWkqCompDeclare(comp);
33202+ struct au_wkinfo wkinfo = {
b752ccd1 33203+ .flags = flags,
1facf9fc 33204+ .func = func,
33205+ .args = args
33206+ };
33207+
33208+ err = au_wkq_comp_alloc(&wkinfo, &comp);
33209+ if (!err) {
53392da6 33210+ au_wkq_run(&wkinfo);
1facf9fc 33211+ /* no timeout, no interrupt */
33212+ wait_for_completion(wkinfo.comp);
33213+ au_wkq_comp_free(comp);
4a4d8108 33214+ destroy_work_on_stack(&wkinfo.wk);
1facf9fc 33215+ }
33216+
33217+ return err;
33218+
33219+}
33220+
027c5e7a
AM
33221+/*
33222+ * Note: dget/dput() in func for aufs dentries are not supported. It will be a
33223+ * problem in a concurrent umounting.
33224+ */
53392da6
AM
33225+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
33226+ unsigned int flags)
1facf9fc 33227+{
33228+ int err;
33229+ struct au_wkinfo *wkinfo;
33230+
33231+ atomic_inc(&au_sbi(sb)->si_nowait.nw_len);
33232+
33233+ /*
33234+ * wkq_func() must free this wkinfo.
33235+ * it highly depends upon the implementation of workqueue.
33236+ */
33237+ err = 0;
33238+ wkinfo = kmalloc(sizeof(*wkinfo), GFP_NOFS);
33239+ if (wkinfo) {
7f207e10 33240+ wkinfo->kobj = &au_sbi(sb)->si_kobj;
53392da6 33241+ wkinfo->flags = flags & ~AuWkq_WAIT;
1facf9fc 33242+ wkinfo->func = func;
33243+ wkinfo->args = args;
33244+ wkinfo->comp = NULL;
7f207e10 33245+ kobject_get(wkinfo->kobj);
9dbd164d 33246+ __module_get(THIS_MODULE); /* todo: ?? */
1facf9fc 33247+
53392da6 33248+ au_wkq_run(wkinfo);
1facf9fc 33249+ } else {
33250+ err = -ENOMEM;
e49829fe 33251+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 33252+ }
33253+
33254+ return err;
33255+}
33256+
33257+/* ---------------------------------------------------------------------- */
33258+
33259+void au_nwt_init(struct au_nowait_tasks *nwt)
33260+{
33261+ atomic_set(&nwt->nw_len, 0);
4a4d8108 33262+ /* smp_mb(); */ /* atomic_set */
1facf9fc 33263+ init_waitqueue_head(&nwt->nw_wq);
33264+}
33265+
33266+void au_wkq_fin(void)
33267+{
9dbd164d 33268+ destroy_workqueue(au_wkq);
1facf9fc 33269+}
33270+
33271+int __init au_wkq_init(void)
33272+{
9dbd164d 33273+ int err;
b752ccd1
AM
33274+
33275+ err = 0;
86dc4139 33276+ au_wkq = alloc_workqueue(AUFS_WKQ_NAME, 0, WQ_DFL_ACTIVE);
9dbd164d
AM
33277+ if (IS_ERR(au_wkq))
33278+ err = PTR_ERR(au_wkq);
33279+ else if (!au_wkq)
33280+ err = -ENOMEM;
b752ccd1
AM
33281+
33282+ return err;
1facf9fc 33283+}
7f207e10
AM
33284diff -urN /usr/share/empty/fs/aufs/wkq.h linux/fs/aufs/wkq.h
33285--- /usr/share/empty/fs/aufs/wkq.h 1970-01-01 01:00:00.000000000 +0100
ab036dbd 33286+++ linux/fs/aufs/wkq.h 2015-09-24 10:47:58.258053165 +0200
523b37e3 33287@@ -0,0 +1,91 @@
1facf9fc 33288+/*
2000de60 33289+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 33290+ *
33291+ * This program, aufs is free software; you can redistribute it and/or modify
33292+ * it under the terms of the GNU General Public License as published by
33293+ * the Free Software Foundation; either version 2 of the License, or
33294+ * (at your option) any later version.
dece6358
AM
33295+ *
33296+ * This program is distributed in the hope that it will be useful,
33297+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33298+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33299+ * GNU General Public License for more details.
33300+ *
33301+ * You should have received a copy of the GNU General Public License
523b37e3 33302+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 33303+ */
33304+
33305+/*
33306+ * workqueue for asynchronous/super-io operations
33307+ * todo: try new credentials management scheme
33308+ */
33309+
33310+#ifndef __AUFS_WKQ_H__
33311+#define __AUFS_WKQ_H__
33312+
33313+#ifdef __KERNEL__
33314+
dece6358
AM
33315+struct super_block;
33316+
1facf9fc 33317+/* ---------------------------------------------------------------------- */
33318+
33319+/*
33320+ * in the next operation, wait for the 'nowait' tasks in system-wide workqueue
33321+ */
33322+struct au_nowait_tasks {
33323+ atomic_t nw_len;
33324+ wait_queue_head_t nw_wq;
33325+};
33326+
33327+/* ---------------------------------------------------------------------- */
33328+
33329+typedef void (*au_wkq_func_t)(void *args);
33330+
33331+/* wkq flags */
33332+#define AuWkq_WAIT 1
9dbd164d 33333+#define AuWkq_NEST (1 << 1)
1facf9fc 33334+#define au_ftest_wkq(flags, name) ((flags) & AuWkq_##name)
7f207e10
AM
33335+#define au_fset_wkq(flags, name) \
33336+ do { (flags) |= AuWkq_##name; } while (0)
33337+#define au_fclr_wkq(flags, name) \
33338+ do { (flags) &= ~AuWkq_##name; } while (0)
1facf9fc 33339+
9dbd164d
AM
33340+#ifndef CONFIG_AUFS_HNOTIFY
33341+#undef AuWkq_NEST
33342+#define AuWkq_NEST 0
33343+#endif
33344+
1facf9fc 33345+/* wkq.c */
b752ccd1 33346+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args);
53392da6
AM
33347+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
33348+ unsigned int flags);
1facf9fc 33349+void au_nwt_init(struct au_nowait_tasks *nwt);
33350+int __init au_wkq_init(void);
33351+void au_wkq_fin(void);
33352+
33353+/* ---------------------------------------------------------------------- */
33354+
53392da6
AM
33355+static inline int au_wkq_test(void)
33356+{
33357+ return current->flags & PF_WQ_WORKER;
33358+}
33359+
b752ccd1 33360+static inline int au_wkq_wait(au_wkq_func_t func, void *args)
1facf9fc 33361+{
b752ccd1 33362+ return au_wkq_do_wait(AuWkq_WAIT, func, args);
1facf9fc 33363+}
33364+
33365+static inline void au_nwt_done(struct au_nowait_tasks *nwt)
33366+{
e49829fe 33367+ if (atomic_dec_and_test(&nwt->nw_len))
1facf9fc 33368+ wake_up_all(&nwt->nw_wq);
33369+}
33370+
33371+static inline int au_nwt_flush(struct au_nowait_tasks *nwt)
33372+{
33373+ wait_event(nwt->nw_wq, !atomic_read(&nwt->nw_len));
33374+ return 0;
33375+}
33376+
33377+#endif /* __KERNEL__ */
33378+#endif /* __AUFS_WKQ_H__ */
c1595e42
JR
33379diff -urN /usr/share/empty/fs/aufs/xattr.c linux/fs/aufs/xattr.c
33380--- /usr/share/empty/fs/aufs/xattr.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd 33381+++ linux/fs/aufs/xattr.c 2015-09-24 10:47:58.258053165 +0200
b912730e 33382@@ -0,0 +1,344 @@
c1595e42 33383+/*
2000de60 33384+ * Copyright (C) 2014-2015 Junjiro R. Okajima
c1595e42
JR
33385+ *
33386+ * This program, aufs is free software; you can redistribute it and/or modify
33387+ * it under the terms of the GNU General Public License as published by
33388+ * the Free Software Foundation; either version 2 of the License, or
33389+ * (at your option) any later version.
33390+ *
33391+ * This program is distributed in the hope that it will be useful,
33392+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33393+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33394+ * GNU General Public License for more details.
33395+ *
33396+ * You should have received a copy of the GNU General Public License
33397+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
33398+ */
33399+
33400+/*
33401+ * handling xattr functions
33402+ */
33403+
33404+#include <linux/xattr.h>
33405+#include "aufs.h"
33406+
33407+static int au_xattr_ignore(int err, char *name, unsigned int ignore_flags)
33408+{
33409+ if (!ignore_flags)
33410+ goto out;
33411+ switch (err) {
33412+ case -ENOMEM:
33413+ case -EDQUOT:
33414+ goto out;
33415+ }
33416+
33417+ if ((ignore_flags & AuBrAttr_ICEX) == AuBrAttr_ICEX) {
33418+ err = 0;
33419+ goto out;
33420+ }
33421+
33422+#define cmp(brattr, prefix) do { \
33423+ if (!strncmp(name, XATTR_##prefix##_PREFIX, \
33424+ XATTR_##prefix##_PREFIX_LEN)) { \
33425+ if (ignore_flags & AuBrAttr_ICEX_##brattr) \
33426+ err = 0; \
33427+ goto out; \
33428+ } \
33429+ } while (0)
33430+
33431+ cmp(SEC, SECURITY);
33432+ cmp(SYS, SYSTEM);
33433+ cmp(TR, TRUSTED);
33434+ cmp(USR, USER);
33435+#undef cmp
33436+
33437+ if (ignore_flags & AuBrAttr_ICEX_OTH)
33438+ err = 0;
33439+
33440+out:
33441+ return err;
33442+}
33443+
33444+static const int au_xattr_out_of_list = AuBrAttr_ICEX_OTH << 1;
33445+
33446+static int au_do_cpup_xattr(struct dentry *h_dst, struct dentry *h_src,
7e9cd9fe
AM
33447+ char *name, char **buf, unsigned int ignore_flags,
33448+ unsigned int verbose)
c1595e42
JR
33449+{
33450+ int err;
33451+ ssize_t ssz;
33452+ struct inode *h_idst;
33453+
33454+ ssz = vfs_getxattr_alloc(h_src, name, buf, 0, GFP_NOFS);
33455+ err = ssz;
33456+ if (unlikely(err <= 0)) {
c1595e42
JR
33457+ if (err == -ENODATA
33458+ || (err == -EOPNOTSUPP
b912730e 33459+ && ((ignore_flags & au_xattr_out_of_list)
5527c038 33460+ || (au_test_nfs_noacl(d_inode(h_src))
b912730e
AM
33461+ && (!strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS)
33462+ || !strcmp(name,
33463+ XATTR_NAME_POSIX_ACL_DEFAULT))))
33464+ ))
c1595e42 33465+ err = 0;
b912730e
AM
33466+ if (err && (verbose || au_debug_test()))
33467+ pr_err("%s, err %d\n", name, err);
c1595e42
JR
33468+ goto out;
33469+ }
33470+
33471+ /* unlock it temporarily */
5527c038 33472+ h_idst = d_inode(h_dst);
c1595e42
JR
33473+ mutex_unlock(&h_idst->i_mutex);
33474+ err = vfsub_setxattr(h_dst, name, *buf, ssz, /*flags*/0);
33475+ mutex_lock_nested(&h_idst->i_mutex, AuLsc_I_CHILD2);
33476+ if (unlikely(err)) {
7e9cd9fe
AM
33477+ if (verbose || au_debug_test())
33478+ pr_err("%s, err %d\n", name, err);
c1595e42
JR
33479+ err = au_xattr_ignore(err, name, ignore_flags);
33480+ }
33481+
33482+out:
33483+ return err;
33484+}
33485+
7e9cd9fe
AM
33486+int au_cpup_xattr(struct dentry *h_dst, struct dentry *h_src, int ignore_flags,
33487+ unsigned int verbose)
c1595e42
JR
33488+{
33489+ int err, unlocked, acl_access, acl_default;
33490+ ssize_t ssz;
33491+ struct inode *h_isrc, *h_idst;
33492+ char *value, *p, *o, *e;
33493+
33494+ /* try to stop updating the source inode while we are referencing it */
7e9cd9fe 33495+ /* there should not be the parent-child relationship between them */
5527c038
JR
33496+ h_isrc = d_inode(h_src);
33497+ h_idst = d_inode(h_dst);
c1595e42
JR
33498+ mutex_unlock(&h_idst->i_mutex);
33499+ mutex_lock_nested(&h_isrc->i_mutex, AuLsc_I_CHILD);
33500+ mutex_lock_nested(&h_idst->i_mutex, AuLsc_I_CHILD2);
33501+ unlocked = 0;
33502+
33503+ /* some filesystems don't list POSIX ACL, for example tmpfs */
33504+ ssz = vfs_listxattr(h_src, NULL, 0);
33505+ err = ssz;
33506+ if (unlikely(err < 0)) {
33507+ AuTraceErr(err);
33508+ if (err == -ENODATA
33509+ || err == -EOPNOTSUPP)
33510+ err = 0; /* ignore */
33511+ goto out;
33512+ }
33513+
33514+ err = 0;
33515+ p = NULL;
33516+ o = NULL;
33517+ if (ssz) {
33518+ err = -ENOMEM;
33519+ p = kmalloc(ssz, GFP_NOFS);
33520+ o = p;
33521+ if (unlikely(!p))
33522+ goto out;
33523+ err = vfs_listxattr(h_src, p, ssz);
33524+ }
33525+ mutex_unlock(&h_isrc->i_mutex);
33526+ unlocked = 1;
33527+ AuDbg("err %d, ssz %zd\n", err, ssz);
33528+ if (unlikely(err < 0))
33529+ goto out_free;
33530+
33531+ err = 0;
33532+ e = p + ssz;
33533+ value = NULL;
33534+ acl_access = 0;
33535+ acl_default = 0;
33536+ while (!err && p < e) {
33537+ acl_access |= !strncmp(p, XATTR_NAME_POSIX_ACL_ACCESS,
33538+ sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1);
33539+ acl_default |= !strncmp(p, XATTR_NAME_POSIX_ACL_DEFAULT,
33540+ sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)
33541+ - 1);
7e9cd9fe
AM
33542+ err = au_do_cpup_xattr(h_dst, h_src, p, &value, ignore_flags,
33543+ verbose);
c1595e42
JR
33544+ p += strlen(p) + 1;
33545+ }
33546+ AuTraceErr(err);
33547+ ignore_flags |= au_xattr_out_of_list;
33548+ if (!err && !acl_access) {
33549+ err = au_do_cpup_xattr(h_dst, h_src,
33550+ XATTR_NAME_POSIX_ACL_ACCESS, &value,
7e9cd9fe 33551+ ignore_flags, verbose);
c1595e42
JR
33552+ AuTraceErr(err);
33553+ }
33554+ if (!err && !acl_default) {
33555+ err = au_do_cpup_xattr(h_dst, h_src,
33556+ XATTR_NAME_POSIX_ACL_DEFAULT, &value,
7e9cd9fe 33557+ ignore_flags, verbose);
c1595e42
JR
33558+ AuTraceErr(err);
33559+ }
33560+
33561+ kfree(value);
33562+
33563+out_free:
33564+ kfree(o);
33565+out:
33566+ if (!unlocked)
33567+ mutex_unlock(&h_isrc->i_mutex);
33568+ AuTraceErr(err);
33569+ return err;
33570+}
33571+
33572+/* ---------------------------------------------------------------------- */
33573+
33574+enum {
33575+ AU_XATTR_LIST,
33576+ AU_XATTR_GET
33577+};
33578+
33579+struct au_lgxattr {
33580+ int type;
33581+ union {
33582+ struct {
33583+ char *list;
33584+ size_t size;
33585+ } list;
33586+ struct {
33587+ const char *name;
33588+ void *value;
33589+ size_t size;
33590+ } get;
33591+ } u;
33592+};
33593+
33594+static ssize_t au_lgxattr(struct dentry *dentry, struct au_lgxattr *arg)
33595+{
33596+ ssize_t err;
33597+ struct path h_path;
33598+ struct super_block *sb;
33599+
33600+ sb = dentry->d_sb;
33601+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
33602+ if (unlikely(err))
33603+ goto out;
33604+ err = au_h_path_getattr(dentry, /*force*/1, &h_path);
33605+ if (unlikely(err))
33606+ goto out_si;
33607+ if (unlikely(!h_path.dentry))
33608+ /* illegally overlapped or something */
33609+ goto out_di; /* pretending success */
33610+
33611+ /* always topmost entry only */
33612+ switch (arg->type) {
33613+ case AU_XATTR_LIST:
33614+ err = vfs_listxattr(h_path.dentry,
33615+ arg->u.list.list, arg->u.list.size);
33616+ break;
33617+ case AU_XATTR_GET:
33618+ err = vfs_getxattr(h_path.dentry,
33619+ arg->u.get.name, arg->u.get.value,
33620+ arg->u.get.size);
33621+ break;
33622+ }
33623+
33624+out_di:
33625+ di_read_unlock(dentry, AuLock_IR);
33626+out_si:
33627+ si_read_unlock(sb);
33628+out:
33629+ AuTraceErr(err);
33630+ return err;
33631+}
33632+
33633+ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size)
33634+{
33635+ struct au_lgxattr arg = {
33636+ .type = AU_XATTR_LIST,
33637+ .u.list = {
33638+ .list = list,
33639+ .size = size
33640+ },
33641+ };
33642+
33643+ return au_lgxattr(dentry, &arg);
33644+}
33645+
33646+ssize_t aufs_getxattr(struct dentry *dentry, const char *name, void *value,
33647+ size_t size)
33648+{
33649+ struct au_lgxattr arg = {
33650+ .type = AU_XATTR_GET,
33651+ .u.get = {
33652+ .name = name,
33653+ .value = value,
33654+ .size = size
33655+ },
33656+ };
33657+
33658+ return au_lgxattr(dentry, &arg);
33659+}
33660+
33661+int aufs_setxattr(struct dentry *dentry, const char *name, const void *value,
33662+ size_t size, int flags)
33663+{
33664+ struct au_srxattr arg = {
33665+ .type = AU_XATTR_SET,
33666+ .u.set = {
33667+ .name = name,
33668+ .value = value,
33669+ .size = size,
33670+ .flags = flags
33671+ },
33672+ };
33673+
33674+ return au_srxattr(dentry, &arg);
33675+}
33676+
33677+int aufs_removexattr(struct dentry *dentry, const char *name)
33678+{
33679+ struct au_srxattr arg = {
33680+ .type = AU_XATTR_REMOVE,
33681+ .u.remove = {
33682+ .name = name
33683+ },
33684+ };
33685+
33686+ return au_srxattr(dentry, &arg);
33687+}
33688+
33689+/* ---------------------------------------------------------------------- */
33690+
33691+#if 0
33692+static size_t au_xattr_list(struct dentry *dentry, char *list, size_t list_size,
33693+ const char *name, size_t name_len, int type)
33694+{
33695+ return aufs_listxattr(dentry, list, list_size);
33696+}
33697+
33698+static int au_xattr_get(struct dentry *dentry, const char *name, void *buffer,
33699+ size_t size, int type)
33700+{
33701+ return aufs_getxattr(dentry, name, buffer, size);
33702+}
33703+
33704+static int au_xattr_set(struct dentry *dentry, const char *name,
33705+ const void *value, size_t size, int flags, int type)
33706+{
33707+ return aufs_setxattr(dentry, name, value, size, flags);
33708+}
33709+
33710+static const struct xattr_handler au_xattr_handler = {
33711+ /* no prefix, no flags */
33712+ .list = au_xattr_list,
33713+ .get = au_xattr_get,
33714+ .set = au_xattr_set
33715+ /* why no remove? */
33716+};
33717+
33718+static const struct xattr_handler *au_xattr_handlers[] = {
33719+ &au_xattr_handler
33720+};
33721+
33722+void au_xattr_init(struct super_block *sb)
33723+{
33724+ /* sb->s_xattr = au_xattr_handlers; */
33725+}
33726+#endif
7f207e10
AM
33727diff -urN /usr/share/empty/fs/aufs/xino.c linux/fs/aufs/xino.c
33728--- /usr/share/empty/fs/aufs/xino.c 1970-01-01 01:00:00.000000000 +0100
ab036dbd
AM
33729+++ linux/fs/aufs/xino.c 2015-11-11 17:21:46.922197217 +0100
33730@@ -0,0 +1,1296 @@
1facf9fc 33731+/*
2000de60 33732+ * Copyright (C) 2005-2015 Junjiro R. Okajima
1facf9fc 33733+ *
33734+ * This program, aufs is free software; you can redistribute it and/or modify
33735+ * it under the terms of the GNU General Public License as published by
33736+ * the Free Software Foundation; either version 2 of the License, or
33737+ * (at your option) any later version.
dece6358
AM
33738+ *
33739+ * This program is distributed in the hope that it will be useful,
33740+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33741+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33742+ * GNU General Public License for more details.
33743+ *
33744+ * You should have received a copy of the GNU General Public License
523b37e3 33745+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 33746+ */
33747+
33748+/*
33749+ * external inode number translation table and bitmap
33750+ */
33751+
33752+#include <linux/seq_file.h>
392086de 33753+#include <linux/statfs.h>
1facf9fc 33754+#include "aufs.h"
33755+
9dbd164d 33756+/* todo: unnecessary to support mmap_sem since kernel-space? */
5527c038 33757+ssize_t xino_fread(vfs_readf_t func, struct file *file, void *kbuf, size_t size,
1facf9fc 33758+ loff_t *pos)
33759+{
33760+ ssize_t err;
33761+ mm_segment_t oldfs;
b752ccd1
AM
33762+ union {
33763+ void *k;
33764+ char __user *u;
33765+ } buf;
1facf9fc 33766+
b752ccd1 33767+ buf.k = kbuf;
1facf9fc 33768+ oldfs = get_fs();
33769+ set_fs(KERNEL_DS);
33770+ do {
33771+ /* todo: signal_pending? */
b752ccd1 33772+ err = func(file, buf.u, size, pos);
1facf9fc 33773+ } while (err == -EAGAIN || err == -EINTR);
33774+ set_fs(oldfs);
33775+
33776+#if 0 /* reserved for future use */
33777+ if (err > 0)
2000de60 33778+ fsnotify_access(file->f_path.dentry);
1facf9fc 33779+#endif
33780+
33781+ return err;
33782+}
33783+
33784+/* ---------------------------------------------------------------------- */
33785+
5527c038 33786+static ssize_t do_xino_fwrite(vfs_writef_t func, struct file *file, void *kbuf,
1facf9fc 33787+ size_t size, loff_t *pos)
33788+{
33789+ ssize_t err;
33790+ mm_segment_t oldfs;
b752ccd1
AM
33791+ union {
33792+ void *k;
33793+ const char __user *u;
33794+ } buf;
1facf9fc 33795+
b752ccd1 33796+ buf.k = kbuf;
1facf9fc 33797+ oldfs = get_fs();
33798+ set_fs(KERNEL_DS);
1facf9fc 33799+ do {
33800+ /* todo: signal_pending? */
b752ccd1 33801+ err = func(file, buf.u, size, pos);
1facf9fc 33802+ } while (err == -EAGAIN || err == -EINTR);
1facf9fc 33803+ set_fs(oldfs);
33804+
33805+#if 0 /* reserved for future use */
33806+ if (err > 0)
2000de60 33807+ fsnotify_modify(file->f_path.dentry);
1facf9fc 33808+#endif
33809+
33810+ return err;
33811+}
33812+
33813+struct do_xino_fwrite_args {
33814+ ssize_t *errp;
5527c038 33815+ vfs_writef_t func;
1facf9fc 33816+ struct file *file;
33817+ void *buf;
33818+ size_t size;
33819+ loff_t *pos;
33820+};
33821+
33822+static void call_do_xino_fwrite(void *args)
33823+{
33824+ struct do_xino_fwrite_args *a = args;
33825+ *a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos);
33826+}
33827+
5527c038
JR
33828+ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
33829+ size_t size, loff_t *pos)
1facf9fc 33830+{
33831+ ssize_t err;
33832+
33833+ /* todo: signal block and no wkq? */
b752ccd1
AM
33834+ if (rlimit(RLIMIT_FSIZE) == RLIM_INFINITY) {
33835+ lockdep_off();
33836+ err = do_xino_fwrite(func, file, buf, size, pos);
33837+ lockdep_on();
33838+ } else {
33839+ /*
33840+ * it breaks RLIMIT_FSIZE and normal user's limit,
33841+ * users should care about quota and real 'filesystem full.'
33842+ */
1facf9fc 33843+ int wkq_err;
33844+ struct do_xino_fwrite_args args = {
33845+ .errp = &err,
33846+ .func = func,
33847+ .file = file,
33848+ .buf = buf,
33849+ .size = size,
33850+ .pos = pos
33851+ };
33852+
33853+ wkq_err = au_wkq_wait(call_do_xino_fwrite, &args);
33854+ if (unlikely(wkq_err))
33855+ err = wkq_err;
b752ccd1 33856+ }
1facf9fc 33857+
33858+ return err;
33859+}
33860+
33861+/* ---------------------------------------------------------------------- */
33862+
33863+/*
33864+ * create a new xinofile at the same place/path as @base_file.
33865+ */
33866+struct file *au_xino_create2(struct file *base_file, struct file *copy_src)
33867+{
33868+ struct file *file;
4a4d8108 33869+ struct dentry *base, *parent;
523b37e3 33870+ struct inode *dir, *delegated;
1facf9fc 33871+ struct qstr *name;
1308ab2a 33872+ struct path path;
4a4d8108 33873+ int err;
1facf9fc 33874+
2000de60 33875+ base = base_file->f_path.dentry;
1facf9fc 33876+ parent = base->d_parent; /* dir inode is locked */
5527c038 33877+ dir = d_inode(parent);
1facf9fc 33878+ IMustLock(dir);
33879+
33880+ file = ERR_PTR(-EINVAL);
33881+ name = &base->d_name;
4a4d8108
AM
33882+ path.dentry = vfsub_lookup_one_len(name->name, parent, name->len);
33883+ if (IS_ERR(path.dentry)) {
33884+ file = (void *)path.dentry;
523b37e3
AM
33885+ pr_err("%pd lookup err %ld\n",
33886+ base, PTR_ERR(path.dentry));
1facf9fc 33887+ goto out;
33888+ }
33889+
33890+ /* no need to mnt_want_write() since we call dentry_open() later */
4a4d8108 33891+ err = vfs_create(dir, path.dentry, S_IRUGO | S_IWUGO, NULL);
1facf9fc 33892+ if (unlikely(err)) {
33893+ file = ERR_PTR(err);
523b37e3 33894+ pr_err("%pd create err %d\n", base, err);
1facf9fc 33895+ goto out_dput;
33896+ }
33897+
c06a8ce3 33898+ path.mnt = base_file->f_path.mnt;
4a4d8108 33899+ file = vfsub_dentry_open(&path,
7f207e10 33900+ O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
2cbb1c4b 33901+ /* | __FMODE_NONOTIFY */);
1facf9fc 33902+ if (IS_ERR(file)) {
523b37e3 33903+ pr_err("%pd open err %ld\n", base, PTR_ERR(file));
1facf9fc 33904+ goto out_dput;
33905+ }
33906+
523b37e3
AM
33907+ delegated = NULL;
33908+ err = vfsub_unlink(dir, &file->f_path, &delegated, /*force*/0);
33909+ if (unlikely(err == -EWOULDBLOCK)) {
33910+ pr_warn("cannot retry for NFSv4 delegation"
33911+ " for an internal unlink\n");
33912+ iput(delegated);
33913+ }
1facf9fc 33914+ if (unlikely(err)) {
523b37e3 33915+ pr_err("%pd unlink err %d\n", base, err);
1facf9fc 33916+ goto out_fput;
33917+ }
33918+
33919+ if (copy_src) {
33920+ /* no one can touch copy_src xino */
c06a8ce3 33921+ err = au_copy_file(file, copy_src, vfsub_f_size_read(copy_src));
1facf9fc 33922+ if (unlikely(err)) {
523b37e3 33923+ pr_err("%pd copy err %d\n", base, err);
1facf9fc 33924+ goto out_fput;
33925+ }
33926+ }
33927+ goto out_dput; /* success */
33928+
4f0767ce 33929+out_fput:
1facf9fc 33930+ fput(file);
33931+ file = ERR_PTR(err);
4f0767ce 33932+out_dput:
4a4d8108 33933+ dput(path.dentry);
4f0767ce 33934+out:
1facf9fc 33935+ return file;
33936+}
33937+
33938+struct au_xino_lock_dir {
33939+ struct au_hinode *hdir;
33940+ struct dentry *parent;
33941+ struct mutex *mtx;
33942+};
33943+
33944+static void au_xino_lock_dir(struct super_block *sb, struct file *xino,
33945+ struct au_xino_lock_dir *ldir)
33946+{
33947+ aufs_bindex_t brid, bindex;
33948+
33949+ ldir->hdir = NULL;
33950+ bindex = -1;
33951+ brid = au_xino_brid(sb);
33952+ if (brid >= 0)
33953+ bindex = au_br_index(sb, brid);
33954+ if (bindex >= 0) {
5527c038 33955+ ldir->hdir = au_hi(d_inode(sb->s_root), bindex);
4a4d8108 33956+ au_hn_imtx_lock_nested(ldir->hdir, AuLsc_I_PARENT);
1facf9fc 33957+ } else {
2000de60 33958+ ldir->parent = dget_parent(xino->f_path.dentry);
5527c038 33959+ ldir->mtx = &d_inode(ldir->parent)->i_mutex;
1facf9fc 33960+ mutex_lock_nested(ldir->mtx, AuLsc_I_PARENT);
33961+ }
33962+}
33963+
33964+static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir)
33965+{
33966+ if (ldir->hdir)
4a4d8108 33967+ au_hn_imtx_unlock(ldir->hdir);
1facf9fc 33968+ else {
33969+ mutex_unlock(ldir->mtx);
33970+ dput(ldir->parent);
33971+ }
33972+}
33973+
33974+/* ---------------------------------------------------------------------- */
33975+
33976+/* truncate xino files asynchronously */
33977+
33978+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex)
33979+{
33980+ int err;
392086de
AM
33981+ unsigned long jiffy;
33982+ blkcnt_t blocks;
1facf9fc 33983+ aufs_bindex_t bi, bend;
392086de 33984+ struct kstatfs *st;
1facf9fc 33985+ struct au_branch *br;
33986+ struct file *new_xino, *file;
33987+ struct super_block *h_sb;
33988+ struct au_xino_lock_dir ldir;
33989+
392086de
AM
33990+ err = -ENOMEM;
33991+ st = kzalloc(sizeof(*st), GFP_NOFS);
33992+ if (unlikely(!st))
33993+ goto out;
33994+
1facf9fc 33995+ err = -EINVAL;
33996+ bend = au_sbend(sb);
33997+ if (unlikely(bindex < 0 || bend < bindex))
392086de 33998+ goto out_st;
1facf9fc 33999+ br = au_sbr(sb, bindex);
34000+ file = br->br_xino.xi_file;
34001+ if (!file)
392086de
AM
34002+ goto out_st;
34003+
34004+ err = vfs_statfs(&file->f_path, st);
34005+ if (unlikely(err))
34006+ AuErr1("statfs err %d, ignored\n", err);
34007+ jiffy = jiffies;
34008+ blocks = file_inode(file)->i_blocks;
34009+ pr_info("begin truncating xino(b%d), ib%llu, %llu/%llu free blks\n",
34010+ bindex, (u64)blocks, st->f_bfree, st->f_blocks);
1facf9fc 34011+
34012+ au_xino_lock_dir(sb, file, &ldir);
34013+ /* mnt_want_write() is unnecessary here */
34014+ new_xino = au_xino_create2(file, file);
34015+ au_xino_unlock_dir(&ldir);
34016+ err = PTR_ERR(new_xino);
392086de
AM
34017+ if (IS_ERR(new_xino)) {
34018+ pr_err("err %d, ignored\n", err);
34019+ goto out_st;
34020+ }
1facf9fc 34021+ err = 0;
34022+ fput(file);
34023+ br->br_xino.xi_file = new_xino;
34024+
86dc4139 34025+ h_sb = au_br_sb(br);
1facf9fc 34026+ for (bi = 0; bi <= bend; bi++) {
34027+ if (unlikely(bi == bindex))
34028+ continue;
34029+ br = au_sbr(sb, bi);
86dc4139 34030+ if (au_br_sb(br) != h_sb)
1facf9fc 34031+ continue;
34032+
34033+ fput(br->br_xino.xi_file);
34034+ br->br_xino.xi_file = new_xino;
34035+ get_file(new_xino);
34036+ }
34037+
392086de
AM
34038+ err = vfs_statfs(&new_xino->f_path, st);
34039+ if (!err) {
34040+ pr_info("end truncating xino(b%d), ib%llu, %llu/%llu free blks\n",
34041+ bindex, (u64)file_inode(new_xino)->i_blocks,
34042+ st->f_bfree, st->f_blocks);
34043+ if (file_inode(new_xino)->i_blocks < blocks)
34044+ au_sbi(sb)->si_xino_jiffy = jiffy;
34045+ } else
34046+ AuErr1("statfs err %d, ignored\n", err);
34047+
34048+out_st:
34049+ kfree(st);
4f0767ce 34050+out:
1facf9fc 34051+ return err;
34052+}
34053+
34054+struct xino_do_trunc_args {
34055+ struct super_block *sb;
34056+ struct au_branch *br;
34057+};
34058+
34059+static void xino_do_trunc(void *_args)
34060+{
34061+ struct xino_do_trunc_args *args = _args;
34062+ struct super_block *sb;
34063+ struct au_branch *br;
34064+ struct inode *dir;
34065+ int err;
34066+ aufs_bindex_t bindex;
34067+
34068+ err = 0;
34069+ sb = args->sb;
5527c038 34070+ dir = d_inode(sb->s_root);
1facf9fc 34071+ br = args->br;
34072+
34073+ si_noflush_write_lock(sb);
34074+ ii_read_lock_parent(dir);
34075+ bindex = au_br_index(sb, br->br_id);
34076+ err = au_xino_trunc(sb, bindex);
1facf9fc 34077+ ii_read_unlock(dir);
34078+ if (unlikely(err))
392086de 34079+ pr_warn("err b%d, (%d)\n", bindex, err);
1facf9fc 34080+ atomic_dec(&br->br_xino_running);
34081+ atomic_dec(&br->br_count);
1facf9fc 34082+ si_write_unlock(sb);
027c5e7a 34083+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 34084+ kfree(args);
34085+}
34086+
392086de
AM
34087+static int xino_trunc_test(struct super_block *sb, struct au_branch *br)
34088+{
34089+ int err;
34090+ struct kstatfs st;
34091+ struct au_sbinfo *sbinfo;
34092+
34093+ /* todo: si_xino_expire and the ratio should be customizable */
34094+ sbinfo = au_sbi(sb);
34095+ if (time_before(jiffies,
34096+ sbinfo->si_xino_jiffy + sbinfo->si_xino_expire))
34097+ return 0;
34098+
34099+ /* truncation border */
34100+ err = vfs_statfs(&br->br_xino.xi_file->f_path, &st);
34101+ if (unlikely(err)) {
34102+ AuErr1("statfs err %d, ignored\n", err);
34103+ return 0;
34104+ }
34105+ if (div64_u64(st.f_bfree * 100, st.f_blocks) >= AUFS_XINO_DEF_TRUNC)
34106+ return 0;
34107+
34108+ return 1;
34109+}
34110+
1facf9fc 34111+static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
34112+{
34113+ struct xino_do_trunc_args *args;
34114+ int wkq_err;
34115+
392086de 34116+ if (!xino_trunc_test(sb, br))
1facf9fc 34117+ return;
34118+
34119+ if (atomic_inc_return(&br->br_xino_running) > 1)
34120+ goto out;
34121+
34122+ /* lock and kfree() will be called in xino_do_trunc() */
34123+ args = kmalloc(sizeof(*args), GFP_NOFS);
34124+ if (unlikely(!args)) {
34125+ AuErr1("no memory\n");
34126+ goto out_args;
34127+ }
34128+
e49829fe 34129+ atomic_inc(&br->br_count);
1facf9fc 34130+ args->sb = sb;
34131+ args->br = br;
53392da6 34132+ wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0);
1facf9fc 34133+ if (!wkq_err)
34134+ return; /* success */
34135+
4a4d8108 34136+ pr_err("wkq %d\n", wkq_err);
e49829fe 34137+ atomic_dec(&br->br_count);
1facf9fc 34138+
4f0767ce 34139+out_args:
1facf9fc 34140+ kfree(args);
4f0767ce 34141+out:
e49829fe 34142+ atomic_dec(&br->br_xino_running);
1facf9fc 34143+}
34144+
34145+/* ---------------------------------------------------------------------- */
34146+
5527c038 34147+static int au_xino_do_write(vfs_writef_t write, struct file *file,
1facf9fc 34148+ ino_t h_ino, ino_t ino)
34149+{
34150+ loff_t pos;
34151+ ssize_t sz;
34152+
34153+ pos = h_ino;
34154+ if (unlikely(au_loff_max / sizeof(ino) - 1 < pos)) {
34155+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
34156+ return -EFBIG;
34157+ }
34158+ pos *= sizeof(ino);
34159+ sz = xino_fwrite(write, file, &ino, sizeof(ino), &pos);
34160+ if (sz == sizeof(ino))
34161+ return 0; /* success */
34162+
34163+ AuIOErr("write failed (%zd)\n", sz);
34164+ return -EIO;
34165+}
34166+
34167+/*
34168+ * write @ino to the xinofile for the specified branch{@sb, @bindex}
34169+ * at the position of @h_ino.
34170+ * even if @ino is zero, it is written to the xinofile and means no entry.
34171+ * if the size of the xino file on a specific filesystem exceeds the watermark,
34172+ * try truncating it.
34173+ */
34174+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
34175+ ino_t ino)
34176+{
34177+ int err;
34178+ unsigned int mnt_flags;
34179+ struct au_branch *br;
34180+
34181+ BUILD_BUG_ON(sizeof(long long) != sizeof(au_loff_max)
34182+ || ((loff_t)-1) > 0);
dece6358 34183+ SiMustAnyLock(sb);
1facf9fc 34184+
34185+ mnt_flags = au_mntflags(sb);
34186+ if (!au_opt_test(mnt_flags, XINO))
34187+ return 0;
34188+
34189+ br = au_sbr(sb, bindex);
34190+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
34191+ h_ino, ino);
34192+ if (!err) {
34193+ if (au_opt_test(mnt_flags, TRUNC_XINO)
86dc4139 34194+ && au_test_fs_trunc_xino(au_br_sb(br)))
1facf9fc 34195+ xino_try_trunc(sb, br);
34196+ return 0; /* success */
34197+ }
34198+
34199+ AuIOErr("write failed (%d)\n", err);
34200+ return -EIO;
34201+}
34202+
34203+/* ---------------------------------------------------------------------- */
34204+
34205+/* aufs inode number bitmap */
34206+
34207+static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE;
34208+static ino_t xib_calc_ino(unsigned long pindex, int bit)
34209+{
34210+ ino_t ino;
34211+
34212+ AuDebugOn(bit < 0 || page_bits <= bit);
34213+ ino = AUFS_FIRST_INO + pindex * page_bits + bit;
34214+ return ino;
34215+}
34216+
34217+static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit)
34218+{
34219+ AuDebugOn(ino < AUFS_FIRST_INO);
34220+ ino -= AUFS_FIRST_INO;
34221+ *pindex = ino / page_bits;
34222+ *bit = ino % page_bits;
34223+}
34224+
34225+static int xib_pindex(struct super_block *sb, unsigned long pindex)
34226+{
34227+ int err;
34228+ loff_t pos;
34229+ ssize_t sz;
34230+ struct au_sbinfo *sbinfo;
34231+ struct file *xib;
34232+ unsigned long *p;
34233+
34234+ sbinfo = au_sbi(sb);
34235+ MtxMustLock(&sbinfo->si_xib_mtx);
34236+ AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE
34237+ || !au_opt_test(sbinfo->si_mntflags, XINO));
34238+
34239+ if (pindex == sbinfo->si_xib_last_pindex)
34240+ return 0;
34241+
34242+ xib = sbinfo->si_xib;
34243+ p = sbinfo->si_xib_buf;
34244+ pos = sbinfo->si_xib_last_pindex;
34245+ pos *= PAGE_SIZE;
34246+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
34247+ if (unlikely(sz != PAGE_SIZE))
34248+ goto out;
34249+
34250+ pos = pindex;
34251+ pos *= PAGE_SIZE;
c06a8ce3 34252+ if (vfsub_f_size_read(xib) >= pos + PAGE_SIZE)
1facf9fc 34253+ sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos);
34254+ else {
34255+ memset(p, 0, PAGE_SIZE);
34256+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
34257+ }
34258+ if (sz == PAGE_SIZE) {
34259+ sbinfo->si_xib_last_pindex = pindex;
34260+ return 0; /* success */
34261+ }
34262+
4f0767ce 34263+out:
b752ccd1
AM
34264+ AuIOErr1("write failed (%zd)\n", sz);
34265+ err = sz;
34266+ if (sz >= 0)
34267+ err = -EIO;
34268+ return err;
34269+}
34270+
34271+/* ---------------------------------------------------------------------- */
34272+
34273+static void au_xib_clear_bit(struct inode *inode)
34274+{
34275+ int err, bit;
34276+ unsigned long pindex;
34277+ struct super_block *sb;
34278+ struct au_sbinfo *sbinfo;
34279+
34280+ AuDebugOn(inode->i_nlink);
34281+
34282+ sb = inode->i_sb;
34283+ xib_calc_bit(inode->i_ino, &pindex, &bit);
34284+ AuDebugOn(page_bits <= bit);
34285+ sbinfo = au_sbi(sb);
34286+ mutex_lock(&sbinfo->si_xib_mtx);
34287+ err = xib_pindex(sb, pindex);
34288+ if (!err) {
34289+ clear_bit(bit, sbinfo->si_xib_buf);
34290+ sbinfo->si_xib_next_bit = bit;
34291+ }
34292+ mutex_unlock(&sbinfo->si_xib_mtx);
34293+}
34294+
34295+/* for s_op->delete_inode() */
34296+void au_xino_delete_inode(struct inode *inode, const int unlinked)
34297+{
34298+ int err;
34299+ unsigned int mnt_flags;
34300+ aufs_bindex_t bindex, bend, bi;
34301+ unsigned char try_trunc;
34302+ struct au_iinfo *iinfo;
34303+ struct super_block *sb;
34304+ struct au_hinode *hi;
34305+ struct inode *h_inode;
34306+ struct au_branch *br;
5527c038 34307+ vfs_writef_t xwrite;
b752ccd1
AM
34308+
34309+ sb = inode->i_sb;
34310+ mnt_flags = au_mntflags(sb);
34311+ if (!au_opt_test(mnt_flags, XINO)
34312+ || inode->i_ino == AUFS_ROOT_INO)
34313+ return;
34314+
34315+ if (unlinked) {
34316+ au_xigen_inc(inode);
34317+ au_xib_clear_bit(inode);
34318+ }
34319+
34320+ iinfo = au_ii(inode);
34321+ if (!iinfo)
34322+ return;
1facf9fc 34323+
b752ccd1
AM
34324+ bindex = iinfo->ii_bstart;
34325+ if (bindex < 0)
34326+ return;
1facf9fc 34327+
b752ccd1
AM
34328+ xwrite = au_sbi(sb)->si_xwrite;
34329+ try_trunc = !!au_opt_test(mnt_flags, TRUNC_XINO);
34330+ hi = iinfo->ii_hinode + bindex;
34331+ bend = iinfo->ii_bend;
34332+ for (; bindex <= bend; bindex++, hi++) {
34333+ h_inode = hi->hi_inode;
34334+ if (!h_inode
34335+ || (!unlinked && h_inode->i_nlink))
34336+ continue;
1facf9fc 34337+
b752ccd1
AM
34338+ /* inode may not be revalidated */
34339+ bi = au_br_index(sb, hi->hi_id);
34340+ if (bi < 0)
34341+ continue;
1facf9fc 34342+
b752ccd1
AM
34343+ br = au_sbr(sb, bi);
34344+ err = au_xino_do_write(xwrite, br->br_xino.xi_file,
34345+ h_inode->i_ino, /*ino*/0);
34346+ if (!err && try_trunc
86dc4139 34347+ && au_test_fs_trunc_xino(au_br_sb(br)))
b752ccd1 34348+ xino_try_trunc(sb, br);
1facf9fc 34349+ }
1facf9fc 34350+}
34351+
34352+/* get an unused inode number from bitmap */
34353+ino_t au_xino_new_ino(struct super_block *sb)
34354+{
34355+ ino_t ino;
34356+ unsigned long *p, pindex, ul, pend;
34357+ struct au_sbinfo *sbinfo;
34358+ struct file *file;
34359+ int free_bit, err;
34360+
34361+ if (!au_opt_test(au_mntflags(sb), XINO))
34362+ return iunique(sb, AUFS_FIRST_INO);
34363+
34364+ sbinfo = au_sbi(sb);
34365+ mutex_lock(&sbinfo->si_xib_mtx);
34366+ p = sbinfo->si_xib_buf;
34367+ free_bit = sbinfo->si_xib_next_bit;
34368+ if (free_bit < page_bits && !test_bit(free_bit, p))
34369+ goto out; /* success */
34370+ free_bit = find_first_zero_bit(p, page_bits);
34371+ if (free_bit < page_bits)
34372+ goto out; /* success */
34373+
34374+ pindex = sbinfo->si_xib_last_pindex;
34375+ for (ul = pindex - 1; ul < ULONG_MAX; ul--) {
34376+ err = xib_pindex(sb, ul);
34377+ if (unlikely(err))
34378+ goto out_err;
34379+ free_bit = find_first_zero_bit(p, page_bits);
34380+ if (free_bit < page_bits)
34381+ goto out; /* success */
34382+ }
34383+
34384+ file = sbinfo->si_xib;
c06a8ce3 34385+ pend = vfsub_f_size_read(file) / PAGE_SIZE;
1facf9fc 34386+ for (ul = pindex + 1; ul <= pend; ul++) {
34387+ err = xib_pindex(sb, ul);
34388+ if (unlikely(err))
34389+ goto out_err;
34390+ free_bit = find_first_zero_bit(p, page_bits);
34391+ if (free_bit < page_bits)
34392+ goto out; /* success */
34393+ }
34394+ BUG();
34395+
4f0767ce 34396+out:
1facf9fc 34397+ set_bit(free_bit, p);
7f207e10 34398+ sbinfo->si_xib_next_bit = free_bit + 1;
1facf9fc 34399+ pindex = sbinfo->si_xib_last_pindex;
34400+ mutex_unlock(&sbinfo->si_xib_mtx);
34401+ ino = xib_calc_ino(pindex, free_bit);
34402+ AuDbg("i%lu\n", (unsigned long)ino);
34403+ return ino;
4f0767ce 34404+out_err:
1facf9fc 34405+ mutex_unlock(&sbinfo->si_xib_mtx);
34406+ AuDbg("i0\n");
34407+ return 0;
34408+}
34409+
34410+/*
34411+ * read @ino from xinofile for the specified branch{@sb, @bindex}
34412+ * at the position of @h_ino.
34413+ * if the xinofile has no entry for @h_ino, @ino is set to zero.
34414+ */
34415+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
34416+ ino_t *ino)
34417+{
34418+ int err;
34419+ ssize_t sz;
34420+ loff_t pos;
34421+ struct file *file;
34422+ struct au_sbinfo *sbinfo;
34423+
34424+ *ino = 0;
34425+ if (!au_opt_test(au_mntflags(sb), XINO))
34426+ return 0; /* no xino */
34427+
34428+ err = 0;
34429+ sbinfo = au_sbi(sb);
34430+ pos = h_ino;
34431+ if (unlikely(au_loff_max / sizeof(*ino) - 1 < pos)) {
34432+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
34433+ return -EFBIG;
34434+ }
34435+ pos *= sizeof(*ino);
34436+
34437+ file = au_sbr(sb, bindex)->br_xino.xi_file;
c06a8ce3 34438+ if (vfsub_f_size_read(file) < pos + sizeof(*ino))
1facf9fc 34439+ return 0; /* no ino */
34440+
34441+ sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &pos);
34442+ if (sz == sizeof(*ino))
34443+ return 0; /* success */
34444+
34445+ err = sz;
34446+ if (unlikely(sz >= 0)) {
34447+ err = -EIO;
34448+ AuIOErr("xino read error (%zd)\n", sz);
34449+ }
34450+
34451+ return err;
34452+}
34453+
34454+/* ---------------------------------------------------------------------- */
34455+
34456+/* create and set a new xino file */
34457+
34458+struct file *au_xino_create(struct super_block *sb, char *fname, int silent)
34459+{
34460+ struct file *file;
34461+ struct dentry *h_parent, *d;
b912730e 34462+ struct inode *h_dir, *inode;
1facf9fc 34463+ int err;
34464+
34465+ /*
34466+ * at mount-time, and the xino file is the default path,
4a4d8108 34467+ * hnotify is disabled so we have no notify events to ignore.
1facf9fc 34468+ * when a user specified the xino, we cannot get au_hdir to be ignored.
34469+ */
7f207e10 34470+ file = vfsub_filp_open(fname, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
2cbb1c4b 34471+ /* | __FMODE_NONOTIFY */,
1facf9fc 34472+ S_IRUGO | S_IWUGO);
34473+ if (IS_ERR(file)) {
34474+ if (!silent)
4a4d8108 34475+ pr_err("open %s(%ld)\n", fname, PTR_ERR(file));
1facf9fc 34476+ return file;
34477+ }
34478+
34479+ /* keep file count */
b912730e
AM
34480+ err = 0;
34481+ inode = file_inode(file);
2000de60 34482+ h_parent = dget_parent(file->f_path.dentry);
5527c038 34483+ h_dir = d_inode(h_parent);
1facf9fc 34484+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
34485+ /* mnt_want_write() is unnecessary here */
523b37e3 34486+ /* no delegation since it is just created */
b912730e
AM
34487+ if (inode->i_nlink)
34488+ err = vfsub_unlink(h_dir, &file->f_path, /*delegated*/NULL,
34489+ /*force*/0);
1facf9fc 34490+ mutex_unlock(&h_dir->i_mutex);
34491+ dput(h_parent);
34492+ if (unlikely(err)) {
34493+ if (!silent)
4a4d8108 34494+ pr_err("unlink %s(%d)\n", fname, err);
1facf9fc 34495+ goto out;
34496+ }
34497+
34498+ err = -EINVAL;
2000de60 34499+ d = file->f_path.dentry;
1facf9fc 34500+ if (unlikely(sb == d->d_sb)) {
34501+ if (!silent)
4a4d8108 34502+ pr_err("%s must be outside\n", fname);
1facf9fc 34503+ goto out;
34504+ }
34505+ if (unlikely(au_test_fs_bad_xino(d->d_sb))) {
34506+ if (!silent)
4a4d8108
AM
34507+ pr_err("xino doesn't support %s(%s)\n",
34508+ fname, au_sbtype(d->d_sb));
1facf9fc 34509+ goto out;
34510+ }
34511+ return file; /* success */
34512+
4f0767ce 34513+out:
1facf9fc 34514+ fput(file);
34515+ file = ERR_PTR(err);
34516+ return file;
34517+}
34518+
34519+/*
34520+ * find another branch which is on the same filesystem as the specified
34521+ * branch{@btgt}. search until @bend.
34522+ */
34523+static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt,
34524+ aufs_bindex_t bend)
34525+{
34526+ aufs_bindex_t bindex;
34527+ struct super_block *tgt_sb = au_sbr_sb(sb, btgt);
34528+
34529+ for (bindex = 0; bindex < btgt; bindex++)
34530+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
34531+ return bindex;
34532+ for (bindex++; bindex <= bend; bindex++)
34533+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
34534+ return bindex;
34535+ return -1;
34536+}
34537+
34538+/* ---------------------------------------------------------------------- */
34539+
34540+/*
34541+ * initialize the xinofile for the specified branch @br
34542+ * at the place/path where @base_file indicates.
34543+ * test whether another branch is on the same filesystem or not,
34544+ * if @do_test is true.
34545+ */
34546+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t h_ino,
34547+ struct file *base_file, int do_test)
34548+{
34549+ int err;
34550+ ino_t ino;
34551+ aufs_bindex_t bend, bindex;
34552+ struct au_branch *shared_br, *b;
34553+ struct file *file;
34554+ struct super_block *tgt_sb;
34555+
34556+ shared_br = NULL;
34557+ bend = au_sbend(sb);
34558+ if (do_test) {
86dc4139 34559+ tgt_sb = au_br_sb(br);
1facf9fc 34560+ for (bindex = 0; bindex <= bend; bindex++) {
34561+ b = au_sbr(sb, bindex);
86dc4139 34562+ if (tgt_sb == au_br_sb(b)) {
1facf9fc 34563+ shared_br = b;
34564+ break;
34565+ }
34566+ }
34567+ }
34568+
34569+ if (!shared_br || !shared_br->br_xino.xi_file) {
34570+ struct au_xino_lock_dir ldir;
34571+
34572+ au_xino_lock_dir(sb, base_file, &ldir);
34573+ /* mnt_want_write() is unnecessary here */
34574+ file = au_xino_create2(base_file, NULL);
34575+ au_xino_unlock_dir(&ldir);
34576+ err = PTR_ERR(file);
34577+ if (IS_ERR(file))
34578+ goto out;
34579+ br->br_xino.xi_file = file;
34580+ } else {
34581+ br->br_xino.xi_file = shared_br->br_xino.xi_file;
34582+ get_file(br->br_xino.xi_file);
34583+ }
34584+
34585+ ino = AUFS_ROOT_INO;
34586+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
34587+ h_ino, ino);
b752ccd1
AM
34588+ if (unlikely(err)) {
34589+ fput(br->br_xino.xi_file);
34590+ br->br_xino.xi_file = NULL;
34591+ }
1facf9fc 34592+
4f0767ce 34593+out:
1facf9fc 34594+ return err;
34595+}
34596+
34597+/* ---------------------------------------------------------------------- */
34598+
34599+/* truncate a xino bitmap file */
34600+
34601+/* todo: slow. scan every ino_t stored in xino @file (read via @page) and set its bit in the xib buffer; the byte count steps by sizeof(*ino), one entry */
34602+static int do_xib_restore(struct super_block *sb, struct file *file, void *page)
34603+{
34604+ int err, bit;
34605+ ssize_t sz;
34606+ unsigned long pindex;
34607+ loff_t pos, pend;
34608+ struct au_sbinfo *sbinfo;
5527c038 34609+ vfs_readf_t func;
1facf9fc 34610+ ino_t *ino;
34611+ unsigned long *p;
34612+
34613+ err = 0;
34614+ sbinfo = au_sbi(sb);
dece6358 34615+ MtxMustLock(&sbinfo->si_xib_mtx);
1facf9fc 34616+ p = sbinfo->si_xib_buf;
34617+ func = sbinfo->si_xread;
c06a8ce3 34618+ pend = vfsub_f_size_read(file);
1facf9fc 34619+ pos = 0;
34620+ while (pos < pend) {
34621+ sz = xino_fread(func, file, page, PAGE_SIZE, &pos);
34622+ err = sz;
34623+ if (unlikely(sz <= 0))
34624+ goto out;
34625+
34626+ err = 0;
34627+ for (ino = page; sz > 0; ino++, sz -= sizeof(*ino)) {
34628+ if (unlikely(*ino < AUFS_FIRST_INO))
34629+ continue;
34630+
34631+ xib_calc_bit(*ino, &pindex, &bit);
34632+ AuDebugOn(page_bits <= bit);
34633+ err = xib_pindex(sb, pindex);
34634+ if (!err)
34635+ set_bit(bit, p);
34636+ else
34637+ goto out;
34638+ }
34639+ }
34640+
4f0767ce 34641+out:
1facf9fc 34642+ return err;
34643+}
34644+
34645+static int xib_restore(struct super_block *sb)
34646+{
34647+ int err;
34648+ aufs_bindex_t bindex, bend;
34649+ void *page;
34650+
34651+ err = -ENOMEM;
34652+ page = (void *)__get_free_page(GFP_NOFS);
34653+ if (unlikely(!page))
34654+ goto out;
34655+
34656+ err = 0;
34657+ bend = au_sbend(sb);
34658+ for (bindex = 0; !err && bindex <= bend; bindex++)
34659+ if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0)
34660+ err = do_xib_restore
34661+ (sb, au_sbr(sb, bindex)->br_xino.xi_file, page);
34662+ else
34663+ AuDbg("b%d\n", bindex);
34664+ free_page((unsigned long)page);
34665+
4f0767ce 34666+out:
1facf9fc 34667+ return err;
34668+}
34669+
34670+int au_xib_trunc(struct super_block *sb)
34671+{
34672+ int err;
34673+ ssize_t sz;
34674+ loff_t pos;
34675+ struct au_xino_lock_dir ldir;
34676+ struct au_sbinfo *sbinfo;
34677+ unsigned long *p;
34678+ struct file *file;
34679+
dece6358
AM
34680+ SiMustWriteLock(sb);
34681+
1facf9fc 34682+ err = 0;
34683+ sbinfo = au_sbi(sb);
34684+ if (!au_opt_test(sbinfo->si_mntflags, XINO))
34685+ goto out;
34686+
34687+ file = sbinfo->si_xib;
c06a8ce3 34688+ if (vfsub_f_size_read(file) <= PAGE_SIZE)
1facf9fc 34689+ goto out;
34690+
34691+ au_xino_lock_dir(sb, file, &ldir);
34692+ /* mnt_want_write() is unnecessary here */
34693+ file = au_xino_create2(sbinfo->si_xib, NULL);
34694+ au_xino_unlock_dir(&ldir);
34695+ err = PTR_ERR(file);
34696+ if (IS_ERR(file))
34697+ goto out;
34698+ fput(sbinfo->si_xib);
34699+ sbinfo->si_xib = file;
34700+
34701+ p = sbinfo->si_xib_buf;
34702+ memset(p, 0, PAGE_SIZE);
34703+ pos = 0;
34704+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos);
34705+ if (unlikely(sz != PAGE_SIZE)) {
34706+ err = sz;
34707+ AuIOErr("err %d\n", err);
34708+ if (sz >= 0)
34709+ err = -EIO;
34710+ goto out;
34711+ }
34712+
34713+ mutex_lock(&sbinfo->si_xib_mtx);
34714+ /* mnt_want_write() is unnecessary here */
34715+ err = xib_restore(sb);
34716+ mutex_unlock(&sbinfo->si_xib_mtx);
34717+
34718+out:
34719+ return err;
34720+}
34721+
34722+/* ---------------------------------------------------------------------- */
34723+
34724+/*
34725+ * xino mount option handlers
34726+ */
1facf9fc 34727+
34728+/* xino bitmap */
34729+static void xino_clear_xib(struct super_block *sb)
34730+{
34731+ struct au_sbinfo *sbinfo;
34732+
dece6358
AM
34733+ SiMustWriteLock(sb);
34734+
1facf9fc 34735+ sbinfo = au_sbi(sb);
34736+ sbinfo->si_xread = NULL;
34737+ sbinfo->si_xwrite = NULL;
34738+ if (sbinfo->si_xib)
34739+ fput(sbinfo->si_xib);
34740+ sbinfo->si_xib = NULL;
34741+ free_page((unsigned long)sbinfo->si_xib_buf);
34742+ sbinfo->si_xib_buf = NULL;
34743+}
34744+
34745+static int au_xino_set_xib(struct super_block *sb, struct file *base)
34746+{
34747+ int err;
34748+ loff_t pos;
34749+ struct au_sbinfo *sbinfo;
34750+ struct file *file;
34751+
dece6358
AM
34752+ SiMustWriteLock(sb);
34753+
1facf9fc 34754+ sbinfo = au_sbi(sb);
34755+ file = au_xino_create2(base, sbinfo->si_xib);
34756+ err = PTR_ERR(file);
34757+ if (IS_ERR(file))
34758+ goto out;
34759+ if (sbinfo->si_xib)
34760+ fput(sbinfo->si_xib);
34761+ sbinfo->si_xib = file;
5527c038
JR
34762+ sbinfo->si_xread = vfs_readf(file);
34763+ sbinfo->si_xwrite = vfs_writef(file);
1facf9fc 34764+
34765+ err = -ENOMEM;
34766+ if (!sbinfo->si_xib_buf)
34767+ sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS);
34768+ if (unlikely(!sbinfo->si_xib_buf))
34769+ goto out_unset;
34770+
34771+ sbinfo->si_xib_last_pindex = 0;
34772+ sbinfo->si_xib_next_bit = 0;
c06a8ce3 34773+ if (vfsub_f_size_read(file) < PAGE_SIZE) {
1facf9fc 34774+ pos = 0;
34775+ err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf,
34776+ PAGE_SIZE, &pos);
34777+ if (unlikely(err != PAGE_SIZE))
34778+ goto out_free;
34779+ }
34780+ err = 0;
34781+ goto out; /* success */
34782+
4f0767ce 34783+out_free:
1facf9fc 34784+ free_page((unsigned long)sbinfo->si_xib_buf);
b752ccd1
AM
34785+ sbinfo->si_xib_buf = NULL;
34786+ if (err >= 0)
34787+ err = -EIO;
4f0767ce 34788+out_unset:
b752ccd1
AM
34789+ fput(sbinfo->si_xib);
34790+ sbinfo->si_xib = NULL;
34791+ sbinfo->si_xread = NULL;
34792+ sbinfo->si_xwrite = NULL;
4f0767ce 34793+out:
b752ccd1 34794+ return err;
1facf9fc 34795+}
34796+
b752ccd1
AM
34797+/* xino for each branch */
34798+static void xino_clear_br(struct super_block *sb)
34799+{
34800+ aufs_bindex_t bindex, bend;
34801+ struct au_branch *br;
1facf9fc 34802+
b752ccd1
AM
34803+ bend = au_sbend(sb);
34804+ for (bindex = 0; bindex <= bend; bindex++) {
34805+ br = au_sbr(sb, bindex);
34806+ if (!br || !br->br_xino.xi_file)
34807+ continue;
34808+
34809+ fput(br->br_xino.xi_file);
34810+ br->br_xino.xi_file = NULL;
34811+ }
34812+}
34813+
34814+static int au_xino_set_br(struct super_block *sb, struct file *base)
1facf9fc 34815+{
34816+ int err;
b752ccd1
AM
34817+ ino_t ino;
34818+ aufs_bindex_t bindex, bend, bshared;
34819+ struct {
34820+ struct file *old, *new;
34821+ } *fpair, *p;
34822+ struct au_branch *br;
34823+ struct inode *inode;
5527c038 34824+ vfs_writef_t writef;
1facf9fc 34825+
b752ccd1
AM
34826+ SiMustWriteLock(sb);
34827+
34828+ err = -ENOMEM;
34829+ bend = au_sbend(sb);
34830+ fpair = kcalloc(bend + 1, sizeof(*fpair), GFP_NOFS);
34831+ if (unlikely(!fpair))
1facf9fc 34832+ goto out;
34833+
5527c038 34834+ inode = d_inode(sb->s_root);
b752ccd1
AM
34835+ ino = AUFS_ROOT_INO;
34836+ writef = au_sbi(sb)->si_xwrite;
34837+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
34838+ br = au_sbr(sb, bindex);
34839+ bshared = is_sb_shared(sb, bindex, bindex - 1);
34840+ if (bshared >= 0) {
34841+ /* shared xino */
34842+ *p = fpair[bshared];
34843+ get_file(p->new);
34844+ }
34845+
34846+ if (!p->new) {
34847+ /* new xino */
34848+ p->old = br->br_xino.xi_file;
34849+ p->new = au_xino_create2(base, br->br_xino.xi_file);
34850+ err = PTR_ERR(p->new);
34851+ if (IS_ERR(p->new)) {
34852+ p->new = NULL;
34853+ goto out_pair;
34854+ }
34855+ }
34856+
34857+ err = au_xino_do_write(writef, p->new,
34858+ au_h_iptr(inode, bindex)->i_ino, ino);
34859+ if (unlikely(err))
34860+ goto out_pair;
34861+ }
34862+
34863+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
34864+ br = au_sbr(sb, bindex);
34865+ if (br->br_xino.xi_file)
34866+ fput(br->br_xino.xi_file);
34867+ get_file(p->new);
34868+ br->br_xino.xi_file = p->new;
34869+ }
1facf9fc 34870+
4f0767ce 34871+out_pair:
b752ccd1
AM
34872+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++)
34873+ if (p->new)
34874+ fput(p->new);
34875+ else
34876+ break;
34877+ kfree(fpair);
4f0767ce 34878+out:
1facf9fc 34879+ return err;
34880+}
b752ccd1
AM
34881+
34882+void au_xino_clr(struct super_block *sb)
34883+{
34884+ struct au_sbinfo *sbinfo;
34885+
34886+ au_xigen_clr(sb);
34887+ xino_clear_xib(sb);
34888+ xino_clear_br(sb);
34889+ sbinfo = au_sbi(sb);
34890+ /* lvalue, do not call au_mntflags() */
34891+ au_opt_clr(sbinfo->si_mntflags, XINO);
34892+}
34893+
34894+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount)
34895+{
34896+ int err, skip;
34897+ struct dentry *parent, *cur_parent;
34898+ struct qstr *dname, *cur_name;
34899+ struct file *cur_xino;
34900+ struct inode *dir;
34901+ struct au_sbinfo *sbinfo;
34902+
34903+ SiMustWriteLock(sb);
34904+
34905+ err = 0;
34906+ sbinfo = au_sbi(sb);
2000de60 34907+ parent = dget_parent(xino->file->f_path.dentry);
b752ccd1
AM
34908+ if (remount) {
34909+ skip = 0;
2000de60 34910+ dname = &xino->file->f_path.dentry->d_name;
b752ccd1
AM
34911+ cur_xino = sbinfo->si_xib;
34912+ if (cur_xino) {
2000de60
JR
34913+ cur_parent = dget_parent(cur_xino->f_path.dentry);
34914+ cur_name = &cur_xino->f_path.dentry->d_name;
b752ccd1 34915+ skip = (cur_parent == parent
38d290e6 34916+ && au_qstreq(dname, cur_name));
b752ccd1
AM
34917+ dput(cur_parent);
34918+ }
34919+ if (skip)
34920+ goto out;
34921+ }
34922+
34923+ au_opt_set(sbinfo->si_mntflags, XINO);
5527c038 34924+ dir = d_inode(parent);
b752ccd1
AM
34925+ mutex_lock_nested(&dir->i_mutex, AuLsc_I_PARENT);
34926+ /* mnt_want_write() is unnecessary here */
34927+ err = au_xino_set_xib(sb, xino->file);
34928+ if (!err)
34929+ err = au_xigen_set(sb, xino->file);
34930+ if (!err)
34931+ err = au_xino_set_br(sb, xino->file);
34932+ mutex_unlock(&dir->i_mutex);
34933+ if (!err)
34934+ goto out; /* success */
34935+
34936+ /* reset all */
34937+ AuIOErr("failed creating xino(%d).\n", err);
c1595e42
JR
34938+ au_xigen_clr(sb);
34939+ xino_clear_xib(sb);
b752ccd1 34940+
4f0767ce 34941+out:
b752ccd1
AM
34942+ dput(parent);
34943+ return err;
34944+}
34945+
34946+/* ---------------------------------------------------------------------- */
34947+
34948+/*
34949+ * create a xinofile at the default place/path.
34950+ */
34951+struct file *au_xino_def(struct super_block *sb)
34952+{
34953+ struct file *file;
34954+ char *page, *p;
34955+ struct au_branch *br;
34956+ struct super_block *h_sb;
34957+ struct path path;
34958+ aufs_bindex_t bend, bindex, bwr;
34959+
34960+ br = NULL;
34961+ bend = au_sbend(sb);
34962+ bwr = -1;
34963+ for (bindex = 0; bindex <= bend; bindex++) {
34964+ br = au_sbr(sb, bindex);
34965+ if (au_br_writable(br->br_perm)
86dc4139 34966+ && !au_test_fs_bad_xino(au_br_sb(br))) {
b752ccd1
AM
34967+ bwr = bindex;
34968+ break;
34969+ }
34970+ }
34971+
7f207e10
AM
34972+ if (bwr >= 0) {
34973+ file = ERR_PTR(-ENOMEM);
537831f9 34974+ page = (void *)__get_free_page(GFP_NOFS);
7f207e10
AM
34975+ if (unlikely(!page))
34976+ goto out;
86dc4139 34977+ path.mnt = au_br_mnt(br);
7f207e10
AM
34978+ path.dentry = au_h_dptr(sb->s_root, bwr);
34979+ p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME));
34980+ file = (void *)p;
34981+ if (!IS_ERR(p)) {
34982+ strcat(p, "/" AUFS_XINO_FNAME);
34983+ AuDbg("%s\n", p);
34984+ file = au_xino_create(sb, p, /*silent*/0);
34985+ if (!IS_ERR(file))
34986+ au_xino_brid_set(sb, br->br_id);
34987+ }
537831f9 34988+ free_page((unsigned long)page);
7f207e10
AM
34989+ } else {
34990+ file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0);
34991+ if (IS_ERR(file))
34992+ goto out;
2000de60 34993+ h_sb = file->f_path.dentry->d_sb;
7f207e10
AM
34994+ if (unlikely(au_test_fs_bad_xino(h_sb))) {
34995+ pr_err("xino doesn't support %s(%s)\n",
34996+ AUFS_XINO_DEFPATH, au_sbtype(h_sb));
34997+ fput(file);
34998+ file = ERR_PTR(-EINVAL);
34999+ }
35000+ if (!IS_ERR(file))
35001+ au_xino_brid_set(sb, -1);
35002+ }
0c5527e5 35003+
7f207e10
AM
35004+out:
35005+ return file;
35006+}
35007+
35008+/* ---------------------------------------------------------------------- */
35009+
35010+int au_xino_path(struct seq_file *seq, struct file *file)
35011+{
35012+ int err;
35013+
35014+ err = au_seq_path(seq, &file->f_path);
ab036dbd 35015+ if (unlikely(err))
7f207e10
AM
35016+ goto out;
35017+
7f207e10
AM
35018+#define Deleted "\\040(deleted)"
35019+ seq->count -= sizeof(Deleted) - 1;
35020+ AuDebugOn(memcmp(seq->buf + seq->count, Deleted,
35021+ sizeof(Deleted) - 1));
35022+#undef Deleted
35023+
35024+out:
35025+ return err;
35026+}
537831f9
AM
35027diff -urN /usr/share/empty/include/uapi/linux/aufs_type.h linux/include/uapi/linux/aufs_type.h
35028--- /usr/share/empty/include/uapi/linux/aufs_type.h 1970-01-01 01:00:00.000000000 +0100
ab036dbd 35029+++ linux/include/uapi/linux/aufs_type.h 2015-12-10 18:46:31.223310574 +0100
c1595e42 35030@@ -0,0 +1,419 @@
7f207e10 35031+/*
2000de60 35032+ * Copyright (C) 2005-2015 Junjiro R. Okajima
7f207e10
AM
35033+ *
35034+ * This program, aufs is free software; you can redistribute it and/or modify
35035+ * it under the terms of the GNU General Public License as published by
35036+ * the Free Software Foundation; either version 2 of the License, or
35037+ * (at your option) any later version.
35038+ *
35039+ * This program is distributed in the hope that it will be useful,
35040+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
35041+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35042+ * GNU General Public License for more details.
35043+ *
35044+ * You should have received a copy of the GNU General Public License
523b37e3 35045+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7f207e10
AM
35046+ */
35047+
35048+#ifndef __AUFS_TYPE_H__
35049+#define __AUFS_TYPE_H__
35050+
f6c5ef8b
AM
35051+#define AUFS_NAME "aufs"
35052+
9dbd164d 35053+#ifdef __KERNEL__
f6c5ef8b
AM
35054+/*
35055+ * define it before including all other headers.
35056+ * sched.h may use pr_* macros before defining "current", so define the
35057+ * no-current version first, and re-define later.
35058+ */
35059+#define pr_fmt(fmt) AUFS_NAME " %s:%d: " fmt, __func__, __LINE__
35060+#include <linux/sched.h>
35061+#undef pr_fmt
a2a7ad62
AM
35062+#define pr_fmt(fmt) \
35063+ AUFS_NAME " %s:%d:%.*s[%d]: " fmt, __func__, __LINE__, \
35064+ (int)sizeof(current->comm), current->comm, current->pid
9dbd164d
AM
35065+#else
35066+#include <stdint.h>
35067+#include <sys/types.h>
f6c5ef8b 35068+#endif /* __KERNEL__ */
7f207e10 35069+
f6c5ef8b
AM
35070+#include <linux/limits.h>
35071+
ab036dbd 35072+#define AUFS_VERSION "4.1-20151116"
7f207e10
AM
35073+
35074+/* todo? move this to linux-2.6.19/include/magic.h */
35075+#define AUFS_SUPER_MAGIC ('a' << 24 | 'u' << 16 | 'f' << 8 | 's')
35076+
35077+/* ---------------------------------------------------------------------- */
35078+
35079+#ifdef CONFIG_AUFS_BRANCH_MAX_127
9dbd164d 35080+typedef int8_t aufs_bindex_t;
7f207e10
AM
35081+#define AUFS_BRANCH_MAX 127
35082+#else
9dbd164d 35083+typedef int16_t aufs_bindex_t;
7f207e10
AM
35084+#ifdef CONFIG_AUFS_BRANCH_MAX_511
35085+#define AUFS_BRANCH_MAX 511
35086+#elif defined(CONFIG_AUFS_BRANCH_MAX_1023)
35087+#define AUFS_BRANCH_MAX 1023
35088+#elif defined(CONFIG_AUFS_BRANCH_MAX_32767)
35089+#define AUFS_BRANCH_MAX 32767
35090+#endif
35091+#endif
35092+
35093+#ifdef __KERNEL__
35094+#ifndef AUFS_BRANCH_MAX
35095+#error unknown CONFIG_AUFS_BRANCH_MAX value
35096+#endif
35097+#endif /* __KERNEL__ */
35098+
35099+/* ---------------------------------------------------------------------- */
35100+
7f207e10
AM
35101+#define AUFS_FSTYPE AUFS_NAME
35102+
35103+#define AUFS_ROOT_INO 2
35104+#define AUFS_FIRST_INO 11
35105+
35106+#define AUFS_WH_PFX ".wh."
35107+#define AUFS_WH_PFX_LEN ((int)sizeof(AUFS_WH_PFX) - 1)
35108+#define AUFS_WH_TMP_LEN 4
86dc4139 35109+/* a limit for rmdir/rename a dir and copyup */
7f207e10
AM
35110+#define AUFS_MAX_NAMELEN (NAME_MAX \
35111+ - AUFS_WH_PFX_LEN * 2 /* doubly whiteouted */\
35112+ - 1 /* dot */\
35113+ - AUFS_WH_TMP_LEN) /* hex */
35114+#define AUFS_XINO_FNAME "." AUFS_NAME ".xino"
35115+#define AUFS_XINO_DEFPATH "/tmp/" AUFS_XINO_FNAME
392086de
AM
35116+#define AUFS_XINO_DEF_SEC 30 /* seconds */
35117+#define AUFS_XINO_DEF_TRUNC 45 /* percentage */
7f207e10
AM
35118+#define AUFS_DIRWH_DEF 3
35119+#define AUFS_RDCACHE_DEF 10 /* seconds */
027c5e7a 35120+#define AUFS_RDCACHE_MAX 3600 /* seconds */
7f207e10
AM
35121+#define AUFS_RDBLK_DEF 512 /* bytes */
35122+#define AUFS_RDHASH_DEF 32
35123+#define AUFS_WKQ_NAME AUFS_NAME "d"
027c5e7a
AM
35124+#define AUFS_MFS_DEF_SEC 30 /* seconds */
35125+#define AUFS_MFS_MAX_SEC 3600 /* seconds */
076b876e 35126+#define AUFS_FHSM_CACHE_DEF_SEC 30 /* seconds */
86dc4139 35127+#define AUFS_PLINK_WARN 50 /* number of plinks in a single bucket */
7f207e10
AM
35128+
35129+/* pseudo-link maintenance under /proc */
35130+#define AUFS_PLINK_MAINT_NAME "plink_maint"
35131+#define AUFS_PLINK_MAINT_DIR "fs/" AUFS_NAME
35132+#define AUFS_PLINK_MAINT_PATH AUFS_PLINK_MAINT_DIR "/" AUFS_PLINK_MAINT_NAME
35133+
35134+#define AUFS_DIROPQ_NAME AUFS_WH_PFX ".opq" /* whiteouted doubly */
35135+#define AUFS_WH_DIROPQ AUFS_WH_PFX AUFS_DIROPQ_NAME
35136+
35137+#define AUFS_BASE_NAME AUFS_WH_PFX AUFS_NAME
35138+#define AUFS_PLINKDIR_NAME AUFS_WH_PFX "plnk"
35139+#define AUFS_ORPHDIR_NAME AUFS_WH_PFX "orph"
35140+
35141+/* doubly whiteouted */
35142+#define AUFS_WH_BASE AUFS_WH_PFX AUFS_BASE_NAME
35143+#define AUFS_WH_PLINKDIR AUFS_WH_PFX AUFS_PLINKDIR_NAME
35144+#define AUFS_WH_ORPHDIR AUFS_WH_PFX AUFS_ORPHDIR_NAME
35145+
1e00d052 35146+/* branch permissions and attributes */
7f207e10
AM
35147+#define AUFS_BRPERM_RW "rw"
35148+#define AUFS_BRPERM_RO "ro"
35149+#define AUFS_BRPERM_RR "rr"
076b876e
AM
35150+#define AUFS_BRATTR_COO_REG "coo_reg"
35151+#define AUFS_BRATTR_COO_ALL "coo_all"
35152+#define AUFS_BRATTR_FHSM "fhsm"
35153+#define AUFS_BRATTR_UNPIN "unpin"
c1595e42
JR
35154+#define AUFS_BRATTR_ICEX "icex"
35155+#define AUFS_BRATTR_ICEX_SEC "icexsec"
35156+#define AUFS_BRATTR_ICEX_SYS "icexsys"
35157+#define AUFS_BRATTR_ICEX_TR "icextr"
35158+#define AUFS_BRATTR_ICEX_USR "icexusr"
35159+#define AUFS_BRATTR_ICEX_OTH "icexoth"
1e00d052
AM
35160+#define AUFS_BRRATTR_WH "wh"
35161+#define AUFS_BRWATTR_NLWH "nolwh"
076b876e
AM
35162+#define AUFS_BRWATTR_MOO "moo"
35163+
35164+#define AuBrPerm_RW 1 /* writable, hardlinkable wh */
35165+#define AuBrPerm_RO (1 << 1) /* readonly */
35166+#define AuBrPerm_RR (1 << 2) /* natively readonly */
35167+#define AuBrPerm_Mask (AuBrPerm_RW | AuBrPerm_RO | AuBrPerm_RR)
35168+
35169+#define AuBrAttr_COO_REG (1 << 3) /* copy-up on open */
35170+#define AuBrAttr_COO_ALL (1 << 4)
35171+#define AuBrAttr_COO_Mask (AuBrAttr_COO_REG | AuBrAttr_COO_ALL)
35172+
35173+#define AuBrAttr_FHSM (1 << 5) /* file-based hsm */
35174+#define AuBrAttr_UNPIN (1 << 6) /* rename-able top dir of
c1595e42
JR
35175+ branch. meaningless since
35176+ linux-3.18-rc1 */
35177+
35178+/* ignore error in copying XATTR */
35179+#define AuBrAttr_ICEX_SEC (1 << 7)
35180+#define AuBrAttr_ICEX_SYS (1 << 8)
35181+#define AuBrAttr_ICEX_TR (1 << 9)
35182+#define AuBrAttr_ICEX_USR (1 << 10)
35183+#define AuBrAttr_ICEX_OTH (1 << 11)
35184+#define AuBrAttr_ICEX (AuBrAttr_ICEX_SEC \
35185+ | AuBrAttr_ICEX_SYS \
35186+ | AuBrAttr_ICEX_TR \
35187+ | AuBrAttr_ICEX_USR \
35188+ | AuBrAttr_ICEX_OTH)
35189+
35190+#define AuBrRAttr_WH (1 << 12) /* whiteout-able */
076b876e
AM
35191+#define AuBrRAttr_Mask AuBrRAttr_WH
35192+
c1595e42
JR
35193+#define AuBrWAttr_NoLinkWH (1 << 13) /* un-hardlinkable whiteouts */
35194+#define AuBrWAttr_MOO (1 << 14) /* move-up on open */
076b876e
AM
35195+#define AuBrWAttr_Mask (AuBrWAttr_NoLinkWH | AuBrWAttr_MOO)
35196+
35197+#define AuBrAttr_CMOO_Mask (AuBrAttr_COO_Mask | AuBrWAttr_MOO)
35198+
c1595e42 35199+/* #warning test userspace */
076b876e
AM
35200+#ifdef __KERNEL__
35201+#ifndef CONFIG_AUFS_FHSM
35202+#undef AuBrAttr_FHSM
35203+#define AuBrAttr_FHSM 0
35204+#endif
c1595e42
JR
35205+#ifndef CONFIG_AUFS_XATTR
35206+#undef AuBrAttr_ICEX
35207+#define AuBrAttr_ICEX 0
35208+#undef AuBrAttr_ICEX_SEC
35209+#define AuBrAttr_ICEX_SEC 0
35210+#undef AuBrAttr_ICEX_SYS
35211+#define AuBrAttr_ICEX_SYS 0
35212+#undef AuBrAttr_ICEX_TR
35213+#define AuBrAttr_ICEX_TR 0
35214+#undef AuBrAttr_ICEX_USR
35215+#define AuBrAttr_ICEX_USR 0
35216+#undef AuBrAttr_ICEX_OTH
35217+#define AuBrAttr_ICEX_OTH 0
35218+#endif
076b876e
AM
35219+#endif
35220+
35221+/* the longest combination */
c1595e42
JR
35222+/* AUFS_BRATTR_ICEX and AUFS_BRATTR_ICEX_TR don't affect here */
35223+#define AuBrPermStrSz sizeof(AUFS_BRPERM_RW \
35224+ "+" AUFS_BRATTR_COO_REG \
35225+ "+" AUFS_BRATTR_FHSM \
35226+ "+" AUFS_BRATTR_UNPIN \
7e9cd9fe
AM
35227+ "+" AUFS_BRATTR_ICEX_SEC \
35228+ "+" AUFS_BRATTR_ICEX_SYS \
35229+ "+" AUFS_BRATTR_ICEX_USR \
35230+ "+" AUFS_BRATTR_ICEX_OTH \
076b876e
AM
35231+ "+" AUFS_BRWATTR_NLWH)
35232+
35233+typedef struct {
35234+ char a[AuBrPermStrSz];
35235+} au_br_perm_str_t;
35236+
35237+static inline int au_br_writable(int brperm)
35238+{
35239+ return brperm & AuBrPerm_RW;
35240+}
35241+
35242+static inline int au_br_whable(int brperm)
35243+{
35244+ return brperm & (AuBrPerm_RW | AuBrRAttr_WH);
35245+}
35246+
35247+static inline int au_br_wh_linkable(int brperm)
35248+{
35249+ return !(brperm & AuBrWAttr_NoLinkWH);
35250+}
35251+
35252+static inline int au_br_cmoo(int brperm)
35253+{
35254+ return brperm & AuBrAttr_CMOO_Mask;
35255+}
35256+
35257+static inline int au_br_fhsm(int brperm)
35258+{
35259+ return brperm & AuBrAttr_FHSM;
35260+}
7f207e10
AM
35261+
35262+/* ---------------------------------------------------------------------- */
35263+
35264+/* ioctl */
35265+enum {
35266+ /* readdir in userspace */
35267+ AuCtl_RDU,
35268+ AuCtl_RDU_INO,
35269+
076b876e
AM
35270+ AuCtl_WBR_FD, /* pathconf wrapper */
35271+ AuCtl_IBUSY, /* busy inode */
35272+ AuCtl_MVDOWN, /* move-down */
35273+ AuCtl_BR, /* info about branches */
35274+ AuCtl_FHSM_FD /* connection for fhsm */
7f207e10
AM
35275+};
35276+
35277+/* borrowed from linux/include/linux/kernel.h */
35278+#ifndef ALIGN
35279+#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
35280+#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
35281+#endif
35282+
35283+/* borrowed from linux/include/linux/compiler-gcc3.h */
35284+#ifndef __aligned
35285+#define __aligned(x) __attribute__((aligned(x)))
53392da6
AM
35286+#endif
35287+
35288+#ifdef __KERNEL__
35289+#ifndef __packed
7f207e10
AM
35290+#define __packed __attribute__((packed))
35291+#endif
53392da6 35292+#endif
7f207e10
AM
35293+
35294+struct au_rdu_cookie {
9dbd164d
AM
35295+ uint64_t h_pos;
35296+ int16_t bindex;
35297+ uint8_t flags;
35298+ uint8_t pad;
35299+ uint32_t generation;
7f207e10
AM
35300+} __aligned(8);
35301+
35302+struct au_rdu_ent {
9dbd164d
AM
35303+ uint64_t ino;
35304+ int16_t bindex;
35305+ uint8_t type;
35306+ uint8_t nlen;
35307+ uint8_t wh;
7f207e10
AM
35308+ char name[0];
35309+} __aligned(8);
35310+
35311+static inline int au_rdu_len(int nlen)
35312+{
35313+ /* include the terminating NULL */
35314+ return ALIGN(sizeof(struct au_rdu_ent) + nlen + 1,
9dbd164d 35315+ sizeof(uint64_t));
7f207e10
AM
35316+}
35317+
35318+union au_rdu_ent_ul {
35319+ struct au_rdu_ent __user *e;
9dbd164d 35320+ uint64_t ul;
7f207e10
AM
35321+};
35322+
35323+enum {
35324+ AufsCtlRduV_SZ,
35325+ AufsCtlRduV_End
35326+};
35327+
35328+struct aufs_rdu {
35329+ /* input */
35330+ union {
9dbd164d
AM
35331+ uint64_t sz; /* AuCtl_RDU */
35332+ uint64_t nent; /* AuCtl_RDU_INO */
7f207e10
AM
35333+ };
35334+ union au_rdu_ent_ul ent;
9dbd164d 35335+ uint16_t verify[AufsCtlRduV_End];
7f207e10
AM
35336+
35337+ /* input/output */
9dbd164d 35338+ uint32_t blk;
7f207e10
AM
35339+
35340+ /* output */
35341+ union au_rdu_ent_ul tail;
35342+ /* number of entries which were added in a single call */
9dbd164d
AM
35343+ uint64_t rent;
35344+ uint8_t full;
35345+ uint8_t shwh;
7f207e10
AM
35346+
35347+ struct au_rdu_cookie cookie;
35348+} __aligned(8);
35349+
1e00d052
AM
35350+/* ---------------------------------------------------------------------- */
35351+
35352+struct aufs_wbr_fd {
9dbd164d
AM
35353+ uint32_t oflags;
35354+ int16_t brid;
1e00d052
AM
35355+} __aligned(8);
35356+
35357+/* ---------------------------------------------------------------------- */
35358+
027c5e7a 35359+struct aufs_ibusy {
9dbd164d
AM
35360+ uint64_t ino, h_ino;
35361+ int16_t bindex;
027c5e7a
AM
35362+} __aligned(8);
35363+
1e00d052
AM
35364+/* ---------------------------------------------------------------------- */
35365+
392086de
AM
35366+/* error code for move-down */
35367+/* the actual message strings are implemented in aufs-util.git */
35368+enum {
35369+ EAU_MVDOWN_OPAQUE = 1,
35370+ EAU_MVDOWN_WHITEOUT,
35371+ EAU_MVDOWN_UPPER,
35372+ EAU_MVDOWN_BOTTOM,
35373+ EAU_MVDOWN_NOUPPER,
35374+ EAU_MVDOWN_NOLOWERBR,
35375+ EAU_Last
35376+};
35377+
c2b27bf2 35378+/* flags for move-down */
392086de
AM
35379+#define AUFS_MVDOWN_DMSG 1
35380+#define AUFS_MVDOWN_OWLOWER (1 << 1) /* overwrite lower */
35381+#define AUFS_MVDOWN_KUPPER (1 << 2) /* keep upper */
35382+#define AUFS_MVDOWN_ROLOWER (1 << 3) /* do even if lower is RO */
35383+#define AUFS_MVDOWN_ROLOWER_R (1 << 4) /* did on lower RO */
35384+#define AUFS_MVDOWN_ROUPPER (1 << 5) /* do even if upper is RO */
35385+#define AUFS_MVDOWN_ROUPPER_R (1 << 6) /* did on upper RO */
35386+#define AUFS_MVDOWN_BRID_UPPER (1 << 7) /* upper brid */
35387+#define AUFS_MVDOWN_BRID_LOWER (1 << 8) /* lower brid */
076b876e
AM
35388+#define AUFS_MVDOWN_FHSM_LOWER (1 << 9) /* find fhsm attr for lower */
35389+#define AUFS_MVDOWN_STFS (1 << 10) /* req. stfs */
35390+#define AUFS_MVDOWN_STFS_FAILED (1 << 11) /* output: stfs is unusable */
35391+#define AUFS_MVDOWN_BOTTOM (1 << 12) /* output: no more lowers */
c2b27bf2 35392+
076b876e 35393+/* index for move-down */
392086de
AM
35394+enum {
35395+ AUFS_MVDOWN_UPPER,
35396+ AUFS_MVDOWN_LOWER,
35397+ AUFS_MVDOWN_NARRAY
35398+};
35399+
076b876e
AM
35400+/*
35401+ * additional info of move-down
35402+ * number of free blocks and inodes.
35403+ * subset of struct kstatfs, but smaller and always 64bit.
35404+ */
35405+struct aufs_stfs {
35406+ uint64_t f_blocks;
35407+ uint64_t f_bavail;
35408+ uint64_t f_files;
35409+ uint64_t f_ffree;
35410+};
35411+
35412+struct aufs_stbr {
35413+ int16_t brid; /* optional input */
35414+ int16_t bindex; /* output */
35415+ struct aufs_stfs stfs; /* output when AUFS_MVDOWN_STFS set */
35416+} __aligned(8);
35417+
c2b27bf2 35418+struct aufs_mvdown {
076b876e
AM
35419+ uint32_t flags; /* input/output */
35420+ struct aufs_stbr stbr[AUFS_MVDOWN_NARRAY]; /* input/output */
35421+ int8_t au_errno; /* output */
35422+} __aligned(8);
35423+
35424+/* ---------------------------------------------------------------------- */
35425+
35426+union aufs_brinfo {
35427+ /* PATH_MAX may differ between kernel-space and user-space */
35428+ char _spacer[4096];
392086de 35429+ struct {
076b876e
AM
35430+ int16_t id;
35431+ int perm;
35432+ char path[0];
35433+ };
c2b27bf2
AM
35434+} __aligned(8);
35435+
35436+/* ---------------------------------------------------------------------- */
35437+
7f207e10
AM
35438+#define AuCtlType 'A'
35439+#define AUFS_CTL_RDU _IOWR(AuCtlType, AuCtl_RDU, struct aufs_rdu)
35440+#define AUFS_CTL_RDU_INO _IOWR(AuCtlType, AuCtl_RDU_INO, struct aufs_rdu)
1e00d052
AM
35441+#define AUFS_CTL_WBR_FD _IOW(AuCtlType, AuCtl_WBR_FD, \
35442+ struct aufs_wbr_fd)
027c5e7a 35443+#define AUFS_CTL_IBUSY _IOWR(AuCtlType, AuCtl_IBUSY, struct aufs_ibusy)
392086de
AM
35444+#define AUFS_CTL_MVDOWN _IOWR(AuCtlType, AuCtl_MVDOWN, \
35445+ struct aufs_mvdown)
076b876e
AM
35446+#define AUFS_CTL_BRINFO _IOW(AuCtlType, AuCtl_BR, union aufs_brinfo)
35447+#define AUFS_CTL_FHSM_FD _IOW(AuCtlType, AuCtl_FHSM_FD, int)
7f207e10
AM
35448+
35449+#endif /* __AUFS_TYPE_H__ */
ab036dbd 35450aufs4.1 loopback patch
5527c038
JR
35451
35452diff --git a/drivers/block/loop.c b/drivers/block/loop.c
35453index 0160952..866f8e2 100644
35454--- a/drivers/block/loop.c
35455+++ b/drivers/block/loop.c
35456@@ -419,7 +419,7 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
35457 }
35458
35459 struct switch_request {
35460- struct file *file;
35461+ struct file *file, *virt_file;
35462 struct completion wait;
35463 };
35464
35465@@ -439,6 +439,7 @@ static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
35466 mapping = file->f_mapping;
35467 mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
35468 lo->lo_backing_file = file;
35469+ lo->lo_backing_virt_file = p->virt_file;
35470 lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
35471 mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
35472 lo->old_gfp_mask = mapping_gfp_mask(mapping);
35473@@ -450,11 +451,13 @@ static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
35474 * First it needs to flush existing IO, it does this by sending a magic
35475 * BIO down the pipe. The completion of this BIO does the actual switch.
35476 */
35477-static int loop_switch(struct loop_device *lo, struct file *file)
35478+static int loop_switch(struct loop_device *lo, struct file *file,
35479+ struct file *virt_file)
35480 {
35481 struct switch_request w;
35482
35483 w.file = file;
35484+ w.virt_file = virt_file;
35485
35486 /* freeze queue and wait for completion of scheduled requests */
35487 blk_mq_freeze_queue(lo->lo_queue);
35488@@ -473,7 +476,16 @@ static int loop_switch(struct loop_device *lo, struct file *file)
35489 */
35490 static int loop_flush(struct loop_device *lo)
35491 {
35492- return loop_switch(lo, NULL);
35493+ return loop_switch(lo, NULL, NULL);
35494+}
35495+
35496+static struct file *loop_real_file(struct file *file)
35497+{
35498+ struct file *f = NULL;
35499+
35500+ if (file->f_path.dentry->d_sb->s_op->real_loop)
35501+ f = file->f_path.dentry->d_sb->s_op->real_loop(file);
35502+ return f;
35503 }
35504
35505 /*
35506@@ -488,6 +500,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
35507 unsigned int arg)
35508 {
35509 struct file *file, *old_file;
35510+ struct file *f, *virt_file = NULL, *old_virt_file;
35511 struct inode *inode;
35512 int error;
35513
35514@@ -504,9 +517,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
35515 file = fget(arg);
35516 if (!file)
35517 goto out;
35518+ f = loop_real_file(file);
35519+ if (f) {
35520+ virt_file = file;
35521+ file = f;
35522+ get_file(file);
35523+ }
35524
35525 inode = file->f_mapping->host;
35526 old_file = lo->lo_backing_file;
35527+ old_virt_file = lo->lo_backing_virt_file;
35528
35529 error = -EINVAL;
35530
35531@@ -518,17 +538,21 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
35532 goto out_putf;
35533
35534 /* and ... switch */
35535- error = loop_switch(lo, file);
35536+ error = loop_switch(lo, file, virt_file);
35537 if (error)
35538 goto out_putf;
35539
35540 fput(old_file);
35541+ if (old_virt_file)
35542+ fput(old_virt_file);
35543 if (lo->lo_flags & LO_FLAGS_PARTSCAN)
35544 ioctl_by_bdev(bdev, BLKRRPART, 0);
35545 return 0;
35546
35547 out_putf:
35548 fput(file);
35549+ if (virt_file)
35550+ fput(virt_file);
35551 out:
35552 return error;
35553 }
35554@@ -689,7 +713,7 @@ static void loop_config_discard(struct loop_device *lo)
35555 static int loop_set_fd(struct loop_device *lo, fmode_t mode,
35556 struct block_device *bdev, unsigned int arg)
35557 {
35558- struct file *file, *f;
35559+ struct file *file, *f, *virt_file = NULL;
35560 struct inode *inode;
35561 struct address_space *mapping;
35562 unsigned lo_blocksize;
35563@@ -704,6 +728,12 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
35564 file = fget(arg);
35565 if (!file)
35566 goto out;
35567+ f = loop_real_file(file);
35568+ if (f) {
35569+ virt_file = file;
35570+ file = f;
35571+ get_file(file);
35572+ }
35573
35574 error = -EBUSY;
35575 if (lo->lo_state != Lo_unbound)
35576@@ -752,6 +782,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
35577 lo->lo_device = bdev;
35578 lo->lo_flags = lo_flags;
35579 lo->lo_backing_file = file;
35580+ lo->lo_backing_virt_file = virt_file;
35581 lo->transfer = NULL;
35582 lo->ioctl = NULL;
35583 lo->lo_sizelimit = 0;
35584@@ -783,6 +814,8 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
35585
35586 out_putf:
35587 fput(file);
35588+ if (virt_file)
35589+ fput(virt_file);
35590 out:
35591 /* This is safe: open() is still holding a reference. */
35592 module_put(THIS_MODULE);
35593@@ -829,6 +862,7 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
35594 static int loop_clr_fd(struct loop_device *lo)
35595 {
35596 struct file *filp = lo->lo_backing_file;
35597+ struct file *virt_filp = lo->lo_backing_virt_file;
35598 gfp_t gfp = lo->old_gfp_mask;
35599 struct block_device *bdev = lo->lo_device;
35600
35601@@ -857,6 +891,7 @@ static int loop_clr_fd(struct loop_device *lo)
35602 spin_lock_irq(&lo->lo_lock);
35603 lo->lo_state = Lo_rundown;
35604 lo->lo_backing_file = NULL;
35605+ lo->lo_backing_virt_file = NULL;
35606 spin_unlock_irq(&lo->lo_lock);
35607
35608 loop_release_xfer(lo);
35609@@ -898,6 +933,8 @@ static int loop_clr_fd(struct loop_device *lo)
35610 * bd_mutex which is usually taken before lo_ctl_mutex.
35611 */
35612 fput(filp);
35613+ if (virt_filp)
35614+ fput(virt_filp);
35615 return 0;
35616 }
35617
35618diff --git a/drivers/block/loop.h b/drivers/block/loop.h
35619index 301c27f..df84aa0 100644
35620--- a/drivers/block/loop.h
35621+++ b/drivers/block/loop.h
35622@@ -46,7 +46,7 @@ struct loop_device {
35623 int (*ioctl)(struct loop_device *, int cmd,
35624 unsigned long arg);
35625
35626- struct file * lo_backing_file;
35627+ struct file * lo_backing_file, *lo_backing_virt_file;
35628 struct block_device *lo_device;
35629 unsigned lo_blocksize;
35630 void *key_data;
35631diff --git a/fs/aufs/f_op.c b/fs/aufs/f_op.c
35632index 91c2ce7..d4ee5a7 100644
35633--- a/fs/aufs/f_op.c
35634+++ b/fs/aufs/f_op.c
35635@@ -389,7 +389,7 @@ static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
35636 if (IS_ERR(h_file))
35637 goto out;
35638
35639- if (au_test_loopback_kthread()) {
35640+ if (0 && au_test_loopback_kthread()) {
35641 au_warn_loopback(h_file->f_path.dentry->d_sb);
35642 if (file->f_mapping != h_file->f_mapping) {
35643 file->f_mapping = h_file->f_mapping;
35644diff --git a/fs/aufs/loop.c b/fs/aufs/loop.c
ab036dbd 35645index f324758..4555e7b 100644
5527c038
JR
35646--- a/fs/aufs/loop.c
35647+++ b/fs/aufs/loop.c
ab036dbd
AM
35648@@ -131,3 +131,19 @@ void au_loopback_fin(void)
35649 symbol_put(loop_backing_file);
5527c038
JR
35650 kfree(au_warn_loopback_array);
35651 }
35652+
35653+/* ---------------------------------------------------------------------- */
35654+
35655+/* support the loopback block device inside aufs */
35656+
35657+struct file *aufs_real_loop(struct file *file)
35658+{
35659+ struct file *f;
35660+
35661+ BUG_ON(!au_test_aufs(file->f_path.dentry->d_sb));
35662+ fi_read_lock(file);
35663+ f = au_hf_top(file);
35664+ fi_read_unlock(file);
35665+ AuDebugOn(!f);
35666+ return f;
35667+}
35668diff --git a/fs/aufs/loop.h b/fs/aufs/loop.h
35669index 6d9864d..3322557 100644
35670--- a/fs/aufs/loop.h
35671+++ b/fs/aufs/loop.h
35672@@ -25,7 +25,11 @@ void au_warn_loopback(struct super_block *h_sb);
35673
35674 int au_loopback_init(void);
35675 void au_loopback_fin(void);
35676+
35677+struct file *aufs_real_loop(struct file *file);
35678 #else
35679+AuStub(struct file *, loop_backing_file, return NULL)
35680+
35681 AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
35682 struct dentry *h_adding)
35683 AuStubInt0(au_test_loopback_kthread, void)
35684@@ -33,6 +37,8 @@ AuStubVoid(au_warn_loopback, struct super_block *h_sb)
35685
35686 AuStubInt0(au_loopback_init, void)
35687 AuStubVoid(au_loopback_fin, void)
35688+
35689+AuStub(struct file *, aufs_real_loop, return NULL, struct file *file)
35690 #endif /* BLK_DEV_LOOP */
35691
35692 #endif /* __KERNEL__ */
35693diff --git a/fs/aufs/super.c b/fs/aufs/super.c
ab036dbd 35694index 7efab49..ed357c7 100644
5527c038
JR
35695--- a/fs/aufs/super.c
35696+++ b/fs/aufs/super.c
ab036dbd 35697@@ -840,7 +840,10 @@ static const struct super_operations aufs_sop = {
5527c038
JR
35698 .statfs = aufs_statfs,
35699 .put_super = aufs_put_super,
35700 .sync_fs = aufs_sync_fs,
35701- .remount_fs = aufs_remount_fs
35702+ .remount_fs = aufs_remount_fs,
35703+#ifdef CONFIG_AUFS_BDEV_LOOP
35704+ .real_loop = aufs_real_loop
35705+#endif
35706 };
35707
35708 /* ---------------------------------------------------------------------- */
35709diff --git a/include/linux/fs.h b/include/linux/fs.h
35710index 3229f97..f63cc0d 100644
35711--- a/include/linux/fs.h
35712+++ b/include/linux/fs.h
35713@@ -1696,6 +1696,10 @@ struct super_operations {
35714 struct shrink_control *);
35715 long (*free_cached_objects)(struct super_block *,
35716 struct shrink_control *);
35717+#if defined(CONFIG_BLK_DEV_LOOP) || defined(CONFIG_BLK_DEV_LOOP_MODULE)
35718+ /* and aufs */
35719+ struct file *(*real_loop)(struct file *);
35720+#endif
35721 };
35722
35723 /*
This page took 8.487445 seconds and 4 git commands to generate.