]> git.pld-linux.org Git - packages/kernel.git/blame - kernel-aufs4.patch
- 4.4.24
[packages/kernel.git] / kernel-aufs4.patch
CommitLineData
cfc41e69 1aufs4.4 kbuild patch
7f207e10
AM
2
3diff --git a/fs/Kconfig b/fs/Kconfig
be52b249 4index 6ce72d8..4aa31ea 100644
7f207e10
AM
5--- a/fs/Kconfig
6+++ b/fs/Kconfig
be52b249 7@@ -221,6 +221,7 @@ source "fs/pstore/Kconfig"
5527c038 8 source "fs/sysv/Kconfig"
7e9cd9fe 9 source "fs/ufs/Kconfig"
7f207e10
AM
10 source "fs/exofs/Kconfig"
11+source "fs/aufs/Kconfig"
12
13 endif # MISC_FILESYSTEMS
14
15diff --git a/fs/Makefile b/fs/Makefile
be52b249 16index 79f5225..a7c7f16 100644
7f207e10
AM
17--- a/fs/Makefile
18+++ b/fs/Makefile
be52b249 19@@ -126,3 +126,4 @@ obj-y += exofs/ # Multiple modules
7f207e10 20 obj-$(CONFIG_CEPH_FS) += ceph/
bf0370f2 21 obj-$(CONFIG_PSTORE) += pstore/
c06a8ce3 22 obj-$(CONFIG_EFIVAR_FS) += efivarfs/
86dc4139 23+obj-$(CONFIG_AUFS_FS) += aufs/
c06a8ce3 24diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
be52b249 25index c2e5d6c..d736c11 100644
c06a8ce3
AM
26--- a/include/uapi/linux/Kbuild
27+++ b/include/uapi/linux/Kbuild
5527c038 28@@ -59,6 +59,7 @@ header-y += atmsvc.h
03673fb0
JR
29 header-y += atm_tcp.h
30 header-y += atm_zatm.h
c06a8ce3
AM
31 header-y += audit.h
32+header-y += aufs_type.h
c06a8ce3 33 header-y += auto_fs4.h
03673fb0 34 header-y += auto_fs.h
c06a8ce3 35 header-y += auxvec.h
cfc41e69 36aufs4.4 base patch
7f207e10 37
c1595e42 38diff --git a/MAINTAINERS b/MAINTAINERS
be52b249 39index 233f834..c250892 100644
c1595e42
JR
40--- a/MAINTAINERS
41+++ b/MAINTAINERS
be52b249 42@@ -2029,6 +2029,19 @@ F: include/linux/audit.h
c1595e42
JR
43 F: include/uapi/linux/audit.h
44 F: kernel/audit*
45
46+AUFS (advanced multi layered unification filesystem) FILESYSTEM
47+M: "J. R. Okajima" <hooanon05g@gmail.com>
48+L: linux-unionfs@vger.kernel.org
49+L: aufs-users@lists.sourceforge.net (members only)
50+W: http://aufs.sourceforge.net
5527c038 51+T: git://github.com/sfjro/aufs4-linux.git
c1595e42
JR
52+S: Supported
53+F: Documentation/filesystems/aufs/
54+F: Documentation/ABI/testing/debugfs-aufs
55+F: Documentation/ABI/testing/sysfs-aufs
56+F: fs/aufs/
57+F: include/uapi/linux/aufs_type.h
58+
59 AUXILIARY DISPLAY DRIVERS
60 M: Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
61 W: http://miguelojeda.es/auxdisplay.htm
392086de 62diff --git a/drivers/block/loop.c b/drivers/block/loop.c
be52b249 63index 423f4ca..abfdd2b 100644
392086de
AM
64--- a/drivers/block/loop.c
65+++ b/drivers/block/loop.c
be52b249 66@@ -706,6 +706,24 @@ static inline int is_loop_device(struct file *file)
392086de
AM
67 return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
68 }
69
70+/*
71+ * for AUFS
72+ * no get/put for file.
73+ */
74+struct file *loop_backing_file(struct super_block *sb)
75+{
76+ struct file *ret;
77+ struct loop_device *l;
78+
79+ ret = NULL;
80+ if (MAJOR(sb->s_dev) == LOOP_MAJOR) {
81+ l = sb->s_bdev->bd_disk->private_data;
82+ ret = l->lo_backing_file;
83+ }
84+ return ret;
85+}
86+EXPORT_SYMBOL(loop_backing_file);
87+
88 /* loop sysfs attributes */
89
90 static ssize_t loop_attr_show(struct device *dev, char *page,
c1595e42 91diff --git a/fs/dcache.c b/fs/dcache.c
79b8bda9 92index 5c33aeb..8aa7f26 100644
c1595e42
JR
93--- a/fs/dcache.c
94+++ b/fs/dcache.c
79b8bda9 95@@ -1167,7 +1167,7 @@ enum d_walk_ret {
c1595e42
JR
96 *
97 * The @enter() and @finish() callbacks are called with d_lock held.
98 */
99-static void d_walk(struct dentry *parent, void *data,
100+void d_walk(struct dentry *parent, void *data,
101 enum d_walk_ret (*enter)(void *, struct dentry *),
102 void (*finish)(void *))
103 {
5527c038
JR
104diff --git a/fs/read_write.c b/fs/read_write.c
105index 819ef3f..fd0414e 100644
106--- a/fs/read_write.c
107+++ b/fs/read_write.c
108@@ -494,6 +494,28 @@ ssize_t __vfs_write(struct file *file, const char __user *p, size_t count,
109 }
110 EXPORT_SYMBOL(__vfs_write);
111
112+vfs_readf_t vfs_readf(struct file *file)
113+{
114+ const struct file_operations *fop = file->f_op;
115+
116+ if (fop->read)
117+ return fop->read;
118+ if (fop->read_iter)
119+ return new_sync_read;
120+ return ERR_PTR(-ENOSYS);
121+}
122+
123+vfs_writef_t vfs_writef(struct file *file)
124+{
125+ const struct file_operations *fop = file->f_op;
126+
127+ if (fop->write)
128+ return fop->write;
129+ if (fop->write_iter)
130+ return new_sync_write;
131+ return ERR_PTR(-ENOSYS);
132+}
133+
134 ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
135 {
136 mm_segment_t old_fs;
7f207e10 137diff --git a/fs/splice.c b/fs/splice.c
be52b249 138index 4cf700d..30a091d 100644
7f207e10
AM
139--- a/fs/splice.c
140+++ b/fs/splice.c
be52b249 141@@ -1110,8 +1110,8 @@ EXPORT_SYMBOL(generic_splice_sendpage);
7f207e10
AM
142 /*
143 * Attempt to initiate a splice from pipe to file.
144 */
145-static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
146- loff_t *ppos, size_t len, unsigned int flags)
147+long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
148+ loff_t *ppos, size_t len, unsigned int flags)
149 {
150 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
151 loff_t *, size_t, unsigned int);
be52b249 152@@ -1127,9 +1127,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
7f207e10
AM
153 /*
154 * Attempt to initiate a splice from a file to a pipe.
155 */
156-static long do_splice_to(struct file *in, loff_t *ppos,
157- struct pipe_inode_info *pipe, size_t len,
158- unsigned int flags)
159+long do_splice_to(struct file *in, loff_t *ppos,
160+ struct pipe_inode_info *pipe, size_t len,
161+ unsigned int flags)
162 {
163 ssize_t (*splice_read)(struct file *, loff_t *,
164 struct pipe_inode_info *, size_t, unsigned int);
b912730e
AM
165diff --git a/include/linux/file.h b/include/linux/file.h
166index f87d308..9a290b3 100644
167--- a/include/linux/file.h
168+++ b/include/linux/file.h
169@@ -19,6 +19,7 @@ struct dentry;
170 struct path;
171 extern struct file *alloc_file(struct path *, fmode_t mode,
172 const struct file_operations *fop);
173+extern struct file *get_empty_filp(void);
174
175 static inline void fput_light(struct file *file, int fput_needed)
176 {
5527c038 177diff --git a/include/linux/fs.h b/include/linux/fs.h
be52b249 178index 3aa5142..8d48506 100644
5527c038
JR
179--- a/include/linux/fs.h
180+++ b/include/linux/fs.h
be52b249 181@@ -1672,6 +1672,12 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
5527c038
JR
182 struct iovec *fast_pointer,
183 struct iovec **ret_pointer);
184
185+typedef ssize_t (*vfs_readf_t)(struct file *, char __user *, size_t, loff_t *);
186+typedef ssize_t (*vfs_writef_t)(struct file *, const char __user *, size_t,
187+ loff_t *);
188+vfs_readf_t vfs_readf(struct file *file);
189+vfs_writef_t vfs_writef(struct file *file);
190+
191 extern ssize_t __vfs_read(struct file *, char __user *, size_t, loff_t *);
192 extern ssize_t __vfs_write(struct file *, const char __user *, size_t, loff_t *);
193 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
1e00d052 194diff --git a/include/linux/splice.h b/include/linux/splice.h
076b876e 195index da2751d..2e0fca6 100644
1e00d052
AM
196--- a/include/linux/splice.h
197+++ b/include/linux/splice.h
076b876e 198@@ -83,4 +83,10 @@ extern void splice_shrink_spd(struct splice_pipe_desc *);
4b3da204
AM
199 extern void spd_release_page(struct splice_pipe_desc *, unsigned int);
200
201 extern const struct pipe_buf_operations page_cache_pipe_buf_ops;
1e00d052
AM
202+
203+extern long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
204+ loff_t *ppos, size_t len, unsigned int flags);
205+extern long do_splice_to(struct file *in, loff_t *ppos,
206+ struct pipe_inode_info *pipe, size_t len,
207+ unsigned int flags);
208 #endif
cfc41e69 209aufs4.4 mmap patch
fb47a38f 210
c1595e42 211diff --git a/fs/proc/base.c b/fs/proc/base.c
be52b249 212index 4bd5d31..aa41f2a 100644
c1595e42
JR
213--- a/fs/proc/base.c
214+++ b/fs/proc/base.c
be52b249 215@@ -1921,7 +1921,7 @@ static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
c1595e42
JR
216 down_read(&mm->mmap_sem);
217 vma = find_exact_vma(mm, vm_start, vm_end);
218 if (vma && vma->vm_file) {
219- *path = vma->vm_file->f_path;
220+ *path = vma_pr_or_file(vma)->f_path;
221 path_get(path);
222 rc = 0;
223 }
fb47a38f 224diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
c2c0f25c 225index f8595e8..cb8eda0 100644
fb47a38f
JR
226--- a/fs/proc/nommu.c
227+++ b/fs/proc/nommu.c
076b876e 228@@ -45,7 +45,10 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region)
fb47a38f
JR
229 file = region->vm_file;
230
231 if (file) {
232- struct inode *inode = file_inode(region->vm_file);
233+ struct inode *inode;
076b876e 234+
fb47a38f
JR
235+ file = vmr_pr_or_file(region);
236+ inode = file_inode(file);
237 dev = inode->i_sb->s_dev;
238 ino = inode->i_ino;
239 }
240diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
be52b249 241index 187b3b5..e03793e 100644
fb47a38f
JR
242--- a/fs/proc/task_mmu.c
243+++ b/fs/proc/task_mmu.c
be52b249 244@@ -281,7 +281,10 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
fb47a38f
JR
245 const char *name = NULL;
246
247 if (file) {
248- struct inode *inode = file_inode(vma->vm_file);
249+ struct inode *inode;
076b876e 250+
fb47a38f
JR
251+ file = vma_pr_or_file(vma);
252+ inode = file_inode(file);
253 dev = inode->i_sb->s_dev;
254 ino = inode->i_ino;
255 pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
be52b249 256@@ -1505,7 +1508,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
076b876e
AM
257 struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
258 struct vm_area_struct *vma = v;
259 struct numa_maps *md = &numa_priv->md;
260- struct file *file = vma->vm_file;
261+ struct file *file = vma_pr_or_file(vma);
076b876e 262 struct mm_struct *mm = vma->vm_mm;
7e9cd9fe
AM
263 struct mm_walk walk = {
264 .hugetlb_entry = gather_hugetlb_stats,
fb47a38f 265diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
c2c0f25c 266index e0d64c9..7aa92db 100644
fb47a38f
JR
267--- a/fs/proc/task_nommu.c
268+++ b/fs/proc/task_nommu.c
c1595e42 269@@ -160,7 +160,10 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
fb47a38f
JR
270 file = vma->vm_file;
271
272 if (file) {
273- struct inode *inode = file_inode(vma->vm_file);
274+ struct inode *inode;
076b876e 275+
b912730e 276+ file = vma_pr_or_file(vma);
fb47a38f
JR
277+ inode = file_inode(file);
278 dev = inode->i_sb->s_dev;
279 ino = inode->i_ino;
280 pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
281diff --git a/include/linux/mm.h b/include/linux/mm.h
be52b249 282index 00bad77..cc616b0 100644
fb47a38f
JR
283--- a/include/linux/mm.h
284+++ b/include/linux/mm.h
be52b249 285@@ -1183,6 +1183,28 @@ static inline int fixup_user_fault(struct task_struct *tsk,
fb47a38f
JR
286 }
287 #endif
288
076b876e
AM
289+extern void vma_do_file_update_time(struct vm_area_struct *, const char[], int);
290+extern struct file *vma_do_pr_or_file(struct vm_area_struct *, const char[],
291+ int);
292+extern void vma_do_get_file(struct vm_area_struct *, const char[], int);
293+extern void vma_do_fput(struct vm_area_struct *, const char[], int);
fb47a38f 294+
fb47a38f
JR
295+#define vma_file_update_time(vma) vma_do_file_update_time(vma, __func__, \
296+ __LINE__)
297+#define vma_pr_or_file(vma) vma_do_pr_or_file(vma, __func__, \
298+ __LINE__)
299+#define vma_get_file(vma) vma_do_get_file(vma, __func__, __LINE__)
300+#define vma_fput(vma) vma_do_fput(vma, __func__, __LINE__)
b912730e
AM
301+
302+#ifndef CONFIG_MMU
076b876e
AM
303+extern struct file *vmr_do_pr_or_file(struct vm_region *, const char[], int);
304+extern void vmr_do_fput(struct vm_region *, const char[], int);
305+
306+#define vmr_pr_or_file(region) vmr_do_pr_or_file(region, __func__, \
307+ __LINE__)
308+#define vmr_fput(region) vmr_do_fput(region, __func__, __LINE__)
b912730e 309+#endif /* !CONFIG_MMU */
fb47a38f
JR
310+
311 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
312 extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
313 void *buf, int len, int write);
314diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
be52b249 315index f8d1492..c3a3760 100644
fb47a38f
JR
316--- a/include/linux/mm_types.h
317+++ b/include/linux/mm_types.h
be52b249 318@@ -272,6 +272,7 @@ struct vm_region {
fb47a38f
JR
319 unsigned long vm_top; /* region allocated to here */
320 unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */
321 struct file *vm_file; /* the backing file or NULL */
322+ struct file *vm_prfile; /* the virtual backing file or NULL */
323
324 int vm_usage; /* region usage count (access under nommu_region_sem) */
325 bool vm_icache_flushed : 1; /* true if the icache has been flushed for
be52b249 326@@ -346,6 +347,7 @@ struct vm_area_struct {
fb47a38f
JR
327 unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE
328 units, *not* PAGE_CACHE_SIZE */
329 struct file * vm_file; /* File we map to (can be NULL). */
330+ struct file *vm_prfile; /* shadow of vm_file */
331 void * vm_private_data; /* was vm_pte (shared mem) */
332
333 #ifndef CONFIG_MMU
334diff --git a/kernel/fork.c b/kernel/fork.c
cfc41e69 335index 1155eac..c001ea4 100644
fb47a38f
JR
336--- a/kernel/fork.c
337+++ b/kernel/fork.c
cfc41e69 338@@ -465,7 +465,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
fb47a38f
JR
339 struct inode *inode = file_inode(file);
340 struct address_space *mapping = file->f_mapping;
341
342- get_file(file);
343+ vma_get_file(tmp);
344 if (tmp->vm_flags & VM_DENYWRITE)
345 atomic_dec(&inode->i_writecount);
2000de60 346 i_mmap_lock_write(mapping);
076b876e 347diff --git a/mm/Makefile b/mm/Makefile
79b8bda9 348index 2ed4319..e3a53f5 100644
076b876e
AM
349--- a/mm/Makefile
350+++ b/mm/Makefile
7e9cd9fe 351@@ -21,7 +21,7 @@ obj-y := filemap.o mempool.o oom_kill.o \
076b876e 352 mm_init.o mmu_context.o percpu.o slab_common.o \
c1595e42 353 compaction.o vmacache.o \
076b876e 354 interval_tree.o list_lru.o workingset.o \
7e9cd9fe
AM
355- debug.o $(mmu-y)
356+ prfile.o debug.o $(mmu-y)
076b876e
AM
357
358 obj-y += init-mm.o
359
fb47a38f 360diff --git a/mm/filemap.c b/mm/filemap.c
be52b249 361index 1bb0076..8eaece8 100644
fb47a38f
JR
362--- a/mm/filemap.c
363+++ b/mm/filemap.c
be52b249 364@@ -2128,7 +2128,7 @@ int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
fb47a38f
JR
365 int ret = VM_FAULT_LOCKED;
366
367 sb_start_pagefault(inode->i_sb);
368- file_update_time(vma->vm_file);
369+ vma_file_update_time(vma);
370 lock_page(page);
371 if (page->mapping != inode->i_mapping) {
372 unlock_page(page);
fb47a38f 373diff --git a/mm/memory.c b/mm/memory.c
be52b249 374index c387430..d434404 100644
fb47a38f
JR
375--- a/mm/memory.c
376+++ b/mm/memory.c
79b8bda9 377@@ -2035,7 +2035,7 @@ static inline int wp_page_reuse(struct mm_struct *mm,
fb47a38f 378 }
7e9cd9fe 379
b912730e
AM
380 if (!page_mkwrite)
381- file_update_time(vma->vm_file);
382+ vma_file_update_time(vma);
383 }
384
385 return VM_FAULT_WRITE;
fb47a38f 386diff --git a/mm/mmap.c b/mm/mmap.c
be52b249 387index 2ce04a6..f555c0a 100644
fb47a38f
JR
388--- a/mm/mmap.c
389+++ b/mm/mmap.c
79b8bda9 390@@ -275,7 +275,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
fb47a38f
JR
391 if (vma->vm_ops && vma->vm_ops->close)
392 vma->vm_ops->close(vma);
393 if (vma->vm_file)
394- fput(vma->vm_file);
395+ vma_fput(vma);
396 mpol_put(vma_policy(vma));
397 kmem_cache_free(vm_area_cachep, vma);
398 return next;
79b8bda9 399@@ -887,7 +887,7 @@ again: remove_next = 1 + (end > next->vm_end);
fb47a38f
JR
400 if (remove_next) {
401 if (file) {
402 uprobe_munmap(next, next->vm_start, next->vm_end);
403- fput(file);
404+ vma_fput(vma);
405 }
406 if (next->anon_vma)
407 anon_vma_merge(vma, next);
be52b249 408@@ -1681,8 +1681,8 @@ out:
35939ee7
JR
409 return addr;
410
fb47a38f 411 unmap_and_free_vma:
fb47a38f
JR
412+ vma_fput(vma);
413 vma->vm_file = NULL;
414- fput(file);
415
416 /* Undo any partial mapping done by a device driver. */
417 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
be52b249 418@@ -2488,7 +2488,7 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
fb47a38f
JR
419 goto out_free_mpol;
420
421 if (new->vm_file)
422- get_file(new->vm_file);
423+ vma_get_file(new);
424
425 if (new->vm_ops && new->vm_ops->open)
426 new->vm_ops->open(new);
be52b249 427@@ -2507,7 +2507,7 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
fb47a38f
JR
428 if (new->vm_ops && new->vm_ops->close)
429 new->vm_ops->close(new);
430 if (new->vm_file)
431- fput(new->vm_file);
432+ vma_fput(new);
433 unlink_anon_vmas(new);
434 out_free_mpol:
435 mpol_put(vma_policy(new));
be52b249 436@@ -2649,7 +2649,6 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
7e9cd9fe
AM
437 struct vm_area_struct *vma;
438 unsigned long populate = 0;
439 unsigned long ret = -EINVAL;
440- struct file *file;
441
442 pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. "
443 "See Documentation/vm/remap_file_pages.txt.\n",
be52b249 444@@ -2693,10 +2692,10 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
7e9cd9fe
AM
445 munlock_vma_pages_range(vma, start, start + size);
446 }
447
448- file = get_file(vma->vm_file);
449+ vma_get_file(vma);
450 ret = do_mmap_pgoff(vma->vm_file, start, size,
451 prot, flags, pgoff, &populate);
452- fput(file);
453+ vma_fput(vma);
454 out:
455 up_write(&mm->mmap_sem);
456 if (populate)
be52b249 457@@ -2966,7 +2965,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
79b8bda9
AM
458 if (anon_vma_clone(new_vma, vma))
459 goto out_free_mempol;
460 if (new_vma->vm_file)
461- get_file(new_vma->vm_file);
462+ vma_get_file(new_vma);
463 if (new_vma->vm_ops && new_vma->vm_ops->open)
464 new_vma->vm_ops->open(new_vma);
465 vma_link(mm, new_vma, prev, rb_link, rb_parent);
fb47a38f 466diff --git a/mm/nommu.c b/mm/nommu.c
be52b249 467index 92be862..29179f7 100644
fb47a38f
JR
468--- a/mm/nommu.c
469+++ b/mm/nommu.c
c2c0f25c 470@@ -671,7 +671,7 @@ static void __put_nommu_region(struct vm_region *region)
fb47a38f
JR
471 up_write(&nommu_region_sem);
472
473 if (region->vm_file)
474- fput(region->vm_file);
475+ vmr_fput(region);
476
477 /* IO memory and memory shared directly out of the pagecache
478 * from ramfs/tmpfs mustn't be released here */
c2c0f25c 479@@ -829,7 +829,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
fb47a38f
JR
480 if (vma->vm_ops && vma->vm_ops->close)
481 vma->vm_ops->close(vma);
482 if (vma->vm_file)
483- fput(vma->vm_file);
484+ vma_fput(vma);
485 put_nommu_region(vma->vm_region);
486 kmem_cache_free(vm_area_cachep, vma);
487 }
79b8bda9 488@@ -1355,7 +1355,7 @@ unsigned long do_mmap(struct file *file,
fb47a38f
JR
489 goto error_just_free;
490 }
491 }
492- fput(region->vm_file);
493+ vmr_fput(region);
494 kmem_cache_free(vm_region_jar, region);
495 region = pregion;
496 result = start;
79b8bda9 497@@ -1430,10 +1430,10 @@ error_just_free:
fb47a38f
JR
498 up_write(&nommu_region_sem);
499 error:
500 if (region->vm_file)
501- fput(region->vm_file);
502+ vmr_fput(region);
503 kmem_cache_free(vm_region_jar, region);
504 if (vma->vm_file)
505- fput(vma->vm_file);
506+ vma_fput(vma);
507 kmem_cache_free(vm_area_cachep, vma);
fb47a38f 508 return ret;
c2c0f25c 509
076b876e
AM
510diff --git a/mm/prfile.c b/mm/prfile.c
511new file mode 100644
c2c0f25c 512index 0000000..b323b8a
076b876e
AM
513--- /dev/null
514+++ b/mm/prfile.c
515@@ -0,0 +1,86 @@
516+/*
517+ * Mainly for aufs which mmap(2) different file and wants to print different path
518+ * in /proc/PID/maps.
519+ * Call these functions via macros defined in linux/mm.h.
520+ *
521+ * See Documentation/filesystems/aufs/design/06mmap.txt
522+ *
523+ * Copyright (c) 2014 Junjro R. Okajima
524+ * Copyright (c) 2014 Ian Campbell
525+ */
526+
527+#include <linux/mm.h>
528+#include <linux/file.h>
529+#include <linux/fs.h>
530+
531+/* #define PRFILE_TRACE */
532+static inline void prfile_trace(struct file *f, struct file *pr,
533+ const char func[], int line, const char func2[])
534+{
535+#ifdef PRFILE_TRACE
536+ if (pr)
c2c0f25c 537+ pr_info("%s:%d: %s, %s\n", func, line, func2,
7e9cd9fe 538+ f ? (char *)f->f_path.dentry->d_name.name : "(null)");
076b876e
AM
539+#endif
540+}
541+
076b876e
AM
542+void vma_do_file_update_time(struct vm_area_struct *vma, const char func[],
543+ int line)
544+{
545+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
546+
547+ prfile_trace(f, pr, func, line, __func__);
548+ file_update_time(f);
549+ if (f && pr)
550+ file_update_time(pr);
551+}
552+
553+struct file *vma_do_pr_or_file(struct vm_area_struct *vma, const char func[],
554+ int line)
555+{
556+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
557+
558+ prfile_trace(f, pr, func, line, __func__);
559+ return (f && pr) ? pr : f;
560+}
561+
562+void vma_do_get_file(struct vm_area_struct *vma, const char func[], int line)
563+{
564+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
565+
566+ prfile_trace(f, pr, func, line, __func__);
567+ get_file(f);
568+ if (f && pr)
569+ get_file(pr);
570+}
571+
572+void vma_do_fput(struct vm_area_struct *vma, const char func[], int line)
573+{
574+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
575+
576+ prfile_trace(f, pr, func, line, __func__);
577+ fput(f);
578+ if (f && pr)
579+ fput(pr);
580+}
b912730e
AM
581+
582+#ifndef CONFIG_MMU
076b876e
AM
583+struct file *vmr_do_pr_or_file(struct vm_region *region, const char func[],
584+ int line)
585+{
586+ struct file *f = region->vm_file, *pr = region->vm_prfile;
587+
588+ prfile_trace(f, pr, func, line, __func__);
589+ return (f && pr) ? pr : f;
590+}
591+
592+void vmr_do_fput(struct vm_region *region, const char func[], int line)
593+{
594+ struct file *f = region->vm_file, *pr = region->vm_prfile;
595+
596+ prfile_trace(f, pr, func, line, __func__);
597+ fput(f);
598+ if (f && pr)
599+ fput(pr);
600+}
b912730e 601+#endif /* !CONFIG_MMU */
cfc41e69 602aufs4.4 standalone patch
7f207e10 603
c1595e42 604diff --git a/fs/dcache.c b/fs/dcache.c
79b8bda9 605index 8aa7f26..f997345 100644
c1595e42
JR
606--- a/fs/dcache.c
607+++ b/fs/dcache.c
79b8bda9 608@@ -1272,6 +1272,7 @@ rename_retry:
c1595e42
JR
609 seq = 1;
610 goto again;
611 }
612+EXPORT_SYMBOL(d_walk);
613
614 /*
615 * Search for at least 1 mount point in the dentry's subdirs.
79b8bda9
AM
616diff --git a/fs/exec.c b/fs/exec.c
617index b06623a..b9206c5 100644
618--- a/fs/exec.c
619+++ b/fs/exec.c
620@@ -103,6 +103,7 @@ bool path_noexec(const struct path *path)
621 return (path->mnt->mnt_flags & MNT_NOEXEC) ||
622 (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
623 }
624+EXPORT_SYMBOL(path_noexec);
625
626 #ifdef CONFIG_USELIB
627 /*
b912730e 628diff --git a/fs/file_table.c b/fs/file_table.c
8cdd5066 629index ad17e05..38e046a 100644
b912730e
AM
630--- a/fs/file_table.c
631+++ b/fs/file_table.c
79b8bda9 632@@ -147,6 +147,7 @@ over:
b912730e
AM
633 }
634 return ERR_PTR(-ENFILE);
635 }
636+EXPORT_SYMBOL(get_empty_filp);
637
638 /**
639 * alloc_file - allocate and initialize a 'struct file'
8cdd5066
JR
640@@ -258,6 +259,7 @@ void flush_delayed_fput(void)
641 {
642 delayed_fput(NULL);
643 }
644+EXPORT_SYMBOL(flush_delayed_fput);
645
646 static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
647
648@@ -300,6 +302,7 @@ void __fput_sync(struct file *file)
649 }
650
651 EXPORT_SYMBOL(fput);
652+EXPORT_SYMBOL(__fput_sync);
653
654 void put_filp(struct file *file)
655 {
656@@ -308,6 +311,7 @@ void put_filp(struct file *file)
b912730e
AM
657 file_free(file);
658 }
659 }
660+EXPORT_SYMBOL(put_filp);
661
79b8bda9 662 void __init files_init(void)
b912730e 663 {
7f207e10 664diff --git a/fs/namespace.c b/fs/namespace.c
79b8bda9 665index 0570729..ec560d8 100644
7f207e10
AM
666--- a/fs/namespace.c
667+++ b/fs/namespace.c
7e9cd9fe 668@@ -463,6 +463,7 @@ void __mnt_drop_write(struct vfsmount *mnt)
c06a8ce3
AM
669 mnt_dec_writers(real_mount(mnt));
670 preempt_enable();
671 }
672+EXPORT_SYMBOL_GPL(__mnt_drop_write);
673
674 /**
675 * mnt_drop_write - give up write access to a mount
79b8bda9 676@@ -1803,6 +1804,7 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
7f207e10
AM
677 }
678 return 0;
679 }
680+EXPORT_SYMBOL(iterate_mounts);
681
7eafdf33 682 static void cleanup_group_ids(struct mount *mnt, struct mount *end)
7f207e10
AM
683 {
684diff --git a/fs/notify/group.c b/fs/notify/group.c
c1595e42 685index d16b62c..06ca6bc 100644
7f207e10
AM
686--- a/fs/notify/group.c
687+++ b/fs/notify/group.c
688@@ -22,6 +22,7 @@
689 #include <linux/srcu.h>
690 #include <linux/rculist.h>
691 #include <linux/wait.h>
692+#include <linux/module.h>
693
694 #include <linux/fsnotify_backend.h>
695 #include "fsnotify.h"
fb47a38f 696@@ -72,6 +73,7 @@ void fsnotify_get_group(struct fsnotify_group *group)
1716fcea
AM
697 {
698 atomic_inc(&group->refcnt);
699 }
700+EXPORT_SYMBOL(fsnotify_get_group);
701
702 /*
703 * Drop a reference to a group. Free it if it's through.
fb47a38f 704@@ -81,6 +83,7 @@ void fsnotify_put_group(struct fsnotify_group *group)
7f207e10 705 if (atomic_dec_and_test(&group->refcnt))
1716fcea 706 fsnotify_final_destroy_group(group);
7f207e10
AM
707 }
708+EXPORT_SYMBOL(fsnotify_put_group);
709
710 /*
711 * Create a new fsnotify_group and hold a reference for the group returned.
fb47a38f 712@@ -109,6 +112,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
7f207e10
AM
713
714 return group;
715 }
716+EXPORT_SYMBOL(fsnotify_alloc_group);
1716fcea
AM
717
718 int fsnotify_fasync(int fd, struct file *file, int on)
719 {
7f207e10 720diff --git a/fs/notify/mark.c b/fs/notify/mark.c
79b8bda9 721index fc0df44..325b5c6 100644
7f207e10
AM
722--- a/fs/notify/mark.c
723+++ b/fs/notify/mark.c
392086de 724@@ -109,6 +109,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
7f207e10 725 mark->free_mark(mark);
1716fcea 726 }
7f207e10
AM
727 }
728+EXPORT_SYMBOL(fsnotify_put_mark);
729
2000de60
JR
730 /* Calculate mask of events for a list of marks */
731 u32 fsnotify_recalc_mask(struct hlist_head *head)
79b8bda9 732@@ -208,6 +209,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
1716fcea 733 mutex_unlock(&group->mark_mutex);
79b8bda9 734 fsnotify_free_mark(mark);
7f207e10
AM
735 }
736+EXPORT_SYMBOL(fsnotify_destroy_mark);
737
79b8bda9
AM
738 void fsnotify_destroy_marks(struct hlist_head *head, spinlock_t *lock)
739 {
740@@ -392,6 +394,7 @@ err:
7f207e10
AM
741
742 return ret;
743 }
744+EXPORT_SYMBOL(fsnotify_add_mark);
745
1716fcea
AM
746 int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
747 struct inode *inode, struct vfsmount *mnt, int allow_dups)
79b8bda9 748@@ -492,6 +495,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
7f207e10
AM
749 atomic_set(&mark->refcnt, 1);
750 mark->free_mark = free_mark;
751 }
752+EXPORT_SYMBOL(fsnotify_init_mark);
753
754 static int fsnotify_mark_destroy(void *ignored)
755 {
756diff --git a/fs/open.c b/fs/open.c
79b8bda9 757index b6f1e96..4ab0d4e 100644
7f207e10
AM
758--- a/fs/open.c
759+++ b/fs/open.c
c2c0f25c 760@@ -64,6 +64,7 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
7f207e10
AM
761 mutex_unlock(&dentry->d_inode->i_mutex);
762 return ret;
763 }
764+EXPORT_SYMBOL(do_truncate);
765
1716fcea 766 long vfs_truncate(struct path *path, loff_t length)
7f207e10 767 {
c2c0f25c 768@@ -678,6 +679,7 @@ int open_check_o_direct(struct file *f)
b912730e
AM
769 }
770 return 0;
771 }
772+EXPORT_SYMBOL(open_check_o_direct);
773
774 static int do_dentry_open(struct file *f,
c2c0f25c 775 struct inode *inode,
5527c038
JR
776diff --git a/fs/read_write.c b/fs/read_write.c
777index fd0414e..8ace6ec 100644
778--- a/fs/read_write.c
779+++ b/fs/read_write.c
780@@ -504,6 +504,7 @@ vfs_readf_t vfs_readf(struct file *file)
781 return new_sync_read;
782 return ERR_PTR(-ENOSYS);
783 }
784+EXPORT_SYMBOL(vfs_readf);
785
786 vfs_writef_t vfs_writef(struct file *file)
787 {
788@@ -515,6 +516,7 @@ vfs_writef_t vfs_writef(struct file *file)
789 return new_sync_write;
790 return ERR_PTR(-ENOSYS);
791 }
792+EXPORT_SYMBOL(vfs_writef);
793
794 ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
795 {
7f207e10 796diff --git a/fs/splice.c b/fs/splice.c
be52b249 797index 30a091d..c37c311 100644
7f207e10
AM
798--- a/fs/splice.c
799+++ b/fs/splice.c
be52b249 800@@ -1123,6 +1123,7 @@ long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
392086de
AM
801
802 return splice_write(pipe, out, ppos, len, flags);
7f207e10
AM
803 }
804+EXPORT_SYMBOL(do_splice_from);
805
806 /*
807 * Attempt to initiate a splice from a file to a pipe.
be52b249 808@@ -1149,6 +1150,7 @@ long do_splice_to(struct file *in, loff_t *ppos,
7f207e10
AM
809
810 return splice_read(in, ppos, pipe, len, flags);
811 }
812+EXPORT_SYMBOL(do_splice_to);
813
814 /**
815 * splice_direct_to_actor - splices data directly between two non-pipes
c1595e42 816diff --git a/fs/xattr.c b/fs/xattr.c
be52b249 817index 9b932b9..44c457a 100644
c1595e42
JR
818--- a/fs/xattr.c
819+++ b/fs/xattr.c
820@@ -207,6 +207,7 @@ vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value,
821 *xattr_value = value;
822 return error;
823 }
824+EXPORT_SYMBOL(vfs_getxattr_alloc);
825
826 /* Compare an extended attribute value with the given value */
827 int vfs_xattr_cmp(struct dentry *dentry, const char *xattr_name,
8cdd5066
JR
828diff --git a/kernel/task_work.c b/kernel/task_work.c
829index 53fa971..f80d564 100644
830--- a/kernel/task_work.c
831+++ b/kernel/task_work.c
832@@ -118,3 +118,4 @@ void task_work_run(void)
833 } while (work);
834 }
835 }
836+EXPORT_SYMBOL(task_work_run);
7f207e10 837diff --git a/security/commoncap.c b/security/commoncap.c
79b8bda9 838index 1832cf7..987ff5f 100644
7f207e10
AM
839--- a/security/commoncap.c
840+++ b/security/commoncap.c
79b8bda9 841@@ -1053,12 +1053,14 @@ int cap_mmap_addr(unsigned long addr)
94337f0d 842 }
7f207e10
AM
843 return ret;
844 }
0c3ec466
AM
845+EXPORT_SYMBOL(cap_mmap_addr);
846
847 int cap_mmap_file(struct file *file, unsigned long reqprot,
848 unsigned long prot, unsigned long flags)
849 {
850 return 0;
851 }
852+EXPORT_SYMBOL(cap_mmap_file);
c2c0f25c
AM
853
854 #ifdef CONFIG_SECURITY
855
7f207e10 856diff --git a/security/device_cgroup.c b/security/device_cgroup.c
79b8bda9 857index 03c1652..b00aa76 100644
7f207e10
AM
858--- a/security/device_cgroup.c
859+++ b/security/device_cgroup.c
f6c5ef8b
AM
860@@ -7,6 +7,7 @@
861 #include <linux/device_cgroup.h>
862 #include <linux/cgroup.h>
863 #include <linux/ctype.h>
864+#include <linux/export.h>
865 #include <linux/list.h>
866 #include <linux/uaccess.h>
867 #include <linux/seq_file.h>
076b876e 868@@ -849,6 +850,7 @@ int __devcgroup_inode_permission(struct inode *inode, int mask)
537831f9
AM
869 return __devcgroup_check_permission(type, imajor(inode), iminor(inode),
870 access);
7f207e10 871 }
2cbb1c4b 872+EXPORT_SYMBOL(__devcgroup_inode_permission);
7f207e10
AM
873
874 int devcgroup_inode_mknod(int mode, dev_t dev)
875 {
876diff --git a/security/security.c b/security/security.c
79b8bda9 877index 46f405c..54488b0 100644
7f207e10
AM
878--- a/security/security.c
879+++ b/security/security.c
79b8bda9 880@@ -433,6 +433,7 @@ int security_path_rmdir(struct path *dir, struct dentry *dentry)
7f207e10 881 return 0;
c2c0f25c 882 return call_int_hook(path_rmdir, 0, dir, dentry);
7f207e10
AM
883 }
884+EXPORT_SYMBOL(security_path_rmdir);
885
886 int security_path_unlink(struct path *dir, struct dentry *dentry)
887 {
79b8bda9 888@@ -449,6 +450,7 @@ int security_path_symlink(struct path *dir, struct dentry *dentry,
7f207e10 889 return 0;
c2c0f25c 890 return call_int_hook(path_symlink, 0, dir, dentry, old_name);
7f207e10
AM
891 }
892+EXPORT_SYMBOL(security_path_symlink);
893
894 int security_path_link(struct dentry *old_dentry, struct path *new_dir,
895 struct dentry *new_dentry)
79b8bda9 896@@ -457,6 +459,7 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir,
7f207e10 897 return 0;
c2c0f25c 898 return call_int_hook(path_link, 0, old_dentry, new_dir, new_dentry);
7f207e10
AM
899 }
900+EXPORT_SYMBOL(security_path_link);
901
902 int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
38d290e6 903 struct path *new_dir, struct dentry *new_dentry,
79b8bda9 904@@ -484,6 +487,7 @@ int security_path_truncate(struct path *path)
7f207e10 905 return 0;
c2c0f25c 906 return call_int_hook(path_truncate, 0, path);
7f207e10
AM
907 }
908+EXPORT_SYMBOL(security_path_truncate);
909
7eafdf33
AM
910 int security_path_chmod(struct path *path, umode_t mode)
911 {
79b8bda9 912@@ -491,6 +495,7 @@ int security_path_chmod(struct path *path, umode_t mode)
7f207e10 913 return 0;
c2c0f25c 914 return call_int_hook(path_chmod, 0, path, mode);
7f207e10
AM
915 }
916+EXPORT_SYMBOL(security_path_chmod);
917
537831f9 918 int security_path_chown(struct path *path, kuid_t uid, kgid_t gid)
7f207e10 919 {
79b8bda9 920@@ -498,6 +503,7 @@ int security_path_chown(struct path *path, kuid_t uid, kgid_t gid)
7f207e10 921 return 0;
c2c0f25c 922 return call_int_hook(path_chown, 0, path, uid, gid);
7f207e10
AM
923 }
924+EXPORT_SYMBOL(security_path_chown);
925
926 int security_path_chroot(struct path *path)
927 {
79b8bda9 928@@ -583,6 +589,7 @@ int security_inode_readlink(struct dentry *dentry)
7f207e10 929 return 0;
c2c0f25c 930 return call_int_hook(inode_readlink, 0, dentry);
7f207e10
AM
931 }
932+EXPORT_SYMBOL(security_inode_readlink);
933
c2c0f25c
AM
934 int security_inode_follow_link(struct dentry *dentry, struct inode *inode,
935 bool rcu)
79b8bda9 936@@ -598,6 +605,7 @@ int security_inode_permission(struct inode *inode, int mask)
7f207e10 937 return 0;
c2c0f25c 938 return call_int_hook(inode_permission, 0, inode, mask);
7f207e10
AM
939 }
940+EXPORT_SYMBOL(security_inode_permission);
941
1e00d052 942 int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
7f207e10 943 {
79b8bda9 944@@ -736,6 +744,7 @@ int security_file_permission(struct file *file, int mask)
7f207e10
AM
945
946 return fsnotify_perm(file, mask);
947 }
948+EXPORT_SYMBOL(security_file_permission);
949
950 int security_file_alloc(struct file *file)
951 {
79b8bda9 952@@ -795,6 +804,7 @@ int security_mmap_file(struct file *file, unsigned long prot,
7f207e10
AM
953 return ret;
954 return ima_file_mmap(file, prot);
955 }
0c3ec466 956+EXPORT_SYMBOL(security_mmap_file);
7f207e10 957
0c3ec466
AM
958 int security_mmap_addr(unsigned long addr)
959 {
7f207e10
AM
960diff -urN /usr/share/empty/Documentation/ABI/testing/debugfs-aufs linux/Documentation/ABI/testing/debugfs-aufs
961--- /usr/share/empty/Documentation/ABI/testing/debugfs-aufs 1970-01-01 01:00:00.000000000 +0100
8cdd5066 962+++ linux/Documentation/ABI/testing/debugfs-aufs 2016-02-28 11:26:32.569971135 +0100
86dc4139 963@@ -0,0 +1,50 @@
7f207e10
AM
964+What: /debug/aufs/si_<id>/
965+Date: March 2009
f6b6e03d 966+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
967+Description:
968+ Under /debug/aufs, a directory named si_<id> is created
969+ per aufs mount, where <id> is a unique id generated
970+ internally.
1facf9fc 971+
86dc4139
AM
972+What: /debug/aufs/si_<id>/plink
973+Date: Apr 2013
f6b6e03d 974+Contact: J. R. Okajima <hooanon05g@gmail.com>
86dc4139
AM
975+Description:
976+ It has three lines and shows the information about the
977+ pseudo-link. The first line is a single number
978+ representing a number of buckets. The second line is a
979+ number of pseudo-links per buckets (separated by a
980+ blank). The last line is a single number representing a
981+ total number of pseudo-links.
982+ When the aufs mount option 'noplink' is specified, it
983+ will show "1\n0\n0\n".
984+
7f207e10
AM
985+What: /debug/aufs/si_<id>/xib
986+Date: March 2009
f6b6e03d 987+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
988+Description:
989+ It shows the consumed blocks by xib (External Inode Number
990+ Bitmap), its block size and file size.
991+ When the aufs mount option 'noxino' is specified, it
992+ will be empty. About XINO files, see the aufs manual.
993+
994+What: /debug/aufs/si_<id>/xino0, xino1 ... xinoN
995+Date: March 2009
f6b6e03d 996+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
997+Description:
998+ It shows the consumed blocks by xino (External Inode Number
999+ Translation Table), its link count, block size and file
1000+ size.
1001+ When the aufs mount option 'noxino' is specified, it
1002+ will be empty. About XINO files, see the aufs manual.
1003+
1004+What: /debug/aufs/si_<id>/xigen
1005+Date: March 2009
f6b6e03d 1006+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1007+Description:
1008+ It shows the consumed blocks by xigen (External Inode
1009+ Generation Table), its block size and file size.
1010+ If CONFIG_AUFS_EXPORT is disabled, this entry will not
1011+ be created.
1012+ When the aufs mount option 'noxino' is specified, it
1013+ will be empty. About XINO files, see the aufs manual.
1014diff -urN /usr/share/empty/Documentation/ABI/testing/sysfs-aufs linux/Documentation/ABI/testing/sysfs-aufs
1015--- /usr/share/empty/Documentation/ABI/testing/sysfs-aufs 1970-01-01 01:00:00.000000000 +0100
8cdd5066 1016+++ linux/Documentation/ABI/testing/sysfs-aufs 2016-02-28 11:26:32.569971135 +0100
392086de 1017@@ -0,0 +1,31 @@
7f207e10
AM
1018+What: /sys/fs/aufs/si_<id>/
1019+Date: March 2009
f6b6e03d 1020+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1021+Description:
1022+ Under /sys/fs/aufs, a directory named si_<id> is created
1023+ per aufs mount, where <id> is a unique id generated
1024+ internally.
1025+
1026+What: /sys/fs/aufs/si_<id>/br0, br1 ... brN
1027+Date: March 2009
f6b6e03d 1028+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1029+Description:
1030+ It shows the absolute path of a member directory (which
1031+ is called branch) in aufs, and its permission.
1032+
392086de
AM
1033+What: /sys/fs/aufs/si_<id>/brid0, brid1 ... bridN
1034+Date: July 2013
f6b6e03d 1035+Contact: J. R. Okajima <hooanon05g@gmail.com>
392086de
AM
1036+Description:
1037+ It shows the id of a member directory (which is called
1038+ branch) in aufs.
1039+
7f207e10
AM
1040+What: /sys/fs/aufs/si_<id>/xi_path
1041+Date: March 2009
f6b6e03d 1042+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1043+Description:
1044+ It shows the absolute path of XINO (External Inode Number
1045+ Bitmap, Translation Table and Generation Table) file
1046+ even if it is the default path.
1047+ When the aufs mount option 'noxino' is specified, it
1048+ will be empty. About XINO files, see the aufs manual.
53392da6
AM
1049diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/01intro.txt linux/Documentation/filesystems/aufs/design/01intro.txt
1050--- /usr/share/empty/Documentation/filesystems/aufs/design/01intro.txt 1970-01-01 01:00:00.000000000 +0100
8cdd5066 1051+++ linux/Documentation/filesystems/aufs/design/01intro.txt 2016-02-28 11:26:32.569971135 +0100
7e9cd9fe 1052@@ -0,0 +1,170 @@
53392da6 1053+
8cdd5066 1054+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
1055+#
1056+# This program is free software; you can redistribute it and/or modify
1057+# it under the terms of the GNU General Public License as published by
1058+# the Free Software Foundation; either version 2 of the License, or
1059+# (at your option) any later version.
1060+#
1061+# This program is distributed in the hope that it will be useful,
1062+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1063+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1064+# GNU General Public License for more details.
1065+#
1066+# You should have received a copy of the GNU General Public License
523b37e3 1067+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1068+
1069+Introduction
1070+----------------------------------------
1071+
1072+aufs [ei ju: ef es] | [a u f s]
1073+1. abbrev. for "advanced multi-layered unification filesystem".
1074+2. abbrev. for "another unionfs".
1075+3. abbrev. for "auf das" in German which means "on the" in English.
1076+ Ex. "Butter aufs Brot"(G) means "butter onto bread"(E).
1077+ But "Filesystem aufs Filesystem" is hard to understand.
1078+
1079+AUFS is a filesystem with features:
1080+- multi layered stackable unification filesystem, the member directory
1081+ is called as a branch.
1082+- branch permission and attribute, 'readonly', 'real-readonly',
7e9cd9fe 1083+ 'readwrite', 'whiteout-able', 'link-able whiteout', etc. and their
53392da6
AM
1084+ combination.
1085+- internal "file copy-on-write".
1086+- logical deletion, whiteout.
1087+- dynamic branch manipulation, adding, deleting and changing permission.
1088+- allow bypassing aufs, user's direct branch access.
1089+- external inode number translation table and bitmap which maintains the
1090+ persistent aufs inode number.
1091+- seekable directory, including NFS readdir.
1092+- file mapping, mmap and sharing pages.
1093+- pseudo-link, hardlink over branches.
1094+- loopback mounted filesystem as a branch.
1095+- several policies to select one among multiple writable branches.
1096+- revert a single systemcall when an error occurs in aufs.
1097+- and more...
1098+
1099+
1100+Multi Layered Stackable Unification Filesystem
1101+----------------------------------------------------------------------
1102+Most people already know what it is.
1103+It is a filesystem which unifies several directories and provides a
1104+merged single directory. When users access a file, the access will be
1105+passed/re-directed/converted (sorry, I am not sure which English word is
1106+correct) to the real file on the member filesystem. The member
1107+filesystem is called 'lower filesystem' or 'branch' and has a mode
1108+'readonly' and 'readwrite.' And the deletion for a file on the lower
1109+readonly branch is handled by creating 'whiteout' on the upper writable
1110+branch.
1111+
1112+On LKML, there have been discussions about UnionMount (Jan Blunck,
1113+Bharata B Rao and Valerie Aurora) and Unionfs (Erez Zadok). They took
1114+different approaches to implement the merged-view.
1115+The former tries putting it into VFS, and the latter implements as a
1116+separate filesystem.
1117+(If I misunderstand about these implementations, please let me know and
1118+I shall correct it. Because it is a long time ago when I read their
1119+source files last time).
1120+
1121+UnionMount's approach will be able to be small, but may be hard to share
1122+branches between several UnionMount since the whiteout in it is
1123+implemented in the inode on branch filesystem and always
1124+shared. According to Bharata's post, readdir does not seem to be
1125+finished yet.
1126+There are several missing features known in this implementation such as
1127+- for users, the inode number may change silently. eg. copy-up.
1128+- link(2) may break by copy-up.
1129+- read(2) may get an obsoleted filedata (fstat(2) too).
1130+- fcntl(F_SETLK) may be broken by copy-up.
1131+- unnecessary copy-up may happen, for example mmap(MAP_PRIVATE) after
1132+ open(O_RDWR).
1133+
7e9cd9fe
AM
1134+In linux-3.18, "overlay" filesystem (formerly known as "overlayfs") was
1135+merged into mainline. This is another implementation of UnionMount as a
1136+separated filesystem. All the limitations and known problems which
1137+UnionMount has are equally inherited by the "overlay" filesystem.
1138+
1139+Unionfs has a longer history. When I started implementing a stackable
1140+filesystem (Aug 2005), it already existed. It has virtual super_block,
1141+inode, dentry and file objects and they have an array pointing lower
1142+same kind objects. After contributing many patches for Unionfs, I
1143+re-started my project AUFS (Jun 2006).
53392da6
AM
1144+
1145+In AUFS, the structure of filesystem resembles to Unionfs, but I
1146+implemented my own ideas, approaches and enhancements and it became
1147+totally different one.
1148+
1149+Comparing DM snapshot and fs based implementation
1150+- the number of bytes to be copied between devices is much smaller.
1151+- the type of filesystem must be one and only.
1152+- the fs must be writable, no readonly fs, even for the lower original
1153+ device. so the compression fs will not be usable. but if we use
1154+ loopback mount, we may address this issue.
1155+ for instance,
1156+ mount /cdrom/squashfs.img /sq
1157+ losetup /sq/ext2.img
1158+ losetup /somewhere/cow
1159+ dmsetup "snapshot /dev/loop0 /dev/loop1 ..."
1160+- it will be difficult (or needs more operations) to extract the
1161+ difference between the original device and COW.
1162+- DM snapshot-merge may help a lot when users try merging. in the
1163+ fs-layer union, users will use rsync(1).
1164+
7e9cd9fe
AM
1165+You may want to read my old paper "Filesystems in LiveCD"
1166+(http://aufs.sourceforge.net/aufs2/report/sq/sq.pdf).
53392da6 1167+
7e9cd9fe
AM
1168+
1169+Several characters/aspects/persona of aufs
53392da6
AM
1170+----------------------------------------------------------------------
1171+
7e9cd9fe 1172+Aufs has several characters, aspects or persona.
53392da6
AM
1173+1. a filesystem, callee of VFS helper
1174+2. sub-VFS, caller of VFS helper for branches
1175+3. a virtual filesystem which maintains persistent inode number
1176+4. reader/writer of files on branches such like an application
1177+
1178+1. Callee of VFS Helper
1179+As an ordinary linux filesystem, aufs is a callee of VFS. For instance,
1180+unlink(2) from an application reaches sys_unlink() kernel function and
1181+then vfs_unlink() is called. vfs_unlink() is one of VFS helper and it
1182+calls filesystem specific unlink operation. Actually aufs implements the
1183+unlink operation but it behaves like a redirector.
1184+
1185+2. Caller of VFS Helper for Branches
1186+aufs_unlink() passes the unlink request to the branch filesystem as if
1187+it were called from VFS. So the called unlink operation of the branch
1188+filesystem acts as usual. As a caller of VFS helper, aufs should handle
1189+every necessary pre/post operation for the branch filesystem.
1190+- acquire the lock for the parent dir on a branch
1191+- lookup in a branch
1192+- revalidate dentry on a branch
1193+- mnt_want_write() for a branch
1194+- vfs_unlink() for a branch
1195+- mnt_drop_write() for a branch
1196+- release the lock on a branch
1197+
1198+3. Persistent Inode Number
1199+One of the most important issue for a filesystem is to maintain inode
1200+numbers. This is particularly important to support exporting a
1201+filesystem via NFS. Aufs is a virtual filesystem which doesn't have a
1202+backend block device for its own. But some storage is necessary to
7e9cd9fe
AM
1203+keep and maintain the inode numbers. It may be a large space and may not
1204+suit to keep in memory. Aufs rents some space from its first writable
1205+branch filesystem (by default) and creates file(s) on it. These files
1206+are created by aufs internally and removed soon (currently) keeping
1207+opened.
53392da6
AM
1208+Note: Because these files are removed, they are totally gone after
1209+ unmounting aufs. It means the inode numbers are not persistent
1210+ across unmount or reboot. I have a plan to make them really
1211+ persistent which will be important for aufs on NFS server.
1212+
1213+4. Read/Write Files Internally (copy-on-write)
1214+Because a branch can be readonly, when you write a file on it, aufs will
1215+"copy-up" it to the upper writable branch internally. And then write the
1216+originally requested thing to the file. Generally kernel doesn't
1217+open/read/write file actively. In aufs, even a single write may cause an
1218+internal "file copy". This behaviour is very similar to cp(1) command.
1219+
1220+Some people may think it is better to pass such work to user space
1221+helper, instead of doing in kernel space. Actually I am still thinking
1222+about it. But currently I have implemented it in kernel space.
1223diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/02struct.txt linux/Documentation/filesystems/aufs/design/02struct.txt
1224--- /usr/share/empty/Documentation/filesystems/aufs/design/02struct.txt 1970-01-01 01:00:00.000000000 +0100
8cdd5066 1225+++ linux/Documentation/filesystems/aufs/design/02struct.txt 2016-02-28 11:26:32.569971135 +0100
7e9cd9fe 1226@@ -0,0 +1,258 @@
53392da6 1227+
8cdd5066 1228+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
1229+#
1230+# This program is free software; you can redistribute it and/or modify
1231+# it under the terms of the GNU General Public License as published by
1232+# the Free Software Foundation; either version 2 of the License, or
1233+# (at your option) any later version.
1234+#
1235+# This program is distributed in the hope that it will be useful,
1236+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1237+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1238+# GNU General Public License for more details.
1239+#
1240+# You should have received a copy of the GNU General Public License
523b37e3 1241+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1242+
1243+Basic Aufs Internal Structure
1244+
1245+Superblock/Inode/Dentry/File Objects
1246+----------------------------------------------------------------------
1247+As like an ordinary filesystem, aufs has its own
1248+superblock/inode/dentry/file objects. All these objects have a
1249+dynamically allocated array and store the same kind of pointers to the
1250+lower filesystem, branch.
1251+For example, when you build a union with one readwrite branch and one
1252+readonly, mounted /au, /rw and /ro respectively.
1253+- /au = /rw + /ro
1254+- /ro/fileA exists but /rw/fileA doesn't
1255+
1256+Aufs lookup operation finds /ro/fileA and gets dentry for that. These
1257+pointers are stored in a aufs dentry. The array in aufs dentry will be,
7e9cd9fe 1258+- [0] = NULL (because /rw/fileA doesn't exist)
53392da6
AM
1259+- [1] = /ro/fileA
1260+
1261+This style of an array is essentially same to the aufs
1262+superblock/inode/dentry/file objects.
1263+
1264+Because aufs supports manipulating branches, ie. add/delete/change
7e9cd9fe
AM
1265+branches dynamically, these objects have their own generation. When
1266+branches are changed, the generation in aufs superblock is
1267+incremented. And a generation in other object are compared when it is
1268+accessed. When a generation in other objects are obsoleted, aufs
1269+refreshes the internal array.
53392da6
AM
1270+
1271+
1272+Superblock
1273+----------------------------------------------------------------------
1274+Additionally aufs superblock has some data for policies to select one
1275+among multiple writable branches, XIB files, pseudo-links and kobject.
1276+See below in detail.
7e9cd9fe
AM
1277+About the policies which supports copy-down a directory, see
1278+wbr_policy.txt too.
53392da6
AM
1279+
1280+
1281+Branch and XINO(External Inode Number Translation Table)
1282+----------------------------------------------------------------------
1283+Every branch has its own xino (external inode number translation table)
1284+file. The xino file is created and unlinked by aufs internally. When two
1285+members of a union exist on the same filesystem, they share the single
1286+xino file.
1287+The struct of a xino file is simple, just a sequence of aufs inode
1288+numbers which is indexed by the lower inode number.
1289+In the above sample, assume the inode number of /ro/fileA is i111 and
1290+aufs assigns the inode number i999 for fileA. Then aufs writes 999 as
1291+4(8) bytes at 111 * 4(8) bytes offset in the xino file.
1292+
1293+When the inode numbers are not contiguous, the xino file will be sparse
1294+which has a hole in it and doesn't consume as much disk space as it
1295+might appear. If your branch filesystem consumes disk space for such
1296+holes, then you should specify 'xino=' option at mounting aufs.
1297+
7e9cd9fe
AM
1298+Aufs has a mount option to free the disk blocks for such holes in XINO
1299+files on tmpfs or ramdisk. But it is not so effective actually. If you
1300+meet a problem of disk shortage due to XINO files, then you should try
1301+"tmpfs-ino.patch" (and "vfs-ino.patch" too) in aufs4-standalone.git.
1302+The patch localizes the assignment inumbers per tmpfs-mount and avoid
1303+the holes in XINO files.
1304+
53392da6 1305+Also a writable branch has three kinds of "whiteout bases". All these
7e9cd9fe 1306+are existed when the branch is joined to aufs, and their names are
53392da6
AM
1307+whiteout-ed doubly, so that users will never see their names in aufs
1308+hierarchy.
7e9cd9fe 1309+1. a regular file which will be hardlinked to all whiteouts.
53392da6 1310+2. a directory to store a pseudo-link.
7e9cd9fe 1311+3. a directory to store an "orphan"-ed file temporary.
53392da6
AM
1312+
1313+1. Whiteout Base
1314+ When you remove a file on a readonly branch, aufs handles it as a
1315+ logical deletion and creates a whiteout on the upper writable branch
1316+ as a hardlink of this file in order not to consume inode on the
1317+ writable branch.
1318+2. Pseudo-link Dir
1319+ See below, Pseudo-link.
1320+3. Step-Parent Dir
1321+ When "fileC" exists on the lower readonly branch only and it is
1322+ opened and removed with its parent dir, and then user writes
1323+ something into it, then aufs copies-up fileC to this
1324+ directory. Because there is no other dir to store fileC. After
1325+ creating a file under this dir, the file is unlinked.
1326+
1327+Because aufs supports manipulating branches, ie. add/delete/change
7e9cd9fe
AM
1328+dynamically, a branch has its own id. When the branch order changes,
1329+aufs finds the new index by searching the branch id.
53392da6
AM
1330+
1331+
1332+Pseudo-link
1333+----------------------------------------------------------------------
1334+Assume "fileA" exists on the lower readonly branch only and it is
1335+hardlinked to "fileB" on the branch. When you write something to fileA,
1336+aufs copies-up it to the upper writable branch. Additionally aufs
1337+creates a hardlink under the Pseudo-link Directory of the writable
1338+branch. The inode of a pseudo-link is kept in aufs super_block as a
1339+simple list. If fileB is read after unlinking fileA, aufs returns
1340+filedata from the pseudo-link instead of the lower readonly
1341+branch. Because the pseudo-link is based upon the inode, to keep the
7e9cd9fe 1342+inode number by xino (see above) is essentially necessary.
53392da6
AM
1343+
1344+All the hardlinks under the Pseudo-link Directory of the writable branch
1345+should be restored in a proper location later. Aufs provides a utility
1346+to do this. The userspace helpers are executed at remounting and unmounting
1347+aufs by default.
1348+While this utility is running, it puts aufs into the pseudo-link
1349+maintenance mode. In this mode, only the process which began the
1350+maintenance mode (and its child processes) is allowed to operate in
1351+aufs. Some other processes which are not related to the pseudo-link will
1352+be allowed to run too, but the rest have to return an error or wait
1353+until the maintenance mode ends. If a process already acquires an inode
1354+mutex (in VFS), it has to return an error.
1355+
1356+
1357+XIB(external inode number bitmap)
1358+----------------------------------------------------------------------
1359+In addition to the xino file per a branch, aufs has an external inode number
7e9cd9fe
AM
1360+bitmap in a superblock object. It is also an internal file such like a
1361+xino file.
53392da6
AM
1362+It is a simple bitmap to mark whether the aufs inode number is in-use or
1363+not.
1364+To reduce the file I/O, aufs prepares a single memory page to cache xib.
1365+
7e9cd9fe 1366+As well as XINO files, aufs has a feature to truncate/refresh XIB to
53392da6
AM
1367+reduce the number of consumed disk blocks for these files.
1368+
1369+
1370+Virtual or Vertical Dir, and Readdir in Userspace
1371+----------------------------------------------------------------------
1372+In order to support multiple layers (branches), aufs readdir operation
1373+constructs a virtual dir block on memory. For readdir, aufs calls
1374+vfs_readdir() internally for each dir on branches, merges their entries
1375+with eliminating the whiteout-ed ones, and sets it to file (dir)
1376+object. So the file object has its entry list until it is closed. The
1377+entry list will be updated when the file position is zero and becomes
7e9cd9fe 1378+obsoleted. This decision is made in aufs automatically.
53392da6
AM
1379+
1380+The dynamically allocated memory block for the name of entries has a
1381+unit of 512 bytes (by default) and stores the names contiguously (no
1382+padding). Another block for each entry is handled by kmem_cache too.
1383+During building dir blocks, aufs creates hash list and judging whether
1384+the entry is whiteouted by its upper branch or already listed.
1385+The merged result is cached in the corresponding inode object and
1386+maintained by a customizable life-time option.
1387+
1388+Some people may call it can be a security hole or invite DoS attack
1389+since the opened and once readdir-ed dir (file object) holds its entry
1390+list and becomes a pressure for system memory. But I'd say it is similar
1391+to files under /proc or /sys. The virtual files in them also holds a
1392+memory page (generally) while they are opened. When an idea to reduce
1393+memory for them is introduced, it will be applied to aufs too.
1394+For those who really hate this situation, I've developed readdir(3)
1395+library which operates this merging in userspace. You just need to set
1396+LD_PRELOAD environment variable, and aufs will not consume any memory in
1397+kernel space for readdir(3).
1398+
1399+
1400+Workqueue
1401+----------------------------------------------------------------------
1402+Aufs sometimes requires privilege access to a branch. For instance,
1403+in copy-up/down operation. When a user process is going to make changes
1404+to a file which exists in the lower readonly branch only, and the mode
1405+of one of ancestor directories may not be writable by a user
1406+process. Here aufs copy-up the file with its ancestors and they may
1407+require privilege to set its owner/group/mode/etc.
1408+This is a typical case of an application character of aufs (see
1409+Introduction).
1410+
1411+Aufs uses workqueue synchronously for this case. It creates its own
1412+workqueue. The workqueue is a kernel thread and has privilege. Aufs
1413+passes the request to call mkdir or write (for example), and wait for
1414+its completion. This approach solves a problem of a signal handler
1415+simply.
1416+If aufs didn't adopt the workqueue and changed the privilege of the
7e9cd9fe
AM
1417+process, then the process may receive the unexpected SIGXFSZ or other
1418+signals.
53392da6
AM
1419+
1420+Also aufs uses the system global workqueue ("events" kernel thread) too
1421+for asynchronous tasks, such like handling inotify/fsnotify, re-creating a
1422+whiteout base and etc. This is unrelated to a privilege.
1423+Most of aufs operation tries acquiring a rw_semaphore for aufs
1424+superblock at the beginning, at the same time waits for the completion
1425+of all queued asynchronous tasks.
1426+
1427+
1428+Whiteout
1429+----------------------------------------------------------------------
1430+The whiteout in aufs is very similar to Unionfs's. That is represented
1431+by its filename. UnionMount takes an approach of a file mode, but I am
1432+afraid several utilities (find(1) or something) will have to support it.
1433+
1434+Basically the whiteout represents "logical deletion" which stops aufs to
1435+lookup further, but also it represents "dir is opaque" which also stop
7e9cd9fe 1436+further lookup.
53392da6
AM
1437+
1438+In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively.
1439+In order to make several functions in a single systemcall to be
1440+revertible, aufs adopts an approach to rename a directory to a temporary
1441+unique whiteouted name.
1442+For example, in rename(2) dir where the target dir already existed, aufs
1443+renames the target dir to a temporary unique whiteouted name before the
7e9cd9fe 1444+actual rename on a branch, and then handles other actions (make it opaque,
53392da6
AM
1445+update the attributes, etc). If an error happens in these actions, aufs
1446+simply renames the whiteouted name back and returns an error. If all are
1447+succeeded, aufs registers a function to remove the whiteouted unique
1448+temporary name completely and asynchronously to the system global
1449+workqueue.
1450+
1451+
1452+Copy-up
1453+----------------------------------------------------------------------
1454+It is a well-known feature or concept.
1455+When a user modifies a file on a readonly branch, aufs operates "copy-up"
1456+internally and makes change to the new file on the upper writable branch.
1457+When the trigger systemcall does not update the timestamps of the parent
1458+dir, aufs reverts it after copy-up.
c2b27bf2
AM
1459+
1460+
1461+Move-down (aufs3.9 and later)
1462+----------------------------------------------------------------------
1463+"Copy-up" is one of the essential feature in aufs. It copies a file from
1464+the lower readonly branch to the upper writable branch when a user
1465+changes something about the file.
1466+"Move-down" is an opposite action of copy-up. Basically this action is
1467+run manually instead of automatically and internally.
076b876e
AM
1468+For design and implementation, aufs has to consider these issues.
1469+- whiteout for the file may exist on the lower branch.
1470+- ancestor directories may not exist on the lower branch.
1471+- diropq for the ancestor directories may exist on the upper branch.
1472+- free space on the lower branch will reduce.
1473+- another access to the file may happen during moving-down, including
7e9cd9fe 1474+ UDBA (see "Revalidate Dentry and UDBA").
076b876e
AM
1475+- the file should not be hard-linked nor pseudo-linked. they should be
1476+ handled by auplink utility later.
c2b27bf2
AM
1477+
1478+Sometimes users want to move-down a file from the upper writable branch
1479+to the lower readonly or writable branch. For instance,
1480+- the free space of the upper writable branch is going to run out.
1481+- create a new intermediate branch between the upper and lower branch.
1482+- etc.
1483+
1484+For this purpose, use "aumvdown" command in aufs-util.git.
b912730e
AM
1485diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/03atomic_open.txt linux/Documentation/filesystems/aufs/design/03atomic_open.txt
1486--- /usr/share/empty/Documentation/filesystems/aufs/design/03atomic_open.txt 1970-01-01 01:00:00.000000000 +0100
8cdd5066 1487+++ linux/Documentation/filesystems/aufs/design/03atomic_open.txt 2016-02-28 11:26:32.569971135 +0100
b912730e
AM
1488@@ -0,0 +1,85 @@
1489+
8cdd5066 1490+# Copyright (C) 2015-2016 Junjiro R. Okajima
b912730e
AM
1491+#
1492+# This program is free software; you can redistribute it and/or modify
1493+# it under the terms of the GNU General Public License as published by
1494+# the Free Software Foundation; either version 2 of the License, or
1495+# (at your option) any later version.
1496+#
1497+# This program is distributed in the hope that it will be useful,
1498+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1499+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1500+# GNU General Public License for more details.
1501+#
1502+# You should have received a copy of the GNU General Public License
1503+# along with this program. If not, see <http://www.gnu.org/licenses/>.
1504+
1505+Support for a branch who has its ->atomic_open()
1506+----------------------------------------------------------------------
1507+The filesystems which implement their own ->atomic_open() are not the majority. For
1508+example NFSv4 does, and aufs should call NFSv4 ->atomic_open,
1509+particularly for open(O_CREAT|O_EXCL, 0400) case. Other than
1510+->atomic_open(), NFSv4 returns an error for this open(2). While I am not
1511+sure whether all filesystems who have ->atomic_open() behave like this,
1512+but NFSv4 surely returns the error.
1513+
1514+In order to support ->atomic_open() for aufs, there are a few
1515+approaches.
1516+
1517+A. Introduce aufs_atomic_open()
1518+ - calls one of VFS:do_last(), lookup_open() or atomic_open() for
1519+ branch fs.
1520+B. Introduce aufs_atomic_open() calling create, open and chmod. this is
1521+ an aufs user Pip Cet's approach
1522+ - calls aufs_create(), VFS finish_open() and notify_change().
1523+ - pass fake-mode to finish_open(), and then correct the mode by
1524+ notify_change().
1525+C. Extend aufs_open() to call branch fs's ->atomic_open()
1526+ - no aufs_atomic_open().
1527+ - aufs_lookup() registers the TID to an aufs internal object.
1528+ - aufs_create() does nothing when the matching TID is registered, but
1529+ registers the mode.
1530+ - aufs_open() calls branch fs's ->atomic_open() when the matching
1531+ TID is registered.
1532+D. Extend aufs_open() to re-try branch fs's ->open() with superuser's
1533+ credential
1534+ - no aufs_atomic_open().
1535+ - aufs_create() registers the TID to an internal object. this info
1536+ represents "this process created this file just now."
1537+ - when aufs gets EACCES from branch fs's ->open(), then confirm the
1538+ registered TID and re-try open() with superuser's credential.
1539+
1540+Pros and cons for each approach.
1541+
1542+A.
1543+ - straightforward but highly depends upon VFS internal.
1544+ - the atomic behaviour is kept.
1545+ - some of parameters such as nameidata are hard to reproduce for
1546+ branch fs.
1547+ - large overhead.
1548+B.
1549+ - easy to implement.
1550+ - the atomic behaviour is lost.
1551+C.
1552+ - the atomic behaviour is kept.
1553+ - dirty and tricky.
1554+ - VFS checks whether the file is created correctly after calling
1555+ ->create(), which means this approach doesn't work.
1556+D.
1557+ - easy to implement.
1558+ - the atomic behaviour is lost.
1559+ - to open a file with superuser's credential and give it to a user
1560+ process is a bad idea, since the file object keeps the credential
1561+ in it. It may affect LSM or something. This approach doesn't work
1562+ either.
1563+
1564+The approach A is ideal, but it is hard to implement. So here is a
1565+variation of A, which is to be implemented.
1566+
1567+A-1. Introduce aufs_atomic_open()
1568+ - calls branch fs ->atomic_open() if exists. otherwise calls
1569+ vfs_create() and finish_open().
1570+ - the demerit is that the several checks after branch fs
1571+ ->atomic_open() are lost. in the ordinary case, the checks are
1572+ done by VFS:do_last(), lookup_open() and atomic_open(). some can
1573+ be implemented in aufs, but not all I am afraid.
53392da6
AM
1574diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/03lookup.txt linux/Documentation/filesystems/aufs/design/03lookup.txt
1575--- /usr/share/empty/Documentation/filesystems/aufs/design/03lookup.txt 1970-01-01 01:00:00.000000000 +0100
8cdd5066 1576+++ linux/Documentation/filesystems/aufs/design/03lookup.txt 2016-02-28 11:26:32.569971135 +0100
7e9cd9fe 1577@@ -0,0 +1,113 @@
53392da6 1578+
8cdd5066 1579+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
1580+#
1581+# This program is free software; you can redistribute it and/or modify
1582+# it under the terms of the GNU General Public License as published by
1583+# the Free Software Foundation; either version 2 of the License, or
1584+# (at your option) any later version.
1585+#
1586+# This program is distributed in the hope that it will be useful,
1587+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1588+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1589+# GNU General Public License for more details.
1590+#
1591+# You should have received a copy of the GNU General Public License
523b37e3 1592+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1593+
1594+Lookup in a Branch
1595+----------------------------------------------------------------------
1596+Since aufs has a character of sub-VFS (see Introduction), it operates
7e9cd9fe
AM
1597+lookup for branches as VFS does. It may be a heavy work. But almost all
1598+lookup operation in aufs is the simplest case, ie. lookup only an entry
1599+directly connected to its parent. Digging down the directory hierarchy
1600+is unnecessary. VFS has a function lookup_one_len() for that use, and
1601+aufs calls it.
1602+
1603+When a branch is a remote filesystem, aufs basically relies upon its
53392da6
AM
1604+->d_revalidate(), also aufs forces the hardest revalidate tests for
1605+them.
1606+For d_revalidate, aufs implements three levels of revalidate tests. See
1607+"Revalidate Dentry and UDBA" in detail.
1608+
1609+
076b876e
AM
1610+Test Only the Highest One for the Directory Permission (dirperm1 option)
1611+----------------------------------------------------------------------
1612+Let's try case study.
1613+- aufs has two branches, upper readwrite and lower readonly.
1614+ /au = /rw + /ro
1615+- "dirA" exists under /ro, but not under /rw, and its mode is 0700.
1616+- user invoked "chmod a+rx /au/dirA"
1617+- the internal copy-up is activated and "/rw/dirA" is created and its
7e9cd9fe 1618+ permission bits are set to world readable.
076b876e
AM
1619+- then "/au/dirA" becomes world readable?
1620+
1621+In this case, /ro/dirA is still 0700 since it exists in readonly branch,
1622+or it may be a natively readonly filesystem. If aufs respects the lower
1623+branch, it should not respond readdir request from other users. But user
1624+allowed it by chmod. Should aufs really reject showing the entries
1625+under /ro/dirA?
1626+
7e9cd9fe
AM
1627+To be honest, I don't have a good solution for this case. So aufs
1628+implements 'dirperm1' and 'nodirperm1' mount options, and leave it to
1629+users.
076b876e
AM
1630+When dirperm1 is specified, aufs checks only the highest one for the
1631+directory permission, and shows the entries. Otherwise, as usual, checks
1632+every dir existing on all branches and rejects the request.
1633+
1634+As a side effect, dirperm1 option improves the performance of aufs
1635+because the number of permission check is reduced when the number of
1636+branch is many.
1637+
1638+
53392da6
AM
1639+Revalidate Dentry and UDBA (User's Direct Branch Access)
1640+----------------------------------------------------------------------
1641+Generally VFS helpers re-validate a dentry as a part of lookup.
1642+0. digging down the directory hierarchy.
1643+1. lock the parent dir by its i_mutex.
1644+2. lookup the final (child) entry.
1645+3. revalidate it.
1646+4. call the actual operation (create, unlink, etc.)
1647+5. unlock the parent dir
1648+
1649+If the filesystem implements its ->d_revalidate() (step 3), then it is
1650+called. Actually aufs implements it and checks the dentry on a branch is
1651+still valid.
1652+But it is not enough. Because aufs has to release the lock for the
1653+parent dir on a branch at the end of ->lookup() (step 2) and
1654+->d_revalidate() (step 3) while the i_mutex of the aufs dir is still
1655+held by VFS.
1656+If the file on a branch is changed directly, eg. bypassing aufs, after
1657+aufs released the lock, then the subsequent operation may cause
1658+some unpleasant result.
1659+
1660+This situation is a result of VFS architecture, ->lookup() and
1661+->d_revalidate() is separated. But I never say it is wrong. It is a good
1662+design from VFS's point of view. It is just not suitable for sub-VFS
1663+character in aufs.
1664+
1665+Aufs supports such a case by three levels of revalidation which are
1666+selectable by user.
1667+1. Simple Revalidate
1668+ In addition to the native flow in VFS, confirm the child-parent
1669+ relationship on the branch just after locking the parent dir on the
1670+ branch in the "actual operation" (step 4). When this validation
1671+ fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still
1672+ checks the validation of the dentry on branches.
1673+2. Monitor Changes Internally by Inotify/Fsnotify
1674+ In addition to the above, in the "actual operation" (step 4) aufs re-lookup
1675+ the dentry on the branch, and returns EBUSY if it finds different
1676+ dentry.
1677+ Additionally, aufs sets the inotify/fsnotify watch for every dir on branches
1678+ during it is in cache. When the event is notified, aufs registers a
1679+ function to kernel 'events' thread by schedule_work(). And the
1680+ function sets some special status to the cached aufs dentry and inode
1681+ private data. If they are not cached, then aufs has nothing to
1682+ do. When the same file is accessed through aufs (step 0-3) later,
1683+ aufs will detect the status and refresh all necessary data.
1684+ In this mode, aufs has to ignore the event which is fired by aufs
1685+ itself.
1686+3. No Extra Validation
1687+ This is the simplest test and doesn't add any additional revalidation
7e9cd9fe 1688+ test, and skip the revalidation in step 4. It is useful and improves
53392da6
AM
1689+ aufs performance when system surely hide the aufs branches from user,
1690+ by over-mounting something (or another method).
1691diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/04branch.txt linux/Documentation/filesystems/aufs/design/04branch.txt
1692--- /usr/share/empty/Documentation/filesystems/aufs/design/04branch.txt 1970-01-01 01:00:00.000000000 +0100
8cdd5066 1693+++ linux/Documentation/filesystems/aufs/design/04branch.txt 2016-02-28 11:26:32.569971135 +0100
7e9cd9fe 1694@@ -0,0 +1,74 @@
53392da6 1695+
8cdd5066 1696+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
1697+#
1698+# This program is free software; you can redistribute it and/or modify
1699+# it under the terms of the GNU General Public License as published by
1700+# the Free Software Foundation; either version 2 of the License, or
1701+# (at your option) any later version.
1702+#
1703+# This program is distributed in the hope that it will be useful,
1704+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1705+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1706+# GNU General Public License for more details.
1707+#
1708+# You should have received a copy of the GNU General Public License
523b37e3 1709+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1710+
1711+Branch Manipulation
1712+
1713+Since aufs supports dynamic branch manipulation, ie. add/remove a branch
1714+and changing its permission/attribute, there are a lot of works to do.
1715+
1716+
1717+Add a Branch
1718+----------------------------------------------------------------------
1719+o Confirm the adding dir exists outside of aufs, including loopback
7e9cd9fe 1720+ mount, and its various attributes.
53392da6
AM
1721+o Initialize the xino file and whiteout bases if necessary.
1722+ See struct.txt.
1723+
1724+o Check the owner/group/mode of the directory
1725+ When the owner/group/mode of the adding directory differs from the
1726+ existing branch, aufs issues a warning because it may impose a
1727+ security risk.
1728+ For example, when a upper writable branch has a world writable empty
1729+ top directory, a malicious user can create any files on the writable
1730+ branch directly, like copy-up and modify manually. If something like
1731+ /etc/{passwd,shadow} exists on the lower readonly branch but not the upper
1732+ writable branch, and the writable branch is world-writable, then a
1733+ malicious guy may create /etc/passwd on the writable branch directly
1734+ and the infected file will be valid in aufs.
7e9cd9fe 1735+ I am afraid it can be a security issue, but aufs can do nothing except
53392da6
AM
1736+ producing a warning.
1737+
1738+
1739+Delete a Branch
1740+----------------------------------------------------------------------
1741+o Confirm the deleting branch is not busy
1742+ To be general, there is one merit to adopt "remount" interface to
1743+ manipulate branches. It is to discard caches. At deleting a branch,
1744+ aufs checks the still cached (and connected) dentries and inodes. If
1745+ there are any, then they are all in-use. An inode without its
1746+ corresponding dentry can be alive alone (for example, inotify/fsnotify case).
1747+
1748+ For the cached one, aufs checks whether the same named entry exists on
1749+ other branches.
1750+ If the cached one is a directory, because aufs provides a merged view
1751+ to users, as long as one dir is left on any branch aufs can show the
1752+ dir to users. In this case, the branch can be removed from aufs.
1753+ Otherwise aufs rejects deleting the branch.
1754+
1755+ If any file on the deleting branch is opened by aufs, then aufs
1756+ rejects deleting.
1757+
1758+
1759+Modify the Permission of a Branch
1760+----------------------------------------------------------------------
1761+o Re-initialize or remove the xino file and whiteout bases if necessary.
1762+ See struct.txt.
1763+
1764+o rw --> ro: Confirm the modifying branch is not busy
1765+ Aufs rejects the request if any of these conditions are true.
1766+ - a file on the branch is mmap-ed.
1767+ - a regular file on the branch is opened for write and there is no
1768+ same named entry on the upper branch.
1769diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/05wbr_policy.txt linux/Documentation/filesystems/aufs/design/05wbr_policy.txt
1770--- /usr/share/empty/Documentation/filesystems/aufs/design/05wbr_policy.txt 1970-01-01 01:00:00.000000000 +0100
8cdd5066 1771+++ linux/Documentation/filesystems/aufs/design/05wbr_policy.txt 2016-02-28 11:26:32.569971135 +0100
523b37e3 1772@@ -0,0 +1,64 @@
53392da6 1773+
8cdd5066 1774+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
1775+#
1776+# This program is free software; you can redistribute it and/or modify
1777+# it under the terms of the GNU General Public License as published by
1778+# the Free Software Foundation; either version 2 of the License, or
1779+# (at your option) any later version.
1780+#
1781+# This program is distributed in the hope that it will be useful,
1782+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1783+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1784+# GNU General Public License for more details.
1785+#
1786+# You should have received a copy of the GNU General Public License
523b37e3 1787+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1788+
1789+Policies to Select One among Multiple Writable Branches
1790+----------------------------------------------------------------------
1791+When the number of writable branch is more than one, aufs has to decide
1792+the target branch for file creation or copy-up. By default, the highest
1793+writable branch which has the parent (or ancestor) dir of the target
1794+file is chosen (top-down-parent policy).
1795+By user's request, aufs implements some other policies to select the
7e9cd9fe
AM
1796+writable branch, for file creation several policies, round-robin,
1797+most-free-space, and other policies. For copy-up, top-down-parent,
1798+bottom-up-parent, bottom-up and others.
53392da6
AM
1799+
1800+As expected, the round-robin policy selects the branch in circular. When
1801+you have two writable branches and creates 10 new files, 5 files will be
1802+created for each branch. mkdir(2) systemcall is an exception. When you
1803+create 10 new directories, all will be created on the same branch.
1804+And the most-free-space policy selects the one which has most free
1805+space among the writable branches. The amount of free space will be
1806+checked by aufs internally, and users can specify its time interval.
1807+
1808+The policies for copy-up are simpler,
1809+top-down-parent is equivalent to the same named one in create policy,
1810+bottom-up-parent selects the writable branch where the parent dir
1811+exists and the nearest upper one from the copyup-source,
1812+bottom-up selects the nearest upper writable branch from the
1813+copyup-source, regardless the existence of the parent dir.
1814+
1815+There are some rules or exceptions to apply these policies.
1816+- If there is a readonly branch above the policy-selected branch and
1817+ the parent dir is marked as opaque (a variation of whiteout), or the
1818+ target (creating) file is whiteout-ed on the upper readonly branch,
1819+ then the result of the policy is ignored and the target file will be
1820+ created on the nearest upper writable branch than the readonly branch.
1821+- If there is a writable branch above the policy-selected branch and
1822+ the parent dir is marked as opaque or the target file is whiteouted
1823+ on the branch, then the result of the policy is ignored and the target
1824+ file will be created on the highest one among the upper writable
1825+ branches who has diropq or whiteout. In case of whiteout, aufs removes
1826+ it as usual.
1827+- link(2) and rename(2) systemcalls are exceptions in every policy.
1828+ They try selecting the branch where the source exists as possible
1829+ since copyup a large file will take long time. If it can't be,
1830+ ie. the branch where the source exists is readonly, then they will
1831+ follow the copyup policy.
1832+- There is an exception for rename(2) when the target exists.
1833+ If the rename target exists, aufs compares the index of the branches
1834+ where the source and the target exists and selects the higher
1835+ one. If the selected branch is readonly, then aufs follows the
1836+ copyup policy.
076b876e
AM
1837diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06fhsm.txt linux/Documentation/filesystems/aufs/design/06fhsm.txt
1838--- /usr/share/empty/Documentation/filesystems/aufs/design/06fhsm.txt 1970-01-01 01:00:00.000000000 +0100
8cdd5066 1839+++ linux/Documentation/filesystems/aufs/design/06fhsm.txt 2016-02-28 11:26:32.569971135 +0100
076b876e
AM
1840@@ -0,0 +1,120 @@
1841+
8cdd5066 1842+# Copyright (C) 2011-2016 Junjiro R. Okajima
076b876e
AM
1843+#
1844+# This program is free software; you can redistribute it and/or modify
1845+# it under the terms of the GNU General Public License as published by
1846+# the Free Software Foundation; either version 2 of the License, or
1847+# (at your option) any later version.
1848+#
1849+# This program is distributed in the hope that it will be useful,
1850+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1851+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1852+# GNU General Public License for more details.
1853+#
1854+# You should have received a copy of the GNU General Public License
1855+# along with this program; if not, write to the Free Software
1856+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1857+
1858+
1859+File-based Hierarchical Storage Management (FHSM)
1860+----------------------------------------------------------------------
1861+Hierarchical Storage Management (or HSM) is a well-known feature in the
1862+storage world. Aufs provides this feature as file-based with multiple
7e9cd9fe 1863+writable branches, based upon the principle of "Colder, the Lower".
076b876e 1864+Here the word "colder" means that the less used files, and "lower" means
7e9cd9fe 1865+that the position in the order of the stacked branches vertically.
076b876e
AM
1866+These multiple writable branches are prioritized, ie. the topmost one
1867+should be the fastest drive and be used heavily.
1868+
1869+o Characters in aufs FHSM story
1870+- aufs itself and a new branch attribute.
1871+- a new ioctl interface to move-down and to establish a connection with
1872+ the daemon ("move-down" is a converse of "copy-up").
1873+- userspace tool and daemon.
1874+
1875+The userspace daemon establishes a connection with aufs and waits for
1876+the notification. The notified information is very similar to struct
1877+statfs containing the number of consumed blocks and inodes.
1878+When the consumed blocks/inodes of a branch exceeds the user-specified
1879+upper watermark, the daemon activates its move-down process until the
1880+consumed blocks/inodes reaches the user-specified lower watermark.
1881+
1882+The actual move-down is done by aufs based upon the request from
1883+user-space since we need to maintain the inode number and the internal
1884+pointer arrays in aufs.
1885+
1886+Currently aufs FHSM handles the regular files only. Additionally they
1887+must not be hard-linked nor pseudo-linked.
1888+
1889+
1890+o Cowork of aufs and the user-space daemon
1891+ During the userspace daemon established the connection, aufs sends a
1892+ small notification to it whenever aufs writes something into the
1893+ writable branch. But it may cost high since aufs issues statfs(2)
1894+ internally. So user can specify a new option to cache the
1895+ info. Actually the notification is controlled by these factors.
1896+ + the specified cache time.
1897+ + classified as "force" by aufs internally.
1898+ Until the specified time expires, aufs doesn't send the info
1899+ except the forced cases. When aufs decides to force, the info is always
1900+ notified to userspace.
1901+ For example, the number of free inodes is generally large enough and
1902+ the shortage of it happens rarely. So aufs doesn't force the
1903+ notification when creating a new file, directory and others. This is
1904+ the typical case which aufs doesn't force.
1905+ When aufs writes the actual filedata and the files consumes any of new
1906+ blocks, the aufs forces notifying.
1907+
1908+
1909+o Interfaces in aufs
1910+- New branch attribute.
1911+ + fhsm
1912+ Specifies that the branch is managed by FHSM feature. In other words,
1913+ participant in the FHSM.
1914+ When nofhsm is set to the branch, it will not be the source/target
1915+ branch of the move-down operation. This attribute is set
1916+ independently from coo and moo attributes, and if you want full
1917+ FHSM, you should specify them as well.
1918+- New mount option.
1919+ + fhsm_sec
1920+ Specifies a second to suppress many less important info to be
1921+ notified.
1922+- New ioctl.
1923+ + AUFS_CTL_FHSM_FD
1924+ create a new file descriptor which userspace can read the notification
1925+ (a subset of struct statfs) from aufs.
1926+- Module parameter 'brs'
1927+ It has to be set to 1. Otherwise the new mount option 'fhsm' will not
1928+ be set.
1929+- mount helpers /sbin/mount.aufs and /sbin/umount.aufs
1930+ When there are two or more branches with fhsm attributes,
1931+ /sbin/mount.aufs invokes the user-space daemon and /sbin/umount.aufs
1932+ terminates it. As a result of remounting and branch-manipulation, the
1933+ number of branches with fhsm attribute can be one. In this case,
1934+ /sbin/mount.aufs will terminate the user-space daemon.
1935+
1936+
1937+Finally the operation is done as these steps in kernel-space.
1938+- make sure that,
1939+ + no one else is using the file.
1940+ + the file is not hard-linked.
1941+ + the file is not pseudo-linked.
1942+ + the file is a regular file.
1943+ + the parent dir is not opaqued.
1944+- find the target writable branch.
1945+- make sure the file is not whiteout-ed by the upper (than the target)
1946+ branch.
1947+- make the parent dir on the target branch.
1948+- mutex lock the inode on the branch.
1949+- unlink the whiteout on the target branch (if exists).
1950+- lookup and create the whiteout-ed temporary name on the target branch.
1951+- copy the file as the whiteout-ed temporary name on the target branch.
1952+- rename the whiteout-ed temporary name to the original name.
1953+- unlink the file on the source branch.
1954+- maintain the internal pointer array and the external inode number
1955+ table (XINO).
1956+- maintain the timestamps and other attributes of the parent dir and the
1957+ file.
1958+
1959+And of course, in every step, an error may happen. So the operation
1960+should restore the original file state after an error happens.
53392da6
AM
1961diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06mmap.txt linux/Documentation/filesystems/aufs/design/06mmap.txt
1962--- /usr/share/empty/Documentation/filesystems/aufs/design/06mmap.txt 1970-01-01 01:00:00.000000000 +0100
8cdd5066 1963+++ linux/Documentation/filesystems/aufs/design/06mmap.txt 2016-02-28 11:26:32.569971135 +0100
b912730e 1964@@ -0,0 +1,72 @@
53392da6 1965+
8cdd5066 1966+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
1967+#
1968+# This program is free software; you can redistribute it and/or modify
1969+# it under the terms of the GNU General Public License as published by
1970+# the Free Software Foundation; either version 2 of the License, or
1971+# (at your option) any later version.
1972+#
1973+# This program is distributed in the hope that it will be useful,
1974+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1975+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1976+# GNU General Public License for more details.
1977+#
1978+# You should have received a copy of the GNU General Public License
523b37e3 1979+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1980+
1981+mmap(2) -- File Memory Mapping
1982+----------------------------------------------------------------------
1983+In aufs, the file-mapped pages are handled by a branch fs directly, no
1984+interaction with aufs. It means aufs_mmap() calls the branch fs's
1985+->mmap().
1986+This approach is simple and good, but there is one problem.
7e9cd9fe 1987+Under /proc, several entries show the mmapped files by its path (with
53392da6
AM
1988+device and inode number), and the printed path will be the path on the
1989+branch fs's instead of virtual aufs's.
1990+This is not a problem in most cases, but some utilities lsof(1) (and its
1991+user) may expect the path on aufs.
1992+
1993+To address this issue, aufs adds a new member called vm_prfile in struct
1994+vm_area_struct (and struct vm_region). The original vm_file points to
1995+the file on the branch fs in order to handle everything correctly as
1996+usual. The new vm_prfile points to a virtual file in aufs, and the
1997+show-functions in procfs refers to vm_prfile if it is set.
1998+Also we need to maintain several other places where touching vm_file
1999+such like
2000+- fork()/clone() copies vma and the reference count of vm_file is
2001+ incremented.
2002+- merging vma maintains the ref count too.
2003+
7e9cd9fe 2004+This is not a good approach. It just fakes the printed path. But it
53392da6
AM
2005+leaves all behaviour around f_mapping unchanged. This is surely an
2006+advantage.
2007+Actually aufs had adopted another complicated approach which calls
2008+generic_file_mmap() and handles struct vm_operations_struct. In this
2009+approach, aufs met a hard problem and I could not solve it without
2010+switching the approach.
b912730e
AM
2011+
2012+There may be one more approach, which is
2013+- bind-mount the branch-root onto the aufs-root internally
2014+- grab the new vfsmount (ie. struct mount)
2015+- lazy-umount the branch-root internally
2016+- in open(2) the aufs-file, open the branch-file with the hidden
2017+ vfsmount (instead of the original branch's vfsmount)
2018+- ideally this "bind-mount and lazy-umount" should be done atomically,
2019+ but it may be possible from userspace by the mount helper.
2020+
2021+Adding the internal hidden vfsmount and using it in opening a file, the
2022+file path under /proc will be printed correctly. This approach looks
2023+smarter, but is not possible I am afraid.
2024+- aufs-root may be bind-mount later. when it happens, another hidden
2025+ vfsmount will be required.
2026+- it is hard to get the chance to bind-mount and lazy-umount
2027+ + in kernel-space, FS can have vfsmount in open(2) via
2028+ file->f_path, and aufs can know its vfsmount. But several locks are
2029+ already acquired, and if aufs tries to bind-mount and lazy-umount
2030+ here, then it may cause a deadlock.
2031+ + in user-space, bind-mount doesn't invoke the mount helper.
2032+- since /proc shows dev and ino, aufs has to give vma these info. it
2033+ means a new member vm_prinode will be necessary. this is essentially
2034+ equivalent to vm_prfile described above.
2035+
2036+I have to give up this "looks-smarter" approach.
c1595e42
JR
2037diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06xattr.txt linux/Documentation/filesystems/aufs/design/06xattr.txt
2038--- /usr/share/empty/Documentation/filesystems/aufs/design/06xattr.txt 1970-01-01 01:00:00.000000000 +0100
8cdd5066 2039+++ linux/Documentation/filesystems/aufs/design/06xattr.txt 2016-02-28 11:26:32.569971135 +0100
c1595e42
JR
2040@@ -0,0 +1,96 @@
2041+
8cdd5066 2042+# Copyright (C) 2014-2016 Junjiro R. Okajima
c1595e42
JR
2043+#
2044+# This program is free software; you can redistribute it and/or modify
2045+# it under the terms of the GNU General Public License as published by
2046+# the Free Software Foundation; either version 2 of the License, or
2047+# (at your option) any later version.
2048+#
2049+# This program is distributed in the hope that it will be useful,
2050+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2051+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2052+# GNU General Public License for more details.
2053+#
2054+# You should have received a copy of the GNU General Public License
2055+# along with this program; if not, write to the Free Software
2056+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
2057+
2058+
2059+Listing XATTR/EA and getting the value
2060+----------------------------------------------------------------------
2061+For the inode standard attributes (owner, group, timestamps, etc.), aufs
2062+shows the values from the topmost existing file. This behaviour is good
7e9cd9fe 2063+for the non-dir entries since the behaviour exactly matches the shown
c1595e42
JR
2064+information. But for the directories, aufs considers all the same named
2065+entries on the lower branches. Which means, if one of the lower entry
2066+rejects readdir call, then aufs returns an error even if the topmost
2067+entry allows it. This behaviour is necessary to respect the branch fs's
2068+security, but can make users confused since the user-visible standard
2069+attributes don't match the behaviour.
2070+To address this issue, aufs has a mount option called dirperm1 which
2071+checks the permission for the topmost entry only, and ignores the lower
2072+entry's permission.
2073+
2074+A similar issue can happen around XATTR.
2075+getxattr(2) and listxattr(2) families behave as if dirperm1 option is
7e9cd9fe
AM
2076+always set. Otherwise these very unpleasant situations would happen.
2077+- listxattr(2) may return the duplicated entries.
c1595e42
JR
2078+- users may not be able to remove or reset the XATTR forever.
2079+
2080+
2081+XATTR/EA support in the internal (copy,move)-(up,down)
2082+----------------------------------------------------------------------
7e9cd9fe 2083+Generally the extended attributes of inode are categorized as these.
c1595e42
JR
2084+- "security" for LSM and capability.
2085+- "system" for posix ACL, 'acl' mount option is required for the branch
2086+ fs generally.
2087+- "trusted" for userspace, CAP_SYS_ADMIN is required.
2088+- "user" for userspace, 'user_xattr' mount option is required for the
2089+ branch fs generally.
2090+
2091+Moreover there are some other categories. Aufs handles these rather
2092+unpopular categories as the ordinary ones, ie. there is no special
2093+condition nor exception.
2094+
2095+In copy-up, the support for XATTR on the dst branch may differ from the
2096+src branch. In this case, the copy-up operation will get an error and
7e9cd9fe
AM
2097+the original user operation which triggered the copy-up will fail. It
2098+can happen that even all copy-up will fail.
c1595e42
JR
2099+When both of src and dst branches support XATTR and if an error occurs
2100+during copying XATTR, then the copy-up should fail obviously. That is a
2101+good reason and aufs should return an error to userspace. But when only
7e9cd9fe 2102+the src branch supports that XATTR, aufs should not return an error.
c1595e42
JR
2103+For example, the src branch supports ACL but the dst branch doesn't
2104+because the dst branch may natively un-support it or temporarily
2105+un-support it due to "noacl" mount option. Of course, the dst branch fs
2106+may NOT return an error even if the XATTR is not supported. It is
2107+totally up to the branch fs.
2108+
2109+Anyway when the aufs internal copy-up gets an error from the dst branch
2110+fs, then aufs tries removing the just copied entry and returns the error
2111+to the userspace. The worst case of this situation will be all copy-up
2112+will fail.
2113+
2114+For the copy-up operation, there are two basic approaches.
2115+- copy the specified XATTR only (by category above), and return the
7e9cd9fe 2116+ error unconditionally if it happens.
c1595e42
JR
2117+- copy all XATTR, and ignore the error on the specified category only.
2118+
2119+In order to support XATTR and to implement the correct behaviour, aufs
7e9cd9fe
AM
2120+chooses the latter approach and introduces some new branch attributes,
2121+"icexsec", "icexsys", "icextr", "icexusr", and "icexoth".
c1595e42 2122+They correspond to the XATTR namespaces (see above). Additionally, to be
7e9cd9fe
AM
2123+convenient, "icex" is also provided which means all "icex*" attributes
2124+are set (here the word "icex" stands for "ignore copy-error on XATTR").
c1595e42
JR
2125+
2126+The meaning of these attributes is to ignore the error from setting
2127+XATTR on that branch.
2128+Note that aufs tries copying all XATTR unconditionally, and ignores the
2129+error from the dst branch according to the specified attributes.
2130+
2131+Some XATTR may have its default value. The default value may come from
2132+the parent dir or the environment. If the default value is set at the
2133+file creating-time, it will be overwritten by copy-up.
2134+Some contradiction may happen I am afraid.
2135+Do we need another attribute to stop copying XATTR? I am unsure. For
2136+now, aufs implements the branch attributes to ignore the error.
53392da6
AM
2137diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/07export.txt linux/Documentation/filesystems/aufs/design/07export.txt
2138--- /usr/share/empty/Documentation/filesystems/aufs/design/07export.txt 1970-01-01 01:00:00.000000000 +0100
8cdd5066 2139+++ linux/Documentation/filesystems/aufs/design/07export.txt 2016-02-28 11:26:32.569971135 +0100
523b37e3 2140@@ -0,0 +1,58 @@
53392da6 2141+
8cdd5066 2142+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
2143+#
2144+# This program is free software; you can redistribute it and/or modify
2145+# it under the terms of the GNU General Public License as published by
2146+# the Free Software Foundation; either version 2 of the License, or
2147+# (at your option) any later version.
2148+#
2149+# This program is distributed in the hope that it will be useful,
2150+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2151+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2152+# GNU General Public License for more details.
2153+#
2154+# You should have received a copy of the GNU General Public License
523b37e3 2155+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
2156+
2157+Export Aufs via NFS
2158+----------------------------------------------------------------------
2159+Here is an approach.
2160+- like xino/xib, add a new file 'xigen' which stores aufs inode
2161+ generation.
2162+- iget_locked(): initialize aufs inode generation for a new inode, and
2163+ store it in xigen file.
2164+- destroy_inode(): increment aufs inode generation and store it in xigen
2165+ file. it is necessary even if it is not unlinked, because any data of
2166+ inode may be changed by UDBA.
2167+- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise
2168+ build file handle by
2169+ + branch id (4 bytes)
2170+ + superblock generation (4 bytes)
2171+ + inode number (4 or 8 bytes)
2172+ + parent dir inode number (4 or 8 bytes)
2173+ + inode generation (4 bytes)
2174+ + return value of exportfs_encode_fh() for the parent on a branch (4
2175+ bytes)
2176+ + file handle for a branch (by exportfs_encode_fh())
2177+- fh_to_dentry():
2178+ + find the index of a branch from its id in handle, and check it
2179+ still exists in aufs.
2180+ + 1st level: get the inode number from handle and search it in cache.
7e9cd9fe
AM
2181+ + 2nd level: if not found in cache, get the parent inode number from
2182+ the handle and search it in cache. and then open the found parent
2183+ dir, find the matching inode number by vfs_readdir() and get its
2184+ name, and call lookup_one_len() for the target dentry.
53392da6
AM
2185+ + 3rd level: if the parent dir is not cached, call
2186+ exportfs_decode_fh() for a branch and get the parent on a branch,
2187+ build a pathname of it, convert it to a pathname in aufs, call
2188+ path_lookup(). now aufs gets a parent dir dentry, then handle it as
2189+ the 2nd level.
2190+ + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount
2191+ for every branch, but not itself. to get this, (currently) aufs
2192+ searches in current->nsproxy->mnt_ns list. it may not be a good
2193+ idea, but I didn't get other approach.
2194+ + test the generation of the gotten inode.
2195+- every inode operation: they may get EBUSY due to UDBA. in this case,
2196+ convert it into ESTALE for NFSD.
2197+- readdir(): call lockdep_on/off() because filldir in NFSD calls
2198+ lookup_one_len(), vfs_getattr(), encode_fh() and others.
2199diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/08shwh.txt linux/Documentation/filesystems/aufs/design/08shwh.txt
2200--- /usr/share/empty/Documentation/filesystems/aufs/design/08shwh.txt 1970-01-01 01:00:00.000000000 +0100
8cdd5066 2201+++ linux/Documentation/filesystems/aufs/design/08shwh.txt 2016-02-28 11:26:32.569971135 +0100
523b37e3 2202@@ -0,0 +1,52 @@
53392da6 2203+
8cdd5066 2204+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
2205+#
2206+# This program is free software; you can redistribute it and/or modify
2207+# it under the terms of the GNU General Public License as published by
2208+# the Free Software Foundation; either version 2 of the License, or
2209+# (at your option) any later version.
2210+#
2211+# This program is distributed in the hope that it will be useful,
2212+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2213+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2214+# GNU General Public License for more details.
2215+#
2216+# You should have received a copy of the GNU General Public License
523b37e3 2217+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
2218+
2219+Show Whiteout Mode (shwh)
2220+----------------------------------------------------------------------
2221+Generally aufs hides the name of whiteouts. But in some cases, to show
2222+them is very useful for users. For instance, creating a new middle layer
2223+(branch) by merging existing layers.
2224+
2225+(borrowing aufs1 HOW-TO from a user, Michael Towers)
2226+When you have three branches,
2227+- Bottom: 'system', squashfs (underlying base system), read-only
2228+- Middle: 'mods', squashfs, read-only
2229+- Top: 'overlay', ram (tmpfs), read-write
2230+
2231+The top layer is loaded at boot time and saved at shutdown, to preserve
2232+the changes made to the system during the session.
2233+When larger changes have been made, or smaller changes have accumulated,
2234+the size of the saved top layer data grows. At this point, it would be
2235+nice to be able to merge the two overlay branches ('mods' and 'overlay')
2236+and rewrite the 'mods' squashfs, clearing the top layer and thus
2237+restoring save and load speed.
2238+
2239+This merging is simplified by the use of another aufs mount, of just the
2240+two overlay branches using the 'shwh' option.
2241+# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \
2242+ aufs /livesys/merge_union
2243+
2244+A merged view of these two branches is then available at
2245+/livesys/merge_union, and the new feature is that the whiteouts are
2246+visible!
2247+Note that in 'shwh' mode the aufs mount must be 'ro', which will disable
2248+writing to all branches. Also the default mode for all branches is 'ro'.
2249+It is now possible to save the combined contents of the two overlay
2250+branches to a new squashfs, e.g.:
2251+# mksquashfs /livesys/merge_union /path/to/newmods.squash
2252+
2253+This new squashfs archive can be stored on the boot device and the
2254+initramfs will use it to replace the old one at the next boot.
2255diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/10dynop.txt linux/Documentation/filesystems/aufs/design/10dynop.txt
2256--- /usr/share/empty/Documentation/filesystems/aufs/design/10dynop.txt 1970-01-01 01:00:00.000000000 +0100
8cdd5066 2257+++ linux/Documentation/filesystems/aufs/design/10dynop.txt 2016-02-28 11:26:32.569971135 +0100
7e9cd9fe 2258@@ -0,0 +1,47 @@
53392da6 2259+
8cdd5066 2260+# Copyright (C) 2010-2016 Junjiro R. Okajima
53392da6
AM
2261+#
2262+# This program is free software; you can redistribute it and/or modify
2263+# it under the terms of the GNU General Public License as published by
2264+# the Free Software Foundation; either version 2 of the License, or
2265+# (at your option) any later version.
2266+#
2267+# This program is distributed in the hope that it will be useful,
2268+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2269+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2270+# GNU General Public License for more details.
2271+#
2272+# You should have received a copy of the GNU General Public License
523b37e3 2273+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
2274+
2275+Dynamically customizable FS operations
2276+----------------------------------------------------------------------
2277+Generally FS operations (struct inode_operations, struct
2278+address_space_operations, struct file_operations, etc.) are defined as
2279+"static const", but it never means that FS have only one set of
2280+operation. Some FS have multiple sets of them. For instance, ext2 has
2281+three sets, one for XIP, for NOBH, and for normal.
2282+Since aufs overrides and redirects these operations, sometimes aufs has
7e9cd9fe 2283+to change its behaviour according to the branch FS type. More importantly
53392da6
AM
2284+VFS acts differently if a function (member in the struct) is set or
2285+not. It means aufs should have several sets of operations and select one
2286+among them according to the branch FS definition.
2287+
7e9cd9fe 2288+In order to solve this problem and not to affect the behaviour of VFS,
53392da6 2289+aufs defines these operations dynamically. For instance, aufs defines
7e9cd9fe
AM
2290+dummy direct_IO function for struct address_space_operations, but it may
2291+not be set to the address_space_operations actually. When the branch FS
2292+doesn't have it, aufs doesn't set it to its address_space_operations
2293+while the function definition itself is still alive. So the behaviour
2294+itself will not change, and it will return an error when direct_IO is
2295+not set.
53392da6
AM
2296+
2297+The lifetime of these dynamically generated operation objects is
2298+maintained by aufs branch object. When the branch is removed from aufs,
2299+the reference counter of the object is decremented. When it reaches
2300+zero, the dynamically generated operation object will be freed.
2301+
7e9cd9fe
AM
2302+This approach is designed to support AIO (io_submit), Direct I/O and
2303+XIP (DAX) mainly.
2304+Currently this approach is applied to address_space_operations for
2305+regular files only.
53392da6
AM
2306diff -urN /usr/share/empty/Documentation/filesystems/aufs/README linux/Documentation/filesystems/aufs/README
2307--- /usr/share/empty/Documentation/filesystems/aufs/README 1970-01-01 01:00:00.000000000 +0100
8cdd5066
JR
2308+++ linux/Documentation/filesystems/aufs/README 2016-02-28 11:26:32.569971135 +0100
2309@@ -0,0 +1,391 @@
53392da6 2310+
5527c038 2311+Aufs4 -- advanced multi layered unification filesystem version 4.x
53392da6
AM
2312+http://aufs.sf.net
2313+Junjiro R. Okajima
2314+
2315+
2316+0. Introduction
2317+----------------------------------------
2318+In the early days, aufs was entirely re-designed and re-implemented
7e9cd9fe 2319+Unionfs Version 1.x series. Adding many original ideas, approaches,
53392da6
AM
2320+improvements and implementations, it becomes totally different from
2321+Unionfs while keeping the basic features.
2322+Recently, Unionfs Version 2.x series begin taking some of the same
2323+approaches to aufs1's.
2324+Unionfs is being developed by Professor Erez Zadok at Stony Brook
2325+University and his team.
2326+
5527c038 2327+Aufs4 supports linux-4.0 and later, and for linux-3.x series try aufs3.
53392da6
AM
2328+If you want older kernel version support, try aufs2-2.6.git or
2329+aufs2-standalone.git repository, aufs1 from CVS on SourceForge.
2330+
2331+Note: it becomes clear that "Aufs was rejected. Let's give it up."
38d290e6
JR
2332+ According to Christoph Hellwig, linux rejects all union-type
2333+ filesystems but UnionMount.
53392da6
AM
2334+<http://marc.info/?l=linux-kernel&m=123938533724484&w=2>
2335+
38d290e6
JR
2336+PS. Al Viro seems to have a plan to merge aufs as well as overlayfs and
2337+ UnionMount, and he pointed out an issue around a directory mutex
2338+ lock and aufs addressed it. But it is still unsure whether aufs will
2339+ be merged (or any other union solution).
076b876e 2340+<http://marc.info/?l=linux-kernel&m=136312705029295&w=1>
38d290e6 2341+
53392da6
AM
2342+
2343+1. Features
2344+----------------------------------------
2345+- unite several directories into a single virtual filesystem. The member
2346+ directory is called as a branch.
2347+- you can specify the permission flags to the branch, which are 'readonly',
2348+ 'readwrite' and 'whiteout-able.'
2349+- by upper writable branch, internal copyup and whiteout, files/dirs on
2350+ readonly branch are modifiable logically.
2351+- dynamic branch manipulation, add, del.
2352+- etc...
2353+
7e9cd9fe
AM
2354+Also there are many enhancements in aufs, such as:
2355+- test only the highest one for the directory permission (dirperm1)
2356+- copyup on open (coo=)
2357+- 'move' policy for copy-up between two writable branches, after
2358+ checking free space.
2359+- xattr, acl
53392da6
AM
2360+- readdir(3) in userspace.
2361+- keep inode number by external inode number table
2362+- keep the timestamps of file/dir in internal copyup operation
2363+- seekable directory, supporting NFS readdir.
2364+- whiteout is hardlinked in order to reduce the consumption of inodes
2365+ on branch
2366+- do not copyup, nor create a whiteout when it is unnecessary
2367+- revert a single systemcall when an error occurs in aufs
2368+- remount interface instead of ioctl
2369+- maintain /etc/mtab by an external command, /sbin/mount.aufs.
2370+- loopback mounted filesystem as a branch
2371+- kernel thread for removing the dir which has plenty of whiteouts
2372+- support copyup sparse file (a file which has a 'hole' in it)
2373+- default permission flags for branches
2374+- selectable permission flags for ro branch, whether whiteout can
2375+ exist or not
2376+- export via NFS.
2377+- support <sysfs>/fs/aufs and <debugfs>/aufs.
2378+- support multiple writable branches, some policies to select one
2379+ among multiple writable branches.
2380+- a new semantics for link(2) and rename(2) to support multiple
2381+ writable branches.
2382+- no glibc changes are required.
2383+- pseudo hardlink (hardlink over branches)
2384+- allow a direct access manually to a file on branch, e.g. bypassing aufs.
2385+ including NFS or remote filesystem branch.
2386+- userspace wrapper for pathconf(3)/fpathconf(3) with _PC_LINK_MAX.
2387+- and more...
2388+
5527c038 2389+Currently these features are dropped temporarily from aufs4.
53392da6 2390+See design/08plan.txt in detail.
53392da6
AM
2391+- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs
2392+ (robr)
2393+- statistics of aufs thread (/sys/fs/aufs/stat)
53392da6
AM
2394+
2395+Features or just an idea in the future (see also design/*.txt),
2396+- reorder the branch index without del/re-add.
2397+- permanent xino files for NFSD
2398+- an option for refreshing the opened files after add/del branches
53392da6
AM
2399+- light version, without branch manipulation. (unnecessary?)
2400+- copyup in userspace
2401+- inotify in userspace
2402+- readv/writev
53392da6
AM
2403+
2404+
2405+2. Download
2406+----------------------------------------
5527c038
JR
2407+There are three GIT trees for aufs4, aufs4-linux.git,
2408+aufs4-standalone.git, and aufs-util.git. Note that there is no "4" in
1e00d052 2409+"aufs-util.git."
5527c038
JR
2410+While the aufs-util is always necessary, you need either of aufs4-linux
2411+or aufs4-standalone.
1e00d052 2412+
5527c038 2413+The aufs4-linux tree includes the whole linux mainline GIT tree,
1e00d052
AM
2414+git://git.kernel.org/.../torvalds/linux.git.
2415+And you cannot select CONFIG_AUFS_FS=m for this version, eg. you cannot
5527c038 2416+build aufs4 as an external kernel module.
2000de60 2417+Several extra patches are not included in this tree. Only
be52b249 2418+aufs4-standalone tree contains them. They are described in the later
2000de60 2419+section "Configuration and Compilation."
1e00d052 2420+
5527c038 2421+On the other hand, the aufs4-standalone tree has only aufs source files
53392da6 2422+and necessary patches, and you can select CONFIG_AUFS_FS=m.
2000de60 2423+But you need to apply all aufs patches manually.
53392da6 2424+
5527c038
JR
2425+You will find GIT branches whose name is in form of "aufs4.x" where "x"
2426+represents the linux kernel version, "linux-4.x". For instance,
2427+"aufs4.0" is for linux-4.0. For latest "linux-4.x-rcN", use
2428+"aufs4.x-rcN" branch.
1e00d052 2429+
5527c038 2430+o aufs4-linux tree
1e00d052 2431+$ git clone --reference /your/linux/git/tree \
5527c038 2432+ git://github.com/sfjro/aufs4-linux.git aufs4-linux.git
1e00d052 2433+- if you don't have linux GIT tree, then remove "--reference ..."
5527c038
JR
2434+$ cd aufs4-linux.git
2435+$ git checkout origin/aufs4.0
53392da6 2436+
2000de60
JR
2437+Or you may want to directly git-pull aufs into your linux GIT tree, and
2438+leave the patch-work to GIT.
2439+$ cd /your/linux/git/tree
5527c038
JR
2440+$ git remote add aufs4 git://github.com/sfjro/aufs4-linux.git
2441+$ git fetch aufs4
2442+$ git checkout -b my4.0 v4.0
2443+$ (add your local change...)
2444+$ git pull aufs4 aufs4.0
2445+- now you have v4.0 + your_changes + aufs4.0 in your my4.0 branch.
2000de60 2446+- you may need to solve some conflicts between your_changes and
5527c038
JR
2447+ aufs4.0. in this case, git-rerere is recommended so that you can
2448+ solve the similar conflicts automatically when you upgrade to 4.1 or
2000de60
JR
2449+ later in the future.
2450+
5527c038
JR
2451+o aufs4-standalone tree
2452+$ git clone git://github.com/sfjro/aufs4-standalone.git aufs4-standalone.git
2453+$ cd aufs4-standalone.git
2454+$ git checkout origin/aufs4.0
53392da6
AM
2455+
2456+o aufs-util tree
5527c038
JR
2457+$ git clone git://git.code.sf.net/p/aufs/aufs-util aufs-util.git
2458+- note that the public aufs-util.git is on SourceForge instead of
2459+ GitHUB.
53392da6 2460+$ cd aufs-util.git
5527c038 2461+$ git checkout origin/aufs4.0
53392da6 2462+
5527c038
JR
2463+Note: The 4.x-rcN branch is to be used with `rc' kernel versions ONLY.
2464+The minor version number, 'x' in '4.x', of aufs may not always
9dbd164d
AM
2465+follow the minor version number of the kernel.
2466+Because changes in the kernel that cause the use of a new
2467+minor version number do not always require changes to aufs-util.
2468+
2469+Since aufs-util has its own minor version number, you may not be
2470+able to find a GIT branch in aufs-util for your kernel's
2471+exact minor version number.
2472+In this case, you should git-checkout the branch for the
53392da6 2473+nearest lower number.
9dbd164d
AM
2474+
2475+For (an unreleased) example:
5527c038
JR
2476+If you are using "linux-4.10" and the "aufs4.10" branch
2477+does not exist in aufs-util repository, then "aufs4.9", "aufs4.8"
9dbd164d
AM
2478+or something numerically smaller is the branch for your kernel.
2479+
53392da6
AM
2480+Also you can view all branches by
2481+ $ git branch -a
2482+
2483+
2484+3. Configuration and Compilation
2485+----------------------------------------
2486+Make sure you have git-checkout'ed the correct branch.
2487+
5527c038 2488+For aufs4-linux tree,
c06a8ce3 2489+- enable CONFIG_AUFS_FS.
1e00d052
AM
2490+- set other aufs configurations if necessary.
2491+
5527c038 2492+For aufs4-standalone tree,
53392da6
AM
2493+There are several ways to build.
2494+
2495+1.
5527c038
JR
2496+- apply ./aufs4-kbuild.patch to your kernel source files.
2497+- apply ./aufs4-base.patch too.
2498+- apply ./aufs4-mmap.patch too.
2499+- apply ./aufs4-standalone.patch too, if you have a plan to set
2500+ CONFIG_AUFS_FS=m. otherwise you don't need ./aufs4-standalone.patch.
537831f9
AM
2501+- copy ./{Documentation,fs,include/uapi/linux/aufs_type.h} files to your
2502+ kernel source tree. Never copy $PWD/include/uapi/linux/Kbuild.
c06a8ce3 2503+- enable CONFIG_AUFS_FS, you can select either
53392da6
AM
2504+ =m or =y.
2505+- and build your kernel as usual.
2506+- install the built kernel.
c06a8ce3
AM
2507+ Note: Since linux-3.9, every filesystem module requires an alias
2508+ "fs-<fsname>". You should make sure that "fs-aufs" is listed in your
2509+ modules.aliases file if you set CONFIG_AUFS_FS=m.
7eafdf33
AM
2510+- install the header files too by "make headers_install" to the
2511+ directory where you specify. By default, it is $PWD/usr.
b4510431 2512+ "make help" shows a brief note for headers_install.
53392da6
AM
2513+- and reboot your system.
2514+
2515+2.
2516+- module only (CONFIG_AUFS_FS=m).
5527c038
JR
2517+- apply ./aufs4-base.patch to your kernel source files.
2518+- apply ./aufs4-mmap.patch too.
2519+- apply ./aufs4-standalone.patch too.
53392da6
AM
2520+- build your kernel, don't forget "make headers_install", and reboot.
2521+- edit ./config.mk and set other aufs configurations if necessary.
b4510431 2522+ Note: You should read $PWD/fs/aufs/Kconfig carefully which describes
53392da6
AM
2523+ every aufs configuration.
2524+- build the module by simple "make".
c06a8ce3
AM
2525+ Note: Since linux-3.9, every filesystem module requires an alias
2526+ "fs-<fsname>". You should make sure that "fs-aufs" is listed in your
2527+ modules.aliases file.
53392da6
AM
2528+- you can specify ${KDIR} make variable which points to your kernel
2529+ source tree.
2530+- install the files
2531+ + run "make install" to install the aufs module, or copy the built
b4510431
AM
2532+ $PWD/aufs.ko to /lib/modules/... and run depmod -a (or reboot simply).
2533+ + run "make install_headers" (instead of headers_install) to install
2534+ the modified aufs header file (you can specify DESTDIR which is
2535+ available in aufs standalone version's Makefile only), or copy
2536+ $PWD/usr/include/linux/aufs_type.h to /usr/include/linux or wherever
2537+ you like manually. By default, the target directory is $PWD/usr.
5527c038 2538+- no need to apply aufs4-kbuild.patch, nor copying source files to your
53392da6
AM
2539+ kernel source tree.
2540+
b4510431 2541+Note: The header file aufs_type.h is necessary to build aufs-util
53392da6
AM
2542+ as well as "make headers_install" in the kernel source tree.
2543+ headers_install is subject to be forgotten, but it is essentially
2544+ necessary, not only for building aufs-util.
2545+ You may not meet problems without headers_install in some older
2546+ version though.
2547+
2548+And then,
2549+- read README in aufs-util, build and install it
9dbd164d
AM
2550+- note that your distribution may contain an obsoleted version of
2551+ aufs_type.h in /usr/include/linux or something. When you build aufs
2552+ utilities, make sure that your compiler refers to the correct aufs header
2553+ file which is built by "make headers_install."
53392da6
AM
2554+- if you want to use readdir(3) in userspace or pathconf(3) wrapper,
2555+ then run "make install_ulib" too. And refer to the aufs manual in
2556+ detail.
2557+
5527c038 2558+There are several other patches in aufs4-standalone.git. They are all
38d290e6 2559+optional. When you meet some problems, they will help you.
5527c038 2560+- aufs4-loopback.patch
38d290e6
JR
2561+ Supports a nested loopback mount in a branch-fs. This patch is
2562+ unnecessary until aufs produces a message like "you may want to try
2563+ another patch for loopback file".
2564+- vfs-ino.patch
2565+ Modifies a system global kernel internal function get_next_ino() in
2566+ order to stop assigning 0 for an inode-number. Not directly related to
2567+ aufs, but recommended generally.
2568+- tmpfs-idr.patch
2569+ Keeps the tmpfs inode number as the lowest value. Effective to reduce
2570+ the size of aufs XINO files for tmpfs branch. Also it prevents the
2571+ duplication of inode number, which is important for backup tools and
2572+ other utilities. When you find aufs XINO files for tmpfs branch
2573+ growing too much, try this patch.
be52b249
AM
2574+- lockdep-debug.patch
2575+ Because aufs is not only an ordinary filesystem (callee of VFS), but
2576+ also a caller of VFS functions for branch filesystems, subclassing of
2577+ the internal locks for LOCKDEP is necessary. LOCKDEP is a debugging
2578+ feature of linux kernel. If you enable CONFIG_LOCKDEP, then you will
2579+ need to apply this debug patch to expand several constant values.
2580+ If you don't know what LOCKDEP is, then you don't have to apply this patch.
38d290e6 2581+
53392da6
AM
2582+
2583+4. Usage
2584+----------------------------------------
2585+At first, make sure aufs-util is installed, and please read the aufs
2586+manual, aufs.5 in aufs-util.git tree.
2587+$ man -l aufs.5
2588+
2589+And then,
2590+$ mkdir /tmp/rw /tmp/aufs
2591+# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs
2592+
2593+Here is another example. The result is equivalent.
2594+# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs
2595+ Or
2596+# mount -t aufs -o br:/tmp/rw none /tmp/aufs
2597+# mount -o remount,append:${HOME} /tmp/aufs
2598+
2599+Then, you can see whole tree of your home dir through /tmp/aufs. If
2600+you modify a file under /tmp/aufs, the one on your home directory is
2601+not affected, instead the same named file will be newly created under
2602+/tmp/rw. And all of your modification to a file will be applied to
2603+the one under /tmp/rw. This is called the file based Copy on Write
2604+(COW) method.
2605+Aufs mount options are described in aufs.5.
2606+If you run chroot or something and make your aufs as a root directory,
2607+then you need to customize the shutdown script. See the aufs manual in
2608+detail.
2609+
2610+Additionally, there are some sample usages of aufs which are a
2611+diskless system with network booting, and LiveCD over NFS.
2612+See sample dir in CVS tree on SourceForge.
2613+
2614+
2615+5. Contact
2616+----------------------------------------
2617+When you have any problems or strange behaviour in aufs, please let me
2618+know with:
2619+- /proc/mounts (instead of the output of mount(8))
2620+- /sys/module/aufs/*
2621+- /sys/fs/aufs/* (if you have them)
2622+- /debug/aufs/* (if you have them)
2623+- linux kernel version
2624+ if your kernel is not plain, for example modified by distributor,
2625+ the url where i can download its source is necessary too.
2626+- aufs version which was printed at loading the module or booting the
2627+ system, instead of the date you downloaded.
2628+- configuration (define/undefine CONFIG_AUFS_xxx)
2629+- kernel configuration or /proc/config.gz (if you have it)
2630+- behaviour which you think to be incorrect
2631+- actual operation, reproducible one is better
2632+- mailto: aufs-users at lists.sourceforge.net
2633+
2634+Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches,
2635+and Feature Requests) on SourceForge. Please join and write to
2636+aufs-users ML.
2637+
2638+
2639+6. Acknowledgements
2640+----------------------------------------
2641+Thanks to everyone who have tried and are using aufs, whoever
2642+have reported a bug or any feedback.
2643+
2644+Especially donators:
2645+Tomas Matejicek(slax.org) made a donation (much more than once).
2646+ Since Apr 2010, Tomas M (the author of Slax and Linux Live
2647+ scripts) is making "doubling" donations.
2648+ Unfortunately I cannot list all of the donators, but I really
b4510431 2649+ appreciate.
53392da6
AM
2650+ It ends Aug 2010, but the ordinary donation URL is still available.
2651+ <http://sourceforge.net/donate/index.php?group_id=167503>
2652+Dai Itasaka made a donation (2007/8).
2653+Chuck Smith made a donation (2008/4, 10 and 12).
2654+Henk Schoneveld made a donation (2008/9).
2655+Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10).
2656+Francois Dupoux made a donation (2008/11).
2657+Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public
2658+ aufs2 GIT tree (2009/2).
2659+William Grant made a donation (2009/3).
2660+Patrick Lane made a donation (2009/4).
2661+The Mail Archive (mail-archive.com) made donations (2009/5).
2662+Nippy Networks (Ed Wildgoose) made a donation (2009/7).
2663+New Dream Network, LLC (www.dreamhost.com) made a donation (2009/11).
2664+Pavel Pronskiy made a donation (2011/2).
2665+Iridium and Inmarsat satellite phone retailer (www.mailasail.com), Nippy
2666+ Networks (Ed Wildgoose) made a donation for hardware (2011/3).
537831f9
AM
2667+Max Lekomcev (DOM-TV project) made a donation (2011/7, 12, 2012/3, 6 and
2668+11).
1e00d052 2669+Sam Liddicott made a donation (2011/9).
86dc4139
AM
2670+Era Scarecrow made a donation (2013/4).
2671+Bor Ratajc made a donation (2013/4).
2672+Alessandro Gorreta made a donation (2013/4).
2673+POIRETTE Marc made a donation (2013/4).
2674+Alessandro Gorreta made a donation (2013/4).
2675+lauri kasvandik made a donation (2013/5).
392086de 2676+"pemasu from Finland" made a donation (2013/7).
523b37e3
AM
2677+The Parted Magic Project made a donation (2013/9 and 11).
2678+Pavel Barta made a donation (2013/10).
38d290e6 2679+Nikolay Pertsev made a donation (2014/5).
c2c0f25c 2680+James B made a donation (2014/7 and 2015/7).
076b876e 2681+Stefano Di Biase made a donation (2014/8).
2000de60 2682+Daniel Epellei made a donation (2015/1).
8cdd5066 2683+OmegaPhil made a donation (2016/1).
53392da6
AM
2684+
2685+Thank you very much.
2686+Donations are always, including future donations, very important and
2687+helpful for me to keep on developing aufs.
2688+
2689+
2690+7.
2691+----------------------------------------
2692+If you are an experienced user, no explanation is needed. Aufs is
2693+just a linux filesystem.
2694+
2695+
2696+Enjoy!
2697+
2698+# Local variables: ;
2699+# mode: text;
2700+# End: ;
7f207e10
AM
2701diff -urN /usr/share/empty/fs/aufs/aufs.h linux/fs/aufs/aufs.h
2702--- /usr/share/empty/fs/aufs/aufs.h 1970-01-01 01:00:00.000000000 +0100
8cdd5066 2703+++ linux/fs/aufs/aufs.h 2016-02-28 11:26:32.569971135 +0100
523b37e3 2704@@ -0,0 +1,59 @@
7f207e10 2705+/*
8cdd5066 2706+ * Copyright (C) 2005-2016 Junjiro R. Okajima
7f207e10
AM
2707+ *
2708+ * This program, aufs is free software; you can redistribute it and/or modify
2709+ * it under the terms of the GNU General Public License as published by
2710+ * the Free Software Foundation; either version 2 of the License, or
2711+ * (at your option) any later version.
2712+ *
2713+ * This program is distributed in the hope that it will be useful,
2714+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2715+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2716+ * GNU General Public License for more details.
2717+ *
2718+ * You should have received a copy of the GNU General Public License
523b37e3 2719+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7f207e10
AM
2720+ */
2721+
2722+/*
2723+ * all header files
2724+ */
2725+
2726+#ifndef __AUFS_H__
2727+#define __AUFS_H__
2728+
2729+#ifdef __KERNEL__
2730+
2731+#define AuStub(type, name, body, ...) \
2732+ static inline type name(__VA_ARGS__) { body; }
2733+
2734+#define AuStubVoid(name, ...) \
2735+ AuStub(void, name, , __VA_ARGS__)
2736+#define AuStubInt0(name, ...) \
2737+ AuStub(int, name, return 0, __VA_ARGS__)
2738+
2739+#include "debug.h"
2740+
2741+#include "branch.h"
2742+#include "cpup.h"
2743+#include "dcsub.h"
2744+#include "dbgaufs.h"
2745+#include "dentry.h"
2746+#include "dir.h"
2747+#include "dynop.h"
2748+#include "file.h"
2749+#include "fstype.h"
2750+#include "inode.h"
2751+#include "loop.h"
2752+#include "module.h"
7f207e10
AM
2753+#include "opts.h"
2754+#include "rwsem.h"
2755+#include "spl.h"
2756+#include "super.h"
2757+#include "sysaufs.h"
2758+#include "vfsub.h"
2759+#include "whout.h"
2760+#include "wkq.h"
2761+
2762+#endif /* __KERNEL__ */
2763+#endif /* __AUFS_H__ */
2764diff -urN /usr/share/empty/fs/aufs/branch.c linux/fs/aufs/branch.c
2765--- /usr/share/empty/fs/aufs/branch.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 2766+++ linux/fs/aufs/branch.c 2016-02-28 11:26:32.569971135 +0100
be52b249 2767@@ -0,0 +1,1407 @@
7f207e10 2768+/*
8cdd5066 2769+ * Copyright (C) 2005-2016 Junjiro R. Okajima
7f207e10
AM
2770+ *
2771+ * This program, aufs is free software; you can redistribute it and/or modify
2772+ * it under the terms of the GNU General Public License as published by
2773+ * the Free Software Foundation; either version 2 of the License, or
2774+ * (at your option) any later version.
2775+ *
2776+ * This program is distributed in the hope that it will be useful,
2777+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2778+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2779+ * GNU General Public License for more details.
2780+ *
2781+ * You should have received a copy of the GNU General Public License
523b37e3 2782+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7f207e10
AM
2783+ */
2784+
2785+/*
2786+ * branch management
2787+ */
2788+
027c5e7a 2789+#include <linux/compat.h>
7f207e10
AM
2790+#include <linux/statfs.h>
2791+#include "aufs.h"
2792+
2793+/*
2794+ * free a single branch
1facf9fc 2795+ */
2796+static void au_br_do_free(struct au_branch *br)
2797+{
2798+ int i;
2799+ struct au_wbr *wbr;
4a4d8108 2800+ struct au_dykey **key;
1facf9fc 2801+
027c5e7a
AM
2802+ au_hnotify_fin_br(br);
2803+
1facf9fc 2804+ if (br->br_xino.xi_file)
2805+ fput(br->br_xino.xi_file);
2806+ mutex_destroy(&br->br_xino.xi_nondir_mtx);
2807+
2808+ AuDebugOn(atomic_read(&br->br_count));
2809+
2810+ wbr = br->br_wbr;
2811+ if (wbr) {
2812+ for (i = 0; i < AuBrWh_Last; i++)
2813+ dput(wbr->wbr_wh[i]);
2814+ AuDebugOn(atomic_read(&wbr->wbr_wh_running));
dece6358 2815+ AuRwDestroy(&wbr->wbr_wh_rwsem);
1facf9fc 2816+ }
2817+
076b876e
AM
2818+ if (br->br_fhsm) {
2819+ au_br_fhsm_fin(br->br_fhsm);
2820+ kfree(br->br_fhsm);
2821+ }
2822+
4a4d8108
AM
2823+ key = br->br_dykey;
2824+ for (i = 0; i < AuBrDynOp; i++, key++)
2825+ if (*key)
2826+ au_dy_put(*key);
2827+ else
2828+ break;
2829+
537831f9
AM
2830+ /* recursive lock, s_umount of branch's */
2831+ lockdep_off();
86dc4139 2832+ path_put(&br->br_path);
537831f9 2833+ lockdep_on();
1facf9fc 2834+ kfree(wbr);
2835+ kfree(br);
2836+}
2837+
2838+/*
2839+ * frees all branches
2840+ */
2841+void au_br_free(struct au_sbinfo *sbinfo)
2842+{
2843+ aufs_bindex_t bmax;
2844+ struct au_branch **br;
2845+
dece6358
AM
2846+ AuRwMustWriteLock(&sbinfo->si_rwsem);
2847+
1facf9fc 2848+ bmax = sbinfo->si_bend + 1;
2849+ br = sbinfo->si_branch;
2850+ while (bmax--)
2851+ au_br_do_free(*br++);
2852+}
2853+
2854+/*
2855+ * find the index of a branch which is specified by @br_id.
2856+ */
2857+int au_br_index(struct super_block *sb, aufs_bindex_t br_id)
2858+{
2859+ aufs_bindex_t bindex, bend;
2860+
2861+ bend = au_sbend(sb);
2862+ for (bindex = 0; bindex <= bend; bindex++)
2863+ if (au_sbr_id(sb, bindex) == br_id)
2864+ return bindex;
2865+ return -1;
2866+}
2867+
2868+/* ---------------------------------------------------------------------- */
2869+
2870+/*
2871+ * add a branch
2872+ */
2873+
b752ccd1
AM
2874+static int test_overlap(struct super_block *sb, struct dentry *h_adding,
2875+ struct dentry *h_root)
1facf9fc 2876+{
b752ccd1
AM
2877+ if (unlikely(h_adding == h_root
2878+ || au_test_loopback_overlap(sb, h_adding)))
1facf9fc 2879+ return 1;
b752ccd1
AM
2880+ if (h_adding->d_sb != h_root->d_sb)
2881+ return 0;
2882+ return au_test_subdir(h_adding, h_root)
2883+ || au_test_subdir(h_root, h_adding);
1facf9fc 2884+}
2885+
2886+/*
2887+ * returns a newly allocated branch, or ERR_PTR() on failure (never NULL).
2888+ * @new_nbranch is the number of branches after adding a branch.
2889+ */
2890+static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch,
2891+ int perm)
2892+{
2893+ struct au_branch *add_branch;
2894+ struct dentry *root;
5527c038 2895+ struct inode *inode;
4a4d8108 2896+ int err;
1facf9fc 2897+
4a4d8108 2898+ err = -ENOMEM;
1facf9fc 2899+ root = sb->s_root;
be52b249 2900+ add_branch = kzalloc(sizeof(*add_branch), GFP_NOFS);
1facf9fc 2901+ if (unlikely(!add_branch))
2902+ goto out;
2903+
027c5e7a
AM
2904+ err = au_hnotify_init_br(add_branch, perm);
2905+ if (unlikely(err))
2906+ goto out_br;
2907+ err = -ENOMEM; /* err is 0 here; a failed br_wbr alloc must not return NULL */
1facf9fc 2908+ if (au_br_writable(perm)) {
2909+ /* may be freed separately when changing the branch permission */
be52b249 2910+ add_branch->br_wbr = kzalloc(sizeof(*add_branch->br_wbr),
1facf9fc 2911+ GFP_NOFS);
2912+ if (unlikely(!add_branch->br_wbr))
027c5e7a 2913+ goto out_hnotify;
1facf9fc 2914+ }
2915+
076b876e
AM
2916+ if (au_br_fhsm(perm)) {
2917+ err = au_fhsm_br_alloc(add_branch);
2918+ if (unlikely(err))
2919+ goto out_wbr;
2920+ }
2921+
4a4d8108
AM
2922+ err = au_sbr_realloc(au_sbi(sb), new_nbranch);
2923+ if (!err)
2924+ err = au_di_realloc(au_di(root), new_nbranch);
5527c038
JR
2925+ if (!err) {
2926+ inode = d_inode(root);
2927+ err = au_ii_realloc(au_ii(inode), new_nbranch);
2928+ }
4a4d8108
AM
2929+ if (!err)
2930+ return add_branch; /* success */
1facf9fc 2931+
076b876e 2932+out_wbr:
1facf9fc 2933+ kfree(add_branch->br_wbr);
027c5e7a
AM
2934+out_hnotify:
2935+ au_hnotify_fin_br(add_branch);
4f0767ce 2936+out_br:
1facf9fc 2937+ kfree(add_branch);
4f0767ce 2938+out:
4a4d8108 2939+ return ERR_PTR(err);
1facf9fc 2940+}
2941+
2942+/*
2943+ * test if the branch permission is legal or not.
2944+ */
2945+static int test_br(struct inode *inode, int brperm, char *path)
2946+{
2947+ int err;
2948+
4a4d8108
AM
2949+ err = (au_br_writable(brperm) && IS_RDONLY(inode));
2950+ if (!err)
2951+ goto out;
1facf9fc 2952+
4a4d8108
AM
2953+ err = -EINVAL;
2954+ pr_err("write permission for readonly mount or inode, %s\n", path);
2955+
4f0767ce 2956+out:
1facf9fc 2957+ return err;
2958+}
2959+
2960+/*
2961+ * returns:
2962+ * 0: success, the caller will add it
2963+ * plus: success, it is already unified, the caller should ignore it
2964+ * minus: error
2965+ */
2966+static int test_add(struct super_block *sb, struct au_opt_add *add, int remount)
2967+{
2968+ int err;
2969+ aufs_bindex_t bend, bindex;
5527c038 2970+ struct dentry *root, *h_dentry;
1facf9fc 2971+ struct inode *inode, *h_inode;
2972+
2973+ root = sb->s_root;
2974+ bend = au_sbend(sb);
2975+ if (unlikely(bend >= 0
2976+ && au_find_dbindex(root, add->path.dentry) >= 0)) {
2977+ err = 1;
2978+ if (!remount) {
2979+ err = -EINVAL;
4a4d8108 2980+ pr_err("%s duplicated\n", add->pathname);
1facf9fc 2981+ }
2982+ goto out;
2983+ }
2984+
2985+ err = -ENOSPC; /* -E2BIG; */
2986+ if (unlikely(AUFS_BRANCH_MAX <= add->bindex
2987+ || AUFS_BRANCH_MAX - 1 <= bend)) {
4a4d8108 2988+ pr_err("number of branches exceeded %s\n", add->pathname);
1facf9fc 2989+ goto out;
2990+ }
2991+
2992+ err = -EDOM;
2993+ if (unlikely(add->bindex < 0 || bend + 1 < add->bindex)) {
4a4d8108 2994+ pr_err("bad index %d\n", add->bindex);
1facf9fc 2995+ goto out;
2996+ }
2997+
5527c038 2998+ inode = d_inode(add->path.dentry);
1facf9fc 2999+ err = -ENOENT;
3000+ if (unlikely(!inode->i_nlink)) {
4a4d8108 3001+ pr_err("no existence %s\n", add->pathname);
1facf9fc 3002+ goto out;
3003+ }
3004+
3005+ err = -EINVAL;
3006+ if (unlikely(inode->i_sb == sb)) {
4a4d8108 3007+ pr_err("%s must be outside\n", add->pathname);
1facf9fc 3008+ goto out;
3009+ }
3010+
3011+ if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) {
4a4d8108
AM
3012+ pr_err("unsupported filesystem, %s (%s)\n",
3013+ add->pathname, au_sbtype(inode->i_sb));
1facf9fc 3014+ goto out;
3015+ }
3016+
c1595e42
JR
3017+ if (unlikely(inode->i_sb->s_stack_depth)) {
3018+ pr_err("already stacked, %s (%s)\n",
3019+ add->pathname, au_sbtype(inode->i_sb));
3020+ goto out;
3021+ }
3022+
5527c038 3023+ err = test_br(d_inode(add->path.dentry), add->perm, add->pathname);
1facf9fc 3024+ if (unlikely(err))
3025+ goto out;
3026+
3027+ if (bend < 0)
3028+ return 0; /* success */
3029+
3030+ err = -EINVAL;
3031+ for (bindex = 0; bindex <= bend; bindex++)
3032+ if (unlikely(test_overlap(sb, add->path.dentry,
3033+ au_h_dptr(root, bindex)))) {
4a4d8108 3034+ pr_err("%s is overlapped\n", add->pathname);
1facf9fc 3035+ goto out;
3036+ }
3037+
3038+ err = 0;
3039+ if (au_opt_test(au_mntflags(sb), WARN_PERM)) {
5527c038
JR
3040+ h_dentry = au_h_dptr(root, 0);
3041+ h_inode = d_inode(h_dentry);
1facf9fc 3042+ if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO)
0c3ec466
AM
3043+ || !uid_eq(h_inode->i_uid, inode->i_uid)
3044+ || !gid_eq(h_inode->i_gid, inode->i_gid))
3045+ pr_warn("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n",
3046+ add->pathname,
3047+ i_uid_read(inode), i_gid_read(inode),
3048+ (inode->i_mode & S_IALLUGO),
3049+ i_uid_read(h_inode), i_gid_read(h_inode),
3050+ (h_inode->i_mode & S_IALLUGO));
1facf9fc 3051+ }
3052+
4f0767ce 3053+out:
1facf9fc 3054+ return err;
3055+}
3056+
3057+/*
3058+ * initialize or clean the whiteouts for a branch being added
3059+ */
3060+static int au_br_init_wh(struct super_block *sb, struct au_branch *br,
86dc4139 3061+ int new_perm)
1facf9fc 3062+{
3063+ int err, old_perm;
3064+ aufs_bindex_t bindex;
3065+ struct mutex *h_mtx;
3066+ struct au_wbr *wbr;
3067+ struct au_hinode *hdir;
5527c038 3068+ struct dentry *h_dentry;
1facf9fc 3069+
86dc4139
AM
3070+ err = vfsub_mnt_want_write(au_br_mnt(br));
3071+ if (unlikely(err))
3072+ goto out;
3073+
1facf9fc 3074+ wbr = br->br_wbr;
3075+ old_perm = br->br_perm;
3076+ br->br_perm = new_perm;
3077+ hdir = NULL;
3078+ h_mtx = NULL;
3079+ bindex = au_br_index(sb, br->br_id);
3080+ if (0 <= bindex) {
5527c038 3081+ hdir = au_hi(d_inode(sb->s_root), bindex);
4a4d8108 3082+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 3083+ } else {
5527c038
JR
3084+ h_dentry = au_br_dentry(br);
3085+ h_mtx = &d_inode(h_dentry)->i_mutex;
1facf9fc 3086+ mutex_lock_nested(h_mtx, AuLsc_I_PARENT);
3087+ }
3088+ if (!wbr)
86dc4139 3089+ err = au_wh_init(br, sb);
1facf9fc 3090+ else {
3091+ wbr_wh_write_lock(wbr);
86dc4139 3092+ err = au_wh_init(br, sb);
1facf9fc 3093+ wbr_wh_write_unlock(wbr);
3094+ }
3095+ if (hdir)
4a4d8108 3096+ au_hn_imtx_unlock(hdir);
1facf9fc 3097+ else
3098+ mutex_unlock(h_mtx);
86dc4139 3099+ vfsub_mnt_drop_write(au_br_mnt(br));
1facf9fc 3100+ br->br_perm = old_perm;
3101+
3102+ if (!err && wbr && !au_br_writable(new_perm)) {
3103+ kfree(wbr);
3104+ br->br_wbr = NULL;
3105+ }
3106+
86dc4139 3107+out:
1facf9fc 3108+ return err;
3109+}
3110+
3111+static int au_wbr_init(struct au_branch *br, struct super_block *sb,
86dc4139 3112+ int perm)
1facf9fc 3113+{
3114+ int err;
4a4d8108 3115+ struct kstatfs kst;
1facf9fc 3116+ struct au_wbr *wbr;
3117+
3118+ wbr = br->br_wbr;
dece6358 3119+ au_rw_init(&wbr->wbr_wh_rwsem);
1facf9fc 3120+ atomic_set(&wbr->wbr_wh_running, 0);
1facf9fc 3121+
4a4d8108
AM
3122+ /*
3123+ * a limit for rmdir/rename a dir
523b37e3 3124+ * cf. AUFS_MAX_NAMELEN in include/uapi/linux/aufs_type.h
4a4d8108 3125+ */
86dc4139 3126+ err = vfs_statfs(&br->br_path, &kst);
4a4d8108
AM
3127+ if (unlikely(err))
3128+ goto out;
3129+ err = -EINVAL;
3130+ if (kst.f_namelen >= NAME_MAX)
86dc4139 3131+ err = au_br_init_wh(sb, br, perm);
4a4d8108 3132+ else
523b37e3
AM
3133+ pr_err("%pd(%s), unsupported namelen %ld\n",
3134+ au_br_dentry(br),
86dc4139 3135+ au_sbtype(au_br_dentry(br)->d_sb), kst.f_namelen);
1facf9fc 3136+
4f0767ce 3137+out:
1facf9fc 3138+ return err;
3139+}
3140+
c1595e42 3141+/* initialize a new branch */
1facf9fc 3142+static int au_br_init(struct au_branch *br, struct super_block *sb,
3143+ struct au_opt_add *add)
3144+{
3145+ int err;
5527c038 3146+ struct inode *h_inode;
1facf9fc 3147+
3148+ err = 0;
1facf9fc 3149+ mutex_init(&br->br_xino.xi_nondir_mtx);
3150+ br->br_perm = add->perm;
86dc4139 3151+ br->br_path = add->path; /* set first, path_get() later */
4a4d8108 3152+ spin_lock_init(&br->br_dykey_lock);
1facf9fc 3153+ atomic_set(&br->br_count, 0);
1facf9fc 3154+ atomic_set(&br->br_xino_running, 0);
3155+ br->br_id = au_new_br_id(sb);
7f207e10 3156+ AuDebugOn(br->br_id < 0);
1facf9fc 3157+
3158+ if (au_br_writable(add->perm)) {
86dc4139 3159+ err = au_wbr_init(br, sb, add->perm);
1facf9fc 3160+ if (unlikely(err))
b752ccd1 3161+ goto out_err;
1facf9fc 3162+ }
3163+
3164+ if (au_opt_test(au_mntflags(sb), XINO)) {
5527c038
JR
3165+ h_inode = d_inode(add->path.dentry);
3166+ err = au_xino_br(sb, br, h_inode->i_ino,
1facf9fc 3167+ au_sbr(sb, 0)->br_xino.xi_file, /*do_test*/1);
3168+ if (unlikely(err)) {
3169+ AuDebugOn(br->br_xino.xi_file);
b752ccd1 3170+ goto out_err;
1facf9fc 3171+ }
3172+ }
3173+
3174+ sysaufs_br_init(br);
86dc4139 3175+ path_get(&br->br_path);
b752ccd1 3176+ goto out; /* success */
1facf9fc 3177+
4f0767ce 3178+out_err:
86dc4139 3179+ memset(&br->br_path, 0, sizeof(br->br_path));
4f0767ce 3180+out:
1facf9fc 3181+ return err;
3182+}
3183+
3184+static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex,
3185+ struct au_branch *br, aufs_bindex_t bend,
3186+ aufs_bindex_t amount)
3187+{
3188+ struct au_branch **brp;
3189+
dece6358
AM
3190+ AuRwMustWriteLock(&sbinfo->si_rwsem);
3191+
1facf9fc 3192+ brp = sbinfo->si_branch + bindex;
3193+ memmove(brp + 1, brp, sizeof(*brp) * amount);
3194+ *brp = br;
3195+ sbinfo->si_bend++;
3196+ if (unlikely(bend < 0))
3197+ sbinfo->si_bend = 0;
3198+}
3199+
3200+static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex,
3201+ aufs_bindex_t bend, aufs_bindex_t amount)
3202+{
3203+ struct au_hdentry *hdp;
3204+
1308ab2a 3205+ AuRwMustWriteLock(&dinfo->di_rwsem);
3206+
1facf9fc 3207+ hdp = dinfo->di_hdentry + bindex;
3208+ memmove(hdp + 1, hdp, sizeof(*hdp) * amount);
3209+ au_h_dentry_init(hdp);
3210+ dinfo->di_bend++;
3211+ if (unlikely(bend < 0))
3212+ dinfo->di_bstart = 0;
3213+}
3214+
3215+static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex,
3216+ aufs_bindex_t bend, aufs_bindex_t amount)
3217+{
3218+ struct au_hinode *hip;
3219+
1308ab2a 3220+ AuRwMustWriteLock(&iinfo->ii_rwsem);
3221+
1facf9fc 3222+ hip = iinfo->ii_hinode + bindex;
3223+ memmove(hip + 1, hip, sizeof(*hip) * amount);
3224+ hip->hi_inode = NULL;
4a4d8108 3225+ au_hn_init(hip);
1facf9fc 3226+ iinfo->ii_bend++;
3227+ if (unlikely(bend < 0))
3228+ iinfo->ii_bstart = 0;
3229+}
3230+
86dc4139
AM
3231+static void au_br_do_add(struct super_block *sb, struct au_branch *br,
3232+ aufs_bindex_t bindex)
1facf9fc 3233+{
86dc4139 3234+ struct dentry *root, *h_dentry;
5527c038 3235+ struct inode *root_inode, *h_inode;
1facf9fc 3236+ aufs_bindex_t bend, amount;
3237+
3238+ root = sb->s_root;
5527c038 3239+ root_inode = d_inode(root);
1facf9fc 3240+ bend = au_sbend(sb);
3241+ amount = bend + 1 - bindex;
86dc4139 3242+ h_dentry = au_br_dentry(br);
53392da6 3243+ au_sbilist_lock();
1facf9fc 3244+ au_br_do_add_brp(au_sbi(sb), bindex, br, bend, amount);
3245+ au_br_do_add_hdp(au_di(root), bindex, bend, amount);
3246+ au_br_do_add_hip(au_ii(root_inode), bindex, bend, amount);
3247+ au_set_h_dptr(root, bindex, dget(h_dentry));
5527c038
JR
3248+ h_inode = d_inode(h_dentry);
3249+ au_set_h_iptr(root_inode, bindex, au_igrab(h_inode), /*flags*/0);
53392da6 3250+ au_sbilist_unlock();
1facf9fc 3251+}
3252+
3253+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount)
3254+{
3255+ int err;
1facf9fc 3256+ aufs_bindex_t bend, add_bindex;
3257+ struct dentry *root, *h_dentry;
3258+ struct inode *root_inode;
3259+ struct au_branch *add_branch;
3260+
3261+ root = sb->s_root;
5527c038 3262+ root_inode = d_inode(root);
1facf9fc 3263+ IMustLock(root_inode);
3264+ err = test_add(sb, add, remount);
3265+ if (unlikely(err < 0))
3266+ goto out;
3267+ if (err) {
3268+ err = 0;
3269+ goto out; /* success */
3270+ }
3271+
3272+ bend = au_sbend(sb);
3273+ add_branch = au_br_alloc(sb, bend + 2, add->perm);
3274+ err = PTR_ERR(add_branch);
3275+ if (IS_ERR(add_branch))
3276+ goto out;
3277+
3278+ err = au_br_init(add_branch, sb, add);
3279+ if (unlikely(err)) {
3280+ au_br_do_free(add_branch);
3281+ goto out;
3282+ }
3283+
3284+ add_bindex = add->bindex;
1facf9fc 3285+ if (!remount)
86dc4139 3286+ au_br_do_add(sb, add_branch, add_bindex);
1facf9fc 3287+ else {
3288+ sysaufs_brs_del(sb, add_bindex);
86dc4139 3289+ au_br_do_add(sb, add_branch, add_bindex);
1facf9fc 3290+ sysaufs_brs_add(sb, add_bindex);
3291+ }
3292+
86dc4139 3293+ h_dentry = add->path.dentry;
1308ab2a 3294+ if (!add_bindex) {
1facf9fc 3295+ au_cpup_attr_all(root_inode, /*force*/1);
1308ab2a 3296+ sb->s_maxbytes = h_dentry->d_sb->s_maxbytes;
3297+ } else
5527c038 3298+ au_add_nlink(root_inode, d_inode(h_dentry));
1facf9fc 3299+
3300+ /*
4a4d8108 3301+ * this test/set prevents aufs from handling unnecessary notify events
027c5e7a 3302+ * of xino files, in case of re-adding a writable branch which was
1facf9fc 3303+ * once detached from aufs.
3304+ */
3305+ if (au_xino_brid(sb) < 0
3306+ && au_br_writable(add_branch->br_perm)
3307+ && !au_test_fs_bad_xino(h_dentry->d_sb)
3308+ && add_branch->br_xino.xi_file
2000de60 3309+ && add_branch->br_xino.xi_file->f_path.dentry->d_parent == h_dentry)
1facf9fc 3310+ au_xino_brid_set(sb, add_branch->br_id);
3311+
4f0767ce 3312+out:
1facf9fc 3313+ return err;
3314+}
3315+
3316+/* ---------------------------------------------------------------------- */
3317+
79b8bda9 3318+static unsigned long long au_farray_cb(struct super_block *sb, void *a,
076b876e
AM
3319+ unsigned long long max __maybe_unused,
3320+ void *arg)
3321+{
3322+ unsigned long long n;
3323+ struct file **p, *f;
3324+ struct au_sphlhead *files;
3325+ struct au_finfo *finfo;
076b876e
AM
3326+
3327+ n = 0;
3328+ p = a;
3329+ files = &au_sbi(sb)->si_files;
3330+ spin_lock(&files->spin);
3331+ hlist_for_each_entry(finfo, &files->head, fi_hlist) {
3332+ f = finfo->fi_file;
3333+ if (file_count(f)
3334+ && !special_file(file_inode(f)->i_mode)) {
3335+ get_file(f);
3336+ *p++ = f;
3337+ n++;
3338+ AuDebugOn(n > max);
3339+ }
3340+ }
3341+ spin_unlock(&files->spin);
3342+
3343+ return n;
3344+}
3345+
3346+static struct file **au_farray_alloc(struct super_block *sb,
3347+ unsigned long long *max)
3348+{
3349+ *max = atomic_long_read(&au_sbi(sb)->si_nfiles);
79b8bda9 3350+ return au_array_alloc(max, au_farray_cb, sb, /*arg*/NULL);
076b876e
AM
3351+}
3352+
3353+static void au_farray_free(struct file **a, unsigned long long max)
3354+{
3355+ unsigned long long ull;
3356+
3357+ for (ull = 0; ull < max; ull++)
3358+ if (a[ull])
3359+ fput(a[ull]);
be52b249 3360+ kvfree(a);
076b876e
AM
3361+}
3362+
3363+/* ---------------------------------------------------------------------- */
3364+
1facf9fc 3365+/*
3366+ * delete a branch
3367+ */
3368+
3369+/* to show the line number, do not make it an inline function */
4a4d8108 3370+#define AuVerbose(do_info, fmt, ...) do { \
1facf9fc 3371+ if (do_info) \
4a4d8108 3372+ pr_info(fmt, ##__VA_ARGS__); \
1facf9fc 3373+} while (0)
3374+
027c5e7a
AM
3375+static int au_test_ibusy(struct inode *inode, aufs_bindex_t bstart,
3376+ aufs_bindex_t bend)
3377+{
3378+ return (inode && !S_ISDIR(inode->i_mode)) || bstart == bend;
3379+}
3380+
3381+static int au_test_dbusy(struct dentry *dentry, aufs_bindex_t bstart,
3382+ aufs_bindex_t bend)
3383+{
5527c038 3384+ return au_test_ibusy(d_inode(dentry), bstart, bend);
027c5e7a
AM
3385+}
3386+
1facf9fc 3387+/*
3388+ * test if the branch is deletable or not.
3389+ */
3390+static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex,
b752ccd1 3391+ unsigned int sigen, const unsigned int verbose)
1facf9fc 3392+{
3393+ int err, i, j, ndentry;
3394+ aufs_bindex_t bstart, bend;
1facf9fc 3395+ struct au_dcsub_pages dpages;
3396+ struct au_dpage *dpage;
3397+ struct dentry *d;
1facf9fc 3398+
3399+ err = au_dpages_init(&dpages, GFP_NOFS);
3400+ if (unlikely(err))
3401+ goto out;
3402+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
3403+ if (unlikely(err))
3404+ goto out_dpages;
3405+
1facf9fc 3406+ for (i = 0; !err && i < dpages.ndpage; i++) {
3407+ dpage = dpages.dpages + i;
3408+ ndentry = dpage->ndentry;
3409+ for (j = 0; !err && j < ndentry; j++) {
3410+ d = dpage->dentries[j];
c1595e42 3411+ AuDebugOn(au_dcount(d) <= 0);
027c5e7a 3412+ if (!au_digen_test(d, sigen)) {
1facf9fc 3413+ di_read_lock_child(d, AuLock_IR);
027c5e7a
AM
3414+ if (unlikely(au_dbrange_test(d))) {
3415+ di_read_unlock(d, AuLock_IR);
3416+ continue;
3417+ }
3418+ } else {
1facf9fc 3419+ di_write_lock_child(d);
027c5e7a
AM
3420+ if (unlikely(au_dbrange_test(d))) {
3421+ di_write_unlock(d);
3422+ continue;
3423+ }
1facf9fc 3424+ err = au_reval_dpath(d, sigen);
3425+ if (!err)
3426+ di_downgrade_lock(d, AuLock_IR);
3427+ else {
3428+ di_write_unlock(d);
3429+ break;
3430+ }
3431+ }
3432+
027c5e7a 3433+ /* AuDbgDentry(d); */
1facf9fc 3434+ bstart = au_dbstart(d);
3435+ bend = au_dbend(d);
3436+ if (bstart <= bindex
3437+ && bindex <= bend
3438+ && au_h_dptr(d, bindex)
027c5e7a 3439+ && au_test_dbusy(d, bstart, bend)) {
1facf9fc 3440+ err = -EBUSY;
523b37e3 3441+ AuVerbose(verbose, "busy %pd\n", d);
027c5e7a 3442+ AuDbgDentry(d);
1facf9fc 3443+ }
3444+ di_read_unlock(d, AuLock_IR);
3445+ }
3446+ }
3447+
4f0767ce 3448+out_dpages:
1facf9fc 3449+ au_dpages_free(&dpages);
4f0767ce 3450+out:
1facf9fc 3451+ return err;
3452+}
3453+
3454+static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex,
b752ccd1 3455+ unsigned int sigen, const unsigned int verbose)
1facf9fc 3456+{
3457+ int err;
7f207e10
AM
3458+ unsigned long long max, ull;
3459+ struct inode *i, **array;
1facf9fc 3460+ aufs_bindex_t bstart, bend;
1facf9fc 3461+
7f207e10
AM
3462+ array = au_iarray_alloc(sb, &max);
3463+ err = PTR_ERR(array);
3464+ if (IS_ERR(array))
3465+ goto out;
3466+
1facf9fc 3467+ err = 0;
7f207e10
AM
3468+ AuDbg("b%d\n", bindex);
3469+ for (ull = 0; !err && ull < max; ull++) {
3470+ i = array[ull];
076b876e
AM
3471+ if (unlikely(!i))
3472+ break;
7f207e10 3473+ if (i->i_ino == AUFS_ROOT_INO)
1facf9fc 3474+ continue;
3475+
7f207e10 3476+ /* AuDbgInode(i); */
537831f9 3477+ if (au_iigen(i, NULL) == sigen)
1facf9fc 3478+ ii_read_lock_child(i);
3479+ else {
3480+ ii_write_lock_child(i);
027c5e7a
AM
3481+ err = au_refresh_hinode_self(i);
3482+ au_iigen_dec(i);
1facf9fc 3483+ if (!err)
3484+ ii_downgrade_lock(i);
3485+ else {
3486+ ii_write_unlock(i);
3487+ break;
3488+ }
3489+ }
3490+
3491+ bstart = au_ibstart(i);
3492+ bend = au_ibend(i);
3493+ if (bstart <= bindex
3494+ && bindex <= bend
3495+ && au_h_iptr(i, bindex)
027c5e7a 3496+ && au_test_ibusy(i, bstart, bend)) {
1facf9fc 3497+ err = -EBUSY;
3498+ AuVerbose(verbose, "busy i%lu\n", i->i_ino);
7f207e10 3499+ AuDbgInode(i);
1facf9fc 3500+ }
3501+ ii_read_unlock(i);
3502+ }
7f207e10 3503+ au_iarray_free(array, max);
1facf9fc 3504+
7f207e10 3505+out:
1facf9fc 3506+ return err;
3507+}
3508+
b752ccd1
AM
3509+static int test_children_busy(struct dentry *root, aufs_bindex_t bindex,
3510+ const unsigned int verbose)
1facf9fc 3511+{
3512+ int err;
3513+ unsigned int sigen;
3514+
3515+ sigen = au_sigen(root->d_sb);
3516+ DiMustNoWaiters(root);
5527c038 3517+ IiMustNoWaiters(d_inode(root));
1facf9fc 3518+ di_write_unlock(root);
b752ccd1 3519+ err = test_dentry_busy(root, bindex, sigen, verbose);
1facf9fc 3520+ if (!err)
b752ccd1 3521+ err = test_inode_busy(root->d_sb, bindex, sigen, verbose);
1facf9fc 3522+ di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */
3523+
3524+ return err;
3525+}
3526+
076b876e
AM
3527+static int test_dir_busy(struct file *file, aufs_bindex_t br_id,
3528+ struct file **to_free, int *idx)
3529+{
3530+ int err;
c1595e42 3531+ unsigned char matched, root;
076b876e
AM
3532+ aufs_bindex_t bindex, bend;
3533+ struct au_fidir *fidir;
3534+ struct au_hfile *hfile;
3535+
3536+ err = 0;
2000de60 3537+ root = IS_ROOT(file->f_path.dentry);
c1595e42
JR
3538+ if (root) {
3539+ get_file(file);
3540+ to_free[*idx] = file;
3541+ (*idx)++;
3542+ goto out;
3543+ }
3544+
076b876e 3545+ matched = 0;
076b876e
AM
3546+ fidir = au_fi(file)->fi_hdir;
3547+ AuDebugOn(!fidir);
3548+ bend = au_fbend_dir(file);
3549+ for (bindex = au_fbstart(file); bindex <= bend; bindex++) {
3550+ hfile = fidir->fd_hfile + bindex;
3551+ if (!hfile->hf_file)
3552+ continue;
3553+
c1595e42 3554+ if (hfile->hf_br->br_id == br_id) {
076b876e 3555+ matched = 1;
076b876e 3556+ break;
c1595e42 3557+ }
076b876e 3558+ }
c1595e42 3559+ if (matched)
076b876e
AM
3560+ err = -EBUSY;
3561+
3562+out:
3563+ return err;
3564+}
3565+
3566+static int test_file_busy(struct super_block *sb, aufs_bindex_t br_id,
3567+ struct file **to_free, int opened)
3568+{
3569+ int err, idx;
3570+ unsigned long long ull, max;
3571+ aufs_bindex_t bstart;
3572+ struct file *file, **array;
076b876e
AM
3573+ struct dentry *root;
3574+ struct au_hfile *hfile;
3575+
3576+ array = au_farray_alloc(sb, &max);
3577+ err = PTR_ERR(array);
3578+ if (IS_ERR(array))
3579+ goto out;
3580+
3581+ err = 0;
3582+ idx = 0;
3583+ root = sb->s_root;
3584+ di_write_unlock(root);
3585+ for (ull = 0; ull < max; ull++) {
3586+ file = array[ull];
3587+ if (unlikely(!file))
3588+ break;
3589+
3590+ /* AuDbg("%pD\n", file); */
3591+ fi_read_lock(file);
3592+ bstart = au_fbstart(file);
2000de60 3593+ if (!d_is_dir(file->f_path.dentry)) {
076b876e
AM
3594+ hfile = &au_fi(file)->fi_htop;
3595+ if (hfile->hf_br->br_id == br_id)
3596+ err = -EBUSY;
3597+ } else
3598+ err = test_dir_busy(file, br_id, to_free, &idx);
3599+ fi_read_unlock(file);
3600+ if (unlikely(err))
3601+ break;
3602+ }
3603+ di_write_lock_child(root);
3604+ au_farray_free(array, max);
3605+ AuDebugOn(idx > opened);
3606+
3607+out:
3608+ return err;
3609+}
3610+
3611+static void br_del_file(struct file **to_free, unsigned long long opened,
3612+ aufs_bindex_t br_id)
3613+{
3614+ unsigned long long ull;
3615+ aufs_bindex_t bindex, bstart, bend, bfound;
3616+ struct file *file;
3617+ struct au_fidir *fidir;
3618+ struct au_hfile *hfile;
3619+
3620+ for (ull = 0; ull < opened; ull++) {
3621+ file = to_free[ull];
3622+ if (unlikely(!file))
3623+ break;
3624+
3625+ /* AuDbg("%pD\n", file); */
2000de60 3626+ AuDebugOn(!d_is_dir(file->f_path.dentry));
076b876e
AM
3627+ bfound = -1;
3628+ fidir = au_fi(file)->fi_hdir;
3629+ AuDebugOn(!fidir);
3630+ fi_write_lock(file);
3631+ bstart = au_fbstart(file);
3632+ bend = au_fbend_dir(file);
3633+ for (bindex = bstart; bindex <= bend; bindex++) {
3634+ hfile = fidir->fd_hfile + bindex;
3635+ if (!hfile->hf_file)
3636+ continue;
3637+
3638+ if (hfile->hf_br->br_id == br_id) {
3639+ bfound = bindex;
3640+ break;
3641+ }
3642+ }
3643+ AuDebugOn(bfound < 0);
3644+ au_set_h_fptr(file, bfound, NULL);
3645+ if (bfound == bstart) {
3646+ for (bstart++; bstart <= bend; bstart++)
3647+ if (au_hf_dir(file, bstart)) {
3648+ au_set_fbstart(file, bstart);
3649+ break;
3650+ }
3651+ }
3652+ fi_write_unlock(file);
3653+ }
3654+}
3655+
1facf9fc 3656+static void au_br_do_del_brp(struct au_sbinfo *sbinfo,
3657+ const aufs_bindex_t bindex,
3658+ const aufs_bindex_t bend)
3659+{
3660+ struct au_branch **brp, **p;
3661+
dece6358
AM
3662+ AuRwMustWriteLock(&sbinfo->si_rwsem);
3663+
1facf9fc 3664+ brp = sbinfo->si_branch + bindex;
3665+ if (bindex < bend)
3666+ memmove(brp, brp + 1, sizeof(*brp) * (bend - bindex));
3667+ sbinfo->si_branch[0 + bend] = NULL;
3668+ sbinfo->si_bend--;
3669+
53392da6 3670+ p = krealloc(sbinfo->si_branch, sizeof(*p) * bend, AuGFP_SBILIST);
1facf9fc 3671+ if (p)
3672+ sbinfo->si_branch = p;
4a4d8108 3673+ /* harmless error */
1facf9fc 3674+}
3675+
3676+static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex,
3677+ const aufs_bindex_t bend)
3678+{
3679+ struct au_hdentry *hdp, *p;
3680+
1308ab2a 3681+ AuRwMustWriteLock(&dinfo->di_rwsem);
3682+
4a4d8108 3683+ hdp = dinfo->di_hdentry;
1facf9fc 3684+ if (bindex < bend)
4a4d8108
AM
3685+ memmove(hdp + bindex, hdp + bindex + 1,
3686+ sizeof(*hdp) * (bend - bindex));
3687+ hdp[0 + bend].hd_dentry = NULL;
1facf9fc 3688+ dinfo->di_bend--;
3689+
53392da6 3690+ p = krealloc(hdp, sizeof(*p) * bend, AuGFP_SBILIST);
1facf9fc 3691+ if (p)
3692+ dinfo->di_hdentry = p;
4a4d8108 3693+ /* harmless error */
1facf9fc 3694+}
3695+
3696+static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex,
3697+ const aufs_bindex_t bend)
3698+{
3699+ struct au_hinode *hip, *p;
3700+
1308ab2a 3701+ AuRwMustWriteLock(&iinfo->ii_rwsem);
3702+
1facf9fc 3703+ hip = iinfo->ii_hinode + bindex;
3704+ if (bindex < bend)
3705+ memmove(hip, hip + 1, sizeof(*hip) * (bend - bindex));
3706+ iinfo->ii_hinode[0 + bend].hi_inode = NULL;
4a4d8108 3707+ au_hn_init(iinfo->ii_hinode + bend);
1facf9fc 3708+ iinfo->ii_bend--;
3709+
53392da6 3710+ p = krealloc(iinfo->ii_hinode, sizeof(*p) * bend, AuGFP_SBILIST);
1facf9fc 3711+ if (p)
3712+ iinfo->ii_hinode = p;
4a4d8108 3713+ /* harmless error */
1facf9fc 3714+}
3715+
3716+static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex,
3717+ struct au_branch *br)
3718+{
3719+ aufs_bindex_t bend;
3720+ struct au_sbinfo *sbinfo;
53392da6
AM
3721+ struct dentry *root, *h_root;
3722+ struct inode *inode, *h_inode;
3723+ struct au_hinode *hinode;
1facf9fc 3724+
dece6358
AM
3725+ SiMustWriteLock(sb);
3726+
1facf9fc 3727+ root = sb->s_root;
5527c038 3728+ inode = d_inode(root);
1facf9fc 3729+ sbinfo = au_sbi(sb);
3730+ bend = sbinfo->si_bend;
3731+
53392da6
AM
3732+ h_root = au_h_dptr(root, bindex);
3733+ hinode = au_hi(inode, bindex);
3734+ h_inode = au_igrab(hinode->hi_inode);
3735+ au_hiput(hinode);
1facf9fc 3736+
53392da6 3737+ au_sbilist_lock();
1facf9fc 3738+ au_br_do_del_brp(sbinfo, bindex, bend);
3739+ au_br_do_del_hdp(au_di(root), bindex, bend);
3740+ au_br_do_del_hip(au_ii(inode), bindex, bend);
53392da6
AM
3741+ au_sbilist_unlock();
3742+
3743+ dput(h_root);
3744+ iput(h_inode);
3745+ au_br_do_free(br);
1facf9fc 3746+}
3747+
79b8bda9
AM
3748+static unsigned long long empty_cb(struct super_block *sb, void *array,
3749+ unsigned long long max, void *arg)
076b876e
AM
3750+{
3751+ return max;
3752+}
3753+
1facf9fc 3754+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount)
3755+{
3756+ int err, rerr, i;
076b876e 3757+ unsigned long long opened;
1facf9fc 3758+ unsigned int mnt_flags;
3759+ aufs_bindex_t bindex, bend, br_id;
3760+ unsigned char do_wh, verbose;
3761+ struct au_branch *br;
3762+ struct au_wbr *wbr;
076b876e
AM
3763+ struct dentry *root;
3764+ struct file **to_free;
1facf9fc 3765+
3766+ err = 0;
076b876e
AM
3767+ opened = 0;
3768+ to_free = NULL;
3769+ root = sb->s_root;
3770+ bindex = au_find_dbindex(root, del->h_path.dentry);
1facf9fc 3771+ if (bindex < 0) {
3772+ if (remount)
3773+ goto out; /* success */
3774+ err = -ENOENT;
4a4d8108 3775+ pr_err("%s no such branch\n", del->pathname);
1facf9fc 3776+ goto out;
3777+ }
3778+ AuDbg("bindex b%d\n", bindex);
3779+
3780+ err = -EBUSY;
3781+ mnt_flags = au_mntflags(sb);
3782+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
3783+ bend = au_sbend(sb);
3784+ if (unlikely(!bend)) {
3785+ AuVerbose(verbose, "no more branches left\n");
3786+ goto out;
3787+ }
3788+ br = au_sbr(sb, bindex);
86dc4139 3789+ AuDebugOn(!path_equal(&br->br_path, &del->h_path));
076b876e
AM
3790+
3791+ br_id = br->br_id;
3792+ opened = atomic_read(&br->br_count);
3793+ if (unlikely(opened)) {
79b8bda9 3794+ to_free = au_array_alloc(&opened, empty_cb, sb, NULL);
076b876e
AM
3795+ err = PTR_ERR(to_free);
3796+ if (IS_ERR(to_free))
3797+ goto out;
3798+
3799+ err = test_file_busy(sb, br_id, to_free, opened);
3800+ if (unlikely(err)) {
3801+ AuVerbose(verbose, "%llu file(s) opened\n", opened);
3802+ goto out;
3803+ }
1facf9fc 3804+ }
3805+
3806+ wbr = br->br_wbr;
3807+ do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph);
3808+ if (do_wh) {
1308ab2a 3809+ /* instead of WbrWhMustWriteLock(wbr) */
3810+ SiMustWriteLock(sb);
1facf9fc 3811+ for (i = 0; i < AuBrWh_Last; i++) {
3812+ dput(wbr->wbr_wh[i]);
3813+ wbr->wbr_wh[i] = NULL;
3814+ }
3815+ }
3816+
076b876e 3817+ err = test_children_busy(root, bindex, verbose);
1facf9fc 3818+ if (unlikely(err)) {
3819+ if (do_wh)
3820+ goto out_wh;
3821+ goto out;
3822+ }
3823+
3824+ err = 0;
076b876e
AM
3825+ if (to_free) {
3826+ /*
3827+ * now we confirmed the branch is deletable.
3828+ * let's free the remaining opened dirs on the branch.
3829+ */
3830+ di_write_unlock(root);
3831+ br_del_file(to_free, opened, br_id);
3832+ di_write_lock_child(root);
3833+ }
3834+
1facf9fc 3835+ if (!remount)
3836+ au_br_do_del(sb, bindex, br);
3837+ else {
3838+ sysaufs_brs_del(sb, bindex);
3839+ au_br_do_del(sb, bindex, br);
3840+ sysaufs_brs_add(sb, bindex);
3841+ }
3842+
1308ab2a 3843+ if (!bindex) {
5527c038 3844+ au_cpup_attr_all(d_inode(root), /*force*/1);
1308ab2a 3845+ sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes;
3846+ } else
5527c038 3847+ au_sub_nlink(d_inode(root), d_inode(del->h_path.dentry));
1facf9fc 3848+ if (au_opt_test(mnt_flags, PLINK))
3849+ au_plink_half_refresh(sb, br_id);
3850+
b752ccd1 3851+ if (au_xino_brid(sb) == br_id)
1facf9fc 3852+ au_xino_brid_set(sb, -1);
3853+ goto out; /* success */
3854+
4f0767ce 3855+out_wh:
1facf9fc 3856+ /* revert */
86dc4139 3857+ rerr = au_br_init_wh(sb, br, br->br_perm);
1facf9fc 3858+ if (rerr)
0c3ec466
AM
3859+ pr_warn("failed re-creating base whiteout, %s. (%d)\n",
3860+ del->pathname, rerr);
4f0767ce 3861+out:
076b876e
AM
3862+ if (to_free)
3863+ au_farray_free(to_free, opened);
1facf9fc 3864+ return err;
3865+}
3866+
3867+/* ---------------------------------------------------------------------- */
3868+
027c5e7a
AM
3869+static int au_ibusy(struct super_block *sb, struct aufs_ibusy __user *arg)
3870+{
3871+ int err;
3872+ aufs_bindex_t bstart, bend;
3873+ struct aufs_ibusy ibusy;
3874+ struct inode *inode, *h_inode;
3875+
3876+ err = -EPERM;
3877+ if (unlikely(!capable(CAP_SYS_ADMIN)))
3878+ goto out;
3879+
3880+ err = copy_from_user(&ibusy, arg, sizeof(ibusy));
3881+ if (!err)
3882+ err = !access_ok(VERIFY_WRITE, &arg->h_ino, sizeof(arg->h_ino));
3883+ if (unlikely(err)) {
3884+ err = -EFAULT;
3885+ AuTraceErr(err);
3886+ goto out;
3887+ }
3888+
3889+ err = -EINVAL;
3890+ si_read_lock(sb, AuLock_FLUSH);
3891+ if (unlikely(ibusy.bindex < 0 || ibusy.bindex > au_sbend(sb)))
3892+ goto out_unlock;
3893+
3894+ err = 0;
3895+ ibusy.h_ino = 0; /* invalid */
3896+ inode = ilookup(sb, ibusy.ino);
3897+ if (!inode
3898+ || inode->i_ino == AUFS_ROOT_INO
3899+ || is_bad_inode(inode))
3900+ goto out_unlock;
3901+
3902+ ii_read_lock_child(inode);
3903+ bstart = au_ibstart(inode);
3904+ bend = au_ibend(inode);
3905+ if (bstart <= ibusy.bindex && ibusy.bindex <= bend) {
3906+ h_inode = au_h_iptr(inode, ibusy.bindex);
3907+ if (h_inode && au_test_ibusy(inode, bstart, bend))
3908+ ibusy.h_ino = h_inode->i_ino;
3909+ }
3910+ ii_read_unlock(inode);
3911+ iput(inode);
3912+
3913+out_unlock:
3914+ si_read_unlock(sb);
3915+ if (!err) {
3916+ err = __put_user(ibusy.h_ino, &arg->h_ino);
3917+ if (unlikely(err)) {
3918+ err = -EFAULT;
3919+ AuTraceErr(err);
3920+ }
3921+ }
3922+out:
3923+ return err;
3924+}
3925+
3926+long au_ibusy_ioctl(struct file *file, unsigned long arg)
3927+{
2000de60 3928+ return au_ibusy(file->f_path.dentry->d_sb, (void __user *)arg);
027c5e7a
AM
3929+}
3930+
3931+#ifdef CONFIG_COMPAT
3932+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg)
3933+{
2000de60 3934+ return au_ibusy(file->f_path.dentry->d_sb, compat_ptr(arg));
027c5e7a
AM
3935+}
3936+#endif
3937+
3938+/* ---------------------------------------------------------------------- */
3939+
1facf9fc 3940+/*
3941+ * change a branch permission
3942+ */
3943+
dece6358
AM
3944+static void au_warn_ima(void)
3945+{
3946+#ifdef CONFIG_IMA
1308ab2a 3947+ /* since it doesn't support mark_files_ro() */
027c5e7a 3948+ AuWarn1("RW -> RO makes IMA to produce wrong message\n");
dece6358
AM
3949+#endif
3950+}
3951+
1facf9fc 3952+static int do_need_sigen_inc(int a, int b)
3953+{
3954+ return au_br_whable(a) && !au_br_whable(b);
3955+}
3956+
3957+static int need_sigen_inc(int old, int new)
3958+{
3959+ return do_need_sigen_inc(old, new)
3960+ || do_need_sigen_inc(new, old);
3961+}
3962+
3963+static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex)
3964+{
7f207e10 3965+ int err, do_warn;
027c5e7a 3966+ unsigned int mnt_flags;
7f207e10 3967+ unsigned long long ull, max;
e49829fe 3968+ aufs_bindex_t br_id;
38d290e6 3969+ unsigned char verbose, writer;
7f207e10 3970+ struct file *file, *hf, **array;
e49829fe 3971+ struct au_hfile *hfile;
1facf9fc 3972+
027c5e7a
AM
3973+ mnt_flags = au_mntflags(sb);
3974+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
3975+
7f207e10
AM
3976+ array = au_farray_alloc(sb, &max);
3977+ err = PTR_ERR(array);
3978+ if (IS_ERR(array))
1facf9fc 3979+ goto out;
3980+
7f207e10 3981+ do_warn = 0;
e49829fe 3982+ br_id = au_sbr_id(sb, bindex);
7f207e10
AM
3983+ for (ull = 0; ull < max; ull++) {
3984+ file = array[ull];
076b876e
AM
3985+ if (unlikely(!file))
3986+ break;
1facf9fc 3987+
523b37e3 3988+ /* AuDbg("%pD\n", file); */
1facf9fc 3989+ fi_read_lock(file);
3990+ if (unlikely(au_test_mmapped(file))) {
3991+ err = -EBUSY;
523b37e3 3992+ AuVerbose(verbose, "mmapped %pD\n", file);
7f207e10 3993+ AuDbgFile(file);
1facf9fc 3994+ FiMustNoWaiters(file);
3995+ fi_read_unlock(file);
7f207e10 3996+ goto out_array;
1facf9fc 3997+ }
3998+
e49829fe
JR
3999+ hfile = &au_fi(file)->fi_htop;
4000+ hf = hfile->hf_file;
7e9cd9fe 4001+ if (!d_is_reg(file->f_path.dentry)
1facf9fc 4002+ || !(file->f_mode & FMODE_WRITE)
e49829fe 4003+ || hfile->hf_br->br_id != br_id
7f207e10
AM
4004+ || !(hf->f_mode & FMODE_WRITE))
4005+ array[ull] = NULL;
4006+ else {
4007+ do_warn = 1;
4008+ get_file(file);
1facf9fc 4009+ }
4010+
1facf9fc 4011+ FiMustNoWaiters(file);
4012+ fi_read_unlock(file);
7f207e10
AM
4013+ fput(file);
4014+ }
1facf9fc 4015+
4016+ err = 0;
7f207e10 4017+ if (do_warn)
dece6358 4018+ au_warn_ima();
7f207e10
AM
4019+
4020+ for (ull = 0; ull < max; ull++) {
4021+ file = array[ull];
4022+ if (!file)
4023+ continue;
4024+
1facf9fc 4025+ /* todo: already flushed? */
523b37e3
AM
4026+ /*
4027+ * fs/super.c:mark_files_ro() is gone, but aufs keeps its
4028+ * approach which resets f_mode and calls mnt_drop_write() and
4029+ * file_release_write() for each file, because the branch
4030+ * attribute in aufs world is totally different from the native
4031+ * fs rw/ro mode.
4032+ */
7f207e10
AM
4033+ /* fi_read_lock(file); */
4034+ hfile = &au_fi(file)->fi_htop;
4035+ hf = hfile->hf_file;
4036+ /* fi_read_unlock(file); */
027c5e7a 4037+ spin_lock(&hf->f_lock);
38d290e6
JR
4038+ writer = !!(hf->f_mode & FMODE_WRITER);
4039+ hf->f_mode &= ~(FMODE_WRITE | FMODE_WRITER);
027c5e7a 4040+ spin_unlock(&hf->f_lock);
38d290e6
JR
4041+ if (writer) {
4042+ put_write_access(file_inode(hf));
c06a8ce3 4043+ __mnt_drop_write(hf->f_path.mnt);
1facf9fc 4044+ }
4045+ }
4046+
7f207e10
AM
4047+out_array:
4048+ au_farray_free(array, max);
4f0767ce 4049+out:
7f207e10 4050+ AuTraceErr(err);
1facf9fc 4051+ return err;
4052+}
4053+
4054+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
7f207e10 4055+ int *do_refresh)
1facf9fc 4056+{
4057+ int err, rerr;
4058+ aufs_bindex_t bindex;
4059+ struct dentry *root;
4060+ struct au_branch *br;
076b876e 4061+ struct au_br_fhsm *bf;
1facf9fc 4062+
4063+ root = sb->s_root;
1facf9fc 4064+ bindex = au_find_dbindex(root, mod->h_root);
4065+ if (bindex < 0) {
4066+ if (remount)
4067+ return 0; /* success */
4068+ err = -ENOENT;
4a4d8108 4069+ pr_err("%s no such branch\n", mod->path);
1facf9fc 4070+ goto out;
4071+ }
4072+ AuDbg("bindex b%d\n", bindex);
4073+
5527c038 4074+ err = test_br(d_inode(mod->h_root), mod->perm, mod->path);
1facf9fc 4075+ if (unlikely(err))
4076+ goto out;
4077+
4078+ br = au_sbr(sb, bindex);
86dc4139 4079+ AuDebugOn(mod->h_root != au_br_dentry(br));
1facf9fc 4080+ if (br->br_perm == mod->perm)
4081+ return 0; /* success */
4082+
076b876e
AM
4083+ /* pre-allocate for non-fhsm --> fhsm */
4084+ bf = NULL;
4085+ if (!au_br_fhsm(br->br_perm) && au_br_fhsm(mod->perm)) {
4086+ err = au_fhsm_br_alloc(br);
4087+ if (unlikely(err))
4088+ goto out;
4089+ bf = br->br_fhsm;
4090+ br->br_fhsm = NULL;
4091+ }
4092+
1facf9fc 4093+ if (au_br_writable(br->br_perm)) {
4094+ /* remove whiteout base */
86dc4139 4095+ err = au_br_init_wh(sb, br, mod->perm);
1facf9fc 4096+ if (unlikely(err))
076b876e 4097+ goto out_bf;
1facf9fc 4098+
4099+ if (!au_br_writable(mod->perm)) {
4100+ /* rw --> ro, file might be mmapped */
4101+ DiMustNoWaiters(root);
5527c038 4102+ IiMustNoWaiters(d_inode(root));
1facf9fc 4103+ di_write_unlock(root);
4104+ err = au_br_mod_files_ro(sb, bindex);
4105+ /* aufs_write_lock() calls ..._child() */
4106+ di_write_lock_child(root);
4107+
4108+ if (unlikely(err)) {
4109+ rerr = -ENOMEM;
be52b249 4110+ br->br_wbr = kzalloc(sizeof(*br->br_wbr),
1facf9fc 4111+ GFP_NOFS);
86dc4139
AM
4112+ if (br->br_wbr)
4113+ rerr = au_wbr_init(br, sb, br->br_perm);
1facf9fc 4114+ if (unlikely(rerr)) {
4115+ AuIOErr("nested error %d (%d)\n",
4116+ rerr, err);
4117+ br->br_perm = mod->perm;
4118+ }
4119+ }
4120+ }
4121+ } else if (au_br_writable(mod->perm)) {
4122+ /* ro --> rw */
4123+ err = -ENOMEM;
be52b249 4124+ br->br_wbr = kzalloc(sizeof(*br->br_wbr), GFP_NOFS);
1facf9fc 4125+ if (br->br_wbr) {
86dc4139 4126+ err = au_wbr_init(br, sb, mod->perm);
1facf9fc 4127+ if (unlikely(err)) {
4128+ kfree(br->br_wbr);
4129+ br->br_wbr = NULL;
4130+ }
4131+ }
4132+ }
076b876e
AM
4133+ if (unlikely(err))
4134+ goto out_bf;
4135+
4136+ if (au_br_fhsm(br->br_perm)) {
4137+ if (!au_br_fhsm(mod->perm)) {
4138+ /* fhsm --> non-fhsm */
4139+ au_br_fhsm_fin(br->br_fhsm);
4140+ kfree(br->br_fhsm);
4141+ br->br_fhsm = NULL;
4142+ }
4143+ } else if (au_br_fhsm(mod->perm))
4144+ /* non-fhsm --> fhsm */
4145+ br->br_fhsm = bf;
4146+
076b876e
AM
4147+ *do_refresh |= need_sigen_inc(br->br_perm, mod->perm);
4148+ br->br_perm = mod->perm;
4149+ goto out; /* success */
1facf9fc 4150+
076b876e
AM
4151+out_bf:
4152+ kfree(bf);
4153+out:
4154+ AuTraceErr(err);
4155+ return err;
4156+}
4157+
4158+/* ---------------------------------------------------------------------- */
4159+
4160+int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs)
4161+{
4162+ int err;
4163+ struct kstatfs kstfs;
4164+
4165+ err = vfs_statfs(&br->br_path, &kstfs);
1facf9fc 4166+ if (!err) {
076b876e
AM
4167+ stfs->f_blocks = kstfs.f_blocks;
4168+ stfs->f_bavail = kstfs.f_bavail;
4169+ stfs->f_files = kstfs.f_files;
4170+ stfs->f_ffree = kstfs.f_ffree;
1facf9fc 4171+ }
4172+
1facf9fc 4173+ return err;
4174+}
7f207e10
AM
4175diff -urN /usr/share/empty/fs/aufs/branch.h linux/fs/aufs/branch.h
4176--- /usr/share/empty/fs/aufs/branch.h 1970-01-01 01:00:00.000000000 +0100
8cdd5066 4177+++ linux/fs/aufs/branch.h 2016-02-28 11:26:32.569971135 +0100
b912730e 4178@@ -0,0 +1,279 @@
1facf9fc 4179+/*
8cdd5066 4180+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 4181+ *
4182+ * This program, aufs is free software; you can redistribute it and/or modify
4183+ * it under the terms of the GNU General Public License as published by
4184+ * the Free Software Foundation; either version 2 of the License, or
4185+ * (at your option) any later version.
dece6358
AM
4186+ *
4187+ * This program is distributed in the hope that it will be useful,
4188+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4189+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4190+ * GNU General Public License for more details.
4191+ *
4192+ * You should have received a copy of the GNU General Public License
523b37e3 4193+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 4194+ */
4195+
4196+/*
4197+ * branch filesystems and xino for them
4198+ */
4199+
4200+#ifndef __AUFS_BRANCH_H__
4201+#define __AUFS_BRANCH_H__
4202+
4203+#ifdef __KERNEL__
4204+
1facf9fc 4205+#include <linux/mount.h>
4a4d8108 4206+#include "dynop.h"
1facf9fc 4207+#include "rwsem.h"
4208+#include "super.h"
4209+
4210+/* ---------------------------------------------------------------------- */
4211+
4212+/* a xino file */
4213+struct au_xino_file {
4214+ struct file *xi_file;
4215+ struct mutex xi_nondir_mtx;
4216+
4217+ /* todo: make xino files an array to support huge inode number */
4218+
4219+#ifdef CONFIG_DEBUG_FS
4220+ struct dentry *xi_dbgaufs;
4221+#endif
4222+};
4223+
076b876e
AM
4224+/* File-based Hierarchical Storage Management */
4225+struct au_br_fhsm {
4226+#ifdef CONFIG_AUFS_FHSM
4227+ struct mutex bf_lock;
4228+ unsigned long bf_jiffy;
4229+ struct aufs_stfs bf_stfs;
4230+ int bf_readable;
4231+#endif
4232+};
4233+
1facf9fc 4234+/* members for writable branch only */
4235+enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last};
4236+struct au_wbr {
dece6358 4237+ struct au_rwsem wbr_wh_rwsem;
1facf9fc 4238+ struct dentry *wbr_wh[AuBrWh_Last];
4a4d8108 4239+ atomic_t wbr_wh_running;
1facf9fc 4240+#define wbr_whbase wbr_wh[AuBrWh_BASE] /* whiteout base */
4241+#define wbr_plink wbr_wh[AuBrWh_PLINK] /* pseudo-link dir */
4242+#define wbr_orph wbr_wh[AuBrWh_ORPH] /* dir for orphans */
4243+
4244+ /* mfs mode */
4245+ unsigned long long wbr_bytes;
4246+};
4247+
4a4d8108
AM
4248+/* ext2 has 3 types of operations at least, ext3 has 4 */
4249+#define AuBrDynOp (AuDyLast * 4)
4250+
1716fcea
AM
4251+#ifdef CONFIG_AUFS_HFSNOTIFY
4252+/* support for asynchronous destruction */
4253+struct au_br_hfsnotify {
4254+ struct fsnotify_group *hfsn_group;
4255+};
4256+#endif
4257+
392086de
AM
4258+/* sysfs entries */
4259+struct au_brsysfs {
4260+ char name[16];
4261+ struct attribute attr;
4262+};
4263+
4264+enum {
4265+ AuBrSysfs_BR,
4266+ AuBrSysfs_BRID,
4267+ AuBrSysfs_Last
4268+};
4269+
1facf9fc 4270+/* protected by superblock rwsem */
4271+struct au_branch {
4272+ struct au_xino_file br_xino;
4273+
4274+ aufs_bindex_t br_id;
4275+
4276+ int br_perm;
86dc4139 4277+ struct path br_path;
4a4d8108
AM
4278+ spinlock_t br_dykey_lock;
4279+ struct au_dykey *br_dykey[AuBrDynOp];
1facf9fc 4280+ atomic_t br_count;
4281+
4282+ struct au_wbr *br_wbr;
076b876e 4283+ struct au_br_fhsm *br_fhsm;
1facf9fc 4284+
4285+ /* xino truncation */
1facf9fc 4286+ atomic_t br_xino_running;
4287+
027c5e7a 4288+#ifdef CONFIG_AUFS_HFSNOTIFY
1716fcea 4289+ struct au_br_hfsnotify *br_hfsn;
027c5e7a
AM
4290+#endif
4291+
1facf9fc 4292+#ifdef CONFIG_SYSFS
392086de
AM
4293+ /* entries under sysfs per mount-point */
4294+ struct au_brsysfs br_sysfs[AuBrSysfs_Last];
1facf9fc 4295+#endif
4296+};
4297+
4298+/* ---------------------------------------------------------------------- */
4299+
86dc4139
AM
4300+static inline struct vfsmount *au_br_mnt(struct au_branch *br)
4301+{
4302+ return br->br_path.mnt;
4303+}
4304+
4305+static inline struct dentry *au_br_dentry(struct au_branch *br)
4306+{
4307+ return br->br_path.dentry;
4308+}
4309+
4310+static inline struct super_block *au_br_sb(struct au_branch *br)
4311+{
4312+ return au_br_mnt(br)->mnt_sb;
4313+}
4314+
1facf9fc 4315+static inline int au_br_rdonly(struct au_branch *br)
4316+{
86dc4139 4317+ return ((au_br_sb(br)->s_flags & MS_RDONLY)
1facf9fc 4318+ || !au_br_writable(br->br_perm))
4319+ ? -EROFS : 0;
4320+}
4321+
4a4d8108 4322+static inline int au_br_hnotifyable(int brperm __maybe_unused)
1facf9fc 4323+{
4a4d8108 4324+#ifdef CONFIG_AUFS_HNOTIFY
1e00d052 4325+ return !(brperm & AuBrPerm_RR);
1facf9fc 4326+#else
4327+ return 0;
4328+#endif
4329+}
4330+
b912730e
AM
4331+static inline int au_br_test_oflag(int oflag, struct au_branch *br)
4332+{
4333+ int err, exec_flag;
4334+
4335+ err = 0;
4336+ exec_flag = oflag & __FMODE_EXEC;
79b8bda9 4337+ if (unlikely(exec_flag && path_noexec(&br->br_path)))
b912730e
AM
4338+ err = -EACCES;
4339+
4340+ return err;
4341+}
4342+
1facf9fc 4343+/* ---------------------------------------------------------------------- */
4344+
4345+/* branch.c */
4346+struct au_sbinfo;
4347+void au_br_free(struct au_sbinfo *sinfo);
4348+int au_br_index(struct super_block *sb, aufs_bindex_t br_id);
4349+struct au_opt_add;
4350+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount);
4351+struct au_opt_del;
4352+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount);
027c5e7a
AM
4353+long au_ibusy_ioctl(struct file *file, unsigned long arg);
4354+#ifdef CONFIG_COMPAT
4355+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg);
4356+#endif
1facf9fc 4357+struct au_opt_mod;
4358+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
7f207e10 4359+ int *do_refresh);
076b876e
AM
4360+struct aufs_stfs;
4361+int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs);
1facf9fc 4362+
4363+/* xino.c */
4364+static const loff_t au_loff_max = LLONG_MAX;
4365+
4366+int au_xib_trunc(struct super_block *sb);
5527c038 4367+ssize_t xino_fread(vfs_readf_t func, struct file *file, void *buf, size_t size,
1facf9fc 4368+ loff_t *pos);
5527c038
JR
4369+ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
4370+ size_t size, loff_t *pos);
1facf9fc 4371+struct file *au_xino_create2(struct file *base_file, struct file *copy_src);
4372+struct file *au_xino_create(struct super_block *sb, char *fname, int silent);
4373+ino_t au_xino_new_ino(struct super_block *sb);
b752ccd1 4374+void au_xino_delete_inode(struct inode *inode, const int unlinked);
1facf9fc 4375+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
4376+ ino_t ino);
4377+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
4378+ ino_t *ino);
4379+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t hino,
4380+ struct file *base_file, int do_test);
4381+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex);
4382+
4383+struct au_opt_xino;
4384+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount);
4385+void au_xino_clr(struct super_block *sb);
4386+struct file *au_xino_def(struct super_block *sb);
4387+int au_xino_path(struct seq_file *seq, struct file *file);
4388+
4389+/* ---------------------------------------------------------------------- */
4390+
4391+/* Superblock to branch */
4392+static inline
4393+aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex)
4394+{
4395+ return au_sbr(sb, bindex)->br_id;
4396+}
4397+
4398+static inline
4399+struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex)
4400+{
86dc4139 4401+ return au_br_mnt(au_sbr(sb, bindex));
1facf9fc 4402+}
4403+
4404+static inline
4405+struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex)
4406+{
86dc4139 4407+ return au_br_sb(au_sbr(sb, bindex));
1facf9fc 4408+}
4409+
4410+static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex)
4411+{
e49829fe 4412+ atomic_dec(&au_sbr(sb, bindex)->br_count);
1facf9fc 4413+}
4414+
4415+static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex)
4416+{
4417+ return au_sbr(sb, bindex)->br_perm;
4418+}
4419+
4420+static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex)
4421+{
4422+ return au_br_whable(au_sbr_perm(sb, bindex));
4423+}
4424+
4425+/* ---------------------------------------------------------------------- */
4426+
4427+/*
4428+ * wbr_wh_read_lock, wbr_wh_write_lock
4429+ * wbr_wh_read_unlock, wbr_wh_write_unlock, wbr_wh_downgrade_lock
4430+ */
4431+AuSimpleRwsemFuncs(wbr_wh, struct au_wbr *wbr, &wbr->wbr_wh_rwsem);
4432+
dece6358
AM
4433+#define WbrWhMustNoWaiters(wbr) AuRwMustNoWaiters(&wbr->wbr_wh_rwsem)
4434+#define WbrWhMustAnyLock(wbr) AuRwMustAnyLock(&wbr->wbr_wh_rwsem)
4435+#define WbrWhMustWriteLock(wbr) AuRwMustWriteLock(&wbr->wbr_wh_rwsem)
4436+
076b876e
AM
4437+/* ---------------------------------------------------------------------- */
4438+
4439+#ifdef CONFIG_AUFS_FHSM
4440+static inline void au_br_fhsm_init(struct au_br_fhsm *brfhsm)
4441+{
4442+ mutex_init(&brfhsm->bf_lock);
4443+ brfhsm->bf_jiffy = 0;
4444+ brfhsm->bf_readable = 0;
4445+}
4446+
4447+static inline void au_br_fhsm_fin(struct au_br_fhsm *brfhsm)
4448+{
4449+ mutex_destroy(&brfhsm->bf_lock);
4450+}
4451+#else
4452+AuStubVoid(au_br_fhsm_init, struct au_br_fhsm *brfhsm)
4453+AuStubVoid(au_br_fhsm_fin, struct au_br_fhsm *brfhsm)
4454+#endif
4455+
1facf9fc 4456+#endif /* __KERNEL__ */
4457+#endif /* __AUFS_BRANCH_H__ */
7f207e10
AM
4458diff -urN /usr/share/empty/fs/aufs/conf.mk linux/fs/aufs/conf.mk
4459--- /usr/share/empty/fs/aufs/conf.mk 1970-01-01 01:00:00.000000000 +0100
8cdd5066 4460+++ linux/fs/aufs/conf.mk 2016-02-28 11:26:32.569971135 +0100
c1595e42 4461@@ -0,0 +1,38 @@
4a4d8108
AM
4462+
4463+AuConfStr = CONFIG_AUFS_FS=${CONFIG_AUFS_FS}
4464+
4465+define AuConf
4466+ifdef ${1}
4467+AuConfStr += ${1}=${${1}}
4468+endif
4469+endef
4470+
b752ccd1 4471+AuConfAll = BRANCH_MAX_127 BRANCH_MAX_511 BRANCH_MAX_1023 BRANCH_MAX_32767 \
e49829fe 4472+ SBILIST \
7f207e10 4473+ HNOTIFY HFSNOTIFY \
4a4d8108 4474+ EXPORT INO_T_64 \
c1595e42 4475+ XATTR \
076b876e 4476+ FHSM \
4a4d8108 4477+ RDU \
4a4d8108
AM
4478+ SHWH \
4479+ BR_RAMFS \
4480+ BR_FUSE POLL \
4481+ BR_HFSPLUS \
4482+ BDEV_LOOP \
b752ccd1
AM
4483+ DEBUG MAGIC_SYSRQ
4484+$(foreach i, ${AuConfAll}, \
4a4d8108
AM
4485+ $(eval $(call AuConf,CONFIG_AUFS_${i})))
4486+
4487+AuConfName = ${obj}/conf.str
4488+${AuConfName}.tmp: FORCE
4489+ @echo ${AuConfStr} | tr ' ' '\n' | sed -e 's/^/"/' -e 's/$$/\\n"/' > $@
4490+${AuConfName}: ${AuConfName}.tmp
4491+ @diff -q $< $@ > /dev/null 2>&1 || { \
4492+ echo ' GEN ' $@; \
4493+ cp -p $< $@; \
4494+ }
4495+FORCE:
4496+clean-files += ${AuConfName} ${AuConfName}.tmp
4497+${obj}/sysfs.o: ${AuConfName}
b752ccd1
AM
4498+
4499+-include ${srctree}/${src}/conf_priv.mk
7f207e10
AM
4500diff -urN /usr/share/empty/fs/aufs/cpup.c linux/fs/aufs/cpup.c
4501--- /usr/share/empty/fs/aufs/cpup.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066
JR
4502+++ linux/fs/aufs/cpup.c 2016-02-28 11:26:32.569971135 +0100
4503@@ -0,0 +1,1379 @@
1facf9fc 4504+/*
8cdd5066 4505+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 4506+ *
4507+ * This program, aufs is free software; you can redistribute it and/or modify
4508+ * it under the terms of the GNU General Public License as published by
4509+ * the Free Software Foundation; either version 2 of the License, or
4510+ * (at your option) any later version.
dece6358
AM
4511+ *
4512+ * This program is distributed in the hope that it will be useful,
4513+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4514+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4515+ * GNU General Public License for more details.
4516+ *
4517+ * You should have received a copy of the GNU General Public License
523b37e3 4518+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 4519+ */
4520+
4521+/*
4522+ * copy-up functions, see wbr_policy.c for copy-down
4523+ */
4524+
4525+#include <linux/fs_stack.h>
dece6358 4526+#include <linux/mm.h>
8cdd5066 4527+#include <linux/task_work.h>
1facf9fc 4528+#include "aufs.h"
4529+
86dc4139 4530+void au_cpup_attr_flags(struct inode *dst, unsigned int iflags)
1facf9fc 4531+{
4532+ const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE
367653fa 4533+ | S_NOATIME | S_NOCMTIME | S_AUTOMOUNT;
1facf9fc 4534+
86dc4139
AM
4535+ BUILD_BUG_ON(sizeof(iflags) != sizeof(dst->i_flags));
4536+
4537+ dst->i_flags |= iflags & ~mask;
1facf9fc 4538+ if (au_test_fs_notime(dst->i_sb))
4539+ dst->i_flags |= S_NOATIME | S_NOCMTIME;
4540+}
4541+
4542+void au_cpup_attr_timesizes(struct inode *inode)
4543+{
4544+ struct inode *h_inode;
4545+
4546+ h_inode = au_h_iptr(inode, au_ibstart(inode));
4547+ fsstack_copy_attr_times(inode, h_inode);
4a4d8108 4548+ fsstack_copy_inode_size(inode, h_inode);
1facf9fc 4549+}
4550+
4551+void au_cpup_attr_nlink(struct inode *inode, int force)
4552+{
4553+ struct inode *h_inode;
4554+ struct super_block *sb;
4555+ aufs_bindex_t bindex, bend;
4556+
4557+ sb = inode->i_sb;
4558+ bindex = au_ibstart(inode);
4559+ h_inode = au_h_iptr(inode, bindex);
4560+ if (!force
4561+ && !S_ISDIR(h_inode->i_mode)
4562+ && au_opt_test(au_mntflags(sb), PLINK)
4563+ && au_plink_test(inode))
4564+ return;
4565+
7eafdf33
AM
4566+ /*
4567+ * 0 can happen in revalidating.
38d290e6
JR
4568+ * h_inode->i_mutex may not be held here, but it is harmless since once
4569+ * i_nlink reaches 0, it will never become positive except O_TMPFILE
4570+ * case.
4571+ * todo: O_TMPFILE+linkat(AT_SYMLINK_FOLLOW) bypassing aufs may cause
4572+ * the incorrect link count.
7eafdf33 4573+ */
92d182d2 4574+ set_nlink(inode, h_inode->i_nlink);
1facf9fc 4575+
4576+ /*
4577+ * fewer nlink makes find(1) noisy, but larger nlink doesn't.
4578+ * it may includes whplink directory.
4579+ */
4580+ if (S_ISDIR(h_inode->i_mode)) {
4581+ bend = au_ibend(inode);
4582+ for (bindex++; bindex <= bend; bindex++) {
4583+ h_inode = au_h_iptr(inode, bindex);
4584+ if (h_inode)
4585+ au_add_nlink(inode, h_inode);
4586+ }
4587+ }
4588+}
4589+
4590+void au_cpup_attr_changeable(struct inode *inode)
4591+{
4592+ struct inode *h_inode;
4593+
4594+ h_inode = au_h_iptr(inode, au_ibstart(inode));
4595+ inode->i_mode = h_inode->i_mode;
4596+ inode->i_uid = h_inode->i_uid;
4597+ inode->i_gid = h_inode->i_gid;
4598+ au_cpup_attr_timesizes(inode);
86dc4139 4599+ au_cpup_attr_flags(inode, h_inode->i_flags);
1facf9fc 4600+}
4601+
4602+void au_cpup_igen(struct inode *inode, struct inode *h_inode)
4603+{
4604+ struct au_iinfo *iinfo = au_ii(inode);
4605+
1308ab2a 4606+ IiMustWriteLock(inode);
4607+
1facf9fc 4608+ iinfo->ii_higen = h_inode->i_generation;
4609+ iinfo->ii_hsb1 = h_inode->i_sb;
4610+}
4611+
4612+void au_cpup_attr_all(struct inode *inode, int force)
4613+{
4614+ struct inode *h_inode;
4615+
4616+ h_inode = au_h_iptr(inode, au_ibstart(inode));
4617+ au_cpup_attr_changeable(inode);
4618+ if (inode->i_nlink > 0)
4619+ au_cpup_attr_nlink(inode, force);
4620+ inode->i_rdev = h_inode->i_rdev;
4621+ inode->i_blkbits = h_inode->i_blkbits;
4622+ au_cpup_igen(inode, h_inode);
4623+}
4624+
4625+/* ---------------------------------------------------------------------- */
4626+
4627+/* Note: dt_dentry and dt_h_dentry are not dget/dput-ed */
4628+
4629+/* keep the timestamps of the parent dir when cpup */
4630+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
4631+ struct path *h_path)
4632+{
4633+ struct inode *h_inode;
4634+
4635+ dt->dt_dentry = dentry;
4636+ dt->dt_h_path = *h_path;
5527c038 4637+ h_inode = d_inode(h_path->dentry);
1facf9fc 4638+ dt->dt_atime = h_inode->i_atime;
4639+ dt->dt_mtime = h_inode->i_mtime;
4640+ /* smp_mb(); */
4641+}
4642+
4643+void au_dtime_revert(struct au_dtime *dt)
4644+{
4645+ struct iattr attr;
4646+ int err;
4647+
4648+ attr.ia_atime = dt->dt_atime;
4649+ attr.ia_mtime = dt->dt_mtime;
4650+ attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET
4651+ | ATTR_ATIME | ATTR_ATIME_SET;
4652+
523b37e3
AM
4653+ /* no delegation since this is a directory */
4654+ err = vfsub_notify_change(&dt->dt_h_path, &attr, /*delegated*/NULL);
1facf9fc 4655+ if (unlikely(err))
0c3ec466 4656+ pr_warn("restoring timestamps failed(%d). ignored\n", err);
1facf9fc 4657+}
4658+
4659+/* ---------------------------------------------------------------------- */
4660+
86dc4139
AM
4661+/* internal use only */
4662+struct au_cpup_reg_attr {
4663+ int valid;
4664+ struct kstat st;
4665+ unsigned int iflags; /* inode->i_flags */
4666+};
4667+
1facf9fc 4668+static noinline_for_stack
86dc4139
AM
4669+int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src,
4670+ struct au_cpup_reg_attr *h_src_attr)
1facf9fc 4671+{
c1595e42 4672+ int err, sbits, icex;
7e9cd9fe
AM
4673+ unsigned int mnt_flags;
4674+ unsigned char verbose;
1facf9fc 4675+ struct iattr ia;
4676+ struct path h_path;
1308ab2a 4677+ struct inode *h_isrc, *h_idst;
86dc4139 4678+ struct kstat *h_st;
c1595e42 4679+ struct au_branch *br;
1facf9fc 4680+
4681+ h_path.dentry = au_h_dptr(dst, bindex);
5527c038 4682+ h_idst = d_inode(h_path.dentry);
c1595e42
JR
4683+ br = au_sbr(dst->d_sb, bindex);
4684+ h_path.mnt = au_br_mnt(br);
5527c038 4685+ h_isrc = d_inode(h_src);
1308ab2a 4686+ ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID
1facf9fc 4687+ | ATTR_ATIME | ATTR_MTIME
4688+ | ATTR_ATIME_SET | ATTR_MTIME_SET;
86dc4139
AM
4689+ if (h_src_attr && h_src_attr->valid) {
4690+ h_st = &h_src_attr->st;
4691+ ia.ia_uid = h_st->uid;
4692+ ia.ia_gid = h_st->gid;
4693+ ia.ia_atime = h_st->atime;
4694+ ia.ia_mtime = h_st->mtime;
4695+ if (h_idst->i_mode != h_st->mode
4696+ && !S_ISLNK(h_idst->i_mode)) {
4697+ ia.ia_valid |= ATTR_MODE;
4698+ ia.ia_mode = h_st->mode;
4699+ }
4700+ sbits = !!(h_st->mode & (S_ISUID | S_ISGID));
4701+ au_cpup_attr_flags(h_idst, h_src_attr->iflags);
4702+ } else {
4703+ ia.ia_uid = h_isrc->i_uid;
4704+ ia.ia_gid = h_isrc->i_gid;
4705+ ia.ia_atime = h_isrc->i_atime;
4706+ ia.ia_mtime = h_isrc->i_mtime;
4707+ if (h_idst->i_mode != h_isrc->i_mode
4708+ && !S_ISLNK(h_idst->i_mode)) {
4709+ ia.ia_valid |= ATTR_MODE;
4710+ ia.ia_mode = h_isrc->i_mode;
4711+ }
4712+ sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID));
4713+ au_cpup_attr_flags(h_idst, h_isrc->i_flags);
1308ab2a 4714+ }
523b37e3
AM
4715+ /* no delegation since it is just created */
4716+ err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL);
1facf9fc 4717+
4718+ /* is this nfs only? */
4719+ if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) {
4720+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
4721+ ia.ia_mode = h_isrc->i_mode;
523b37e3 4722+ err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL);
1facf9fc 4723+ }
4724+
c1595e42 4725+ icex = br->br_perm & AuBrAttr_ICEX;
7e9cd9fe
AM
4726+ if (!err) {
4727+ mnt_flags = au_mntflags(dst->d_sb);
4728+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
4729+ err = au_cpup_xattr(h_path.dentry, h_src, icex, verbose);
4730+ }
c1595e42 4731+
1facf9fc 4732+ return err;
4733+}
4734+
4735+/* ---------------------------------------------------------------------- */
4736+
4737+static int au_do_copy_file(struct file *dst, struct file *src, loff_t len,
4738+ char *buf, unsigned long blksize)
4739+{
4740+ int err;
4741+ size_t sz, rbytes, wbytes;
4742+ unsigned char all_zero;
4743+ char *p, *zp;
4744+ struct mutex *h_mtx;
4745+ /* reduce stack usage */
4746+ struct iattr *ia;
4747+
4748+ zp = page_address(ZERO_PAGE(0));
4749+ if (unlikely(!zp))
4750+ return -ENOMEM; /* possible? */
4751+
4752+ err = 0;
4753+ all_zero = 0;
4754+ while (len) {
4755+ AuDbg("len %lld\n", len);
4756+ sz = blksize;
4757+ if (len < blksize)
4758+ sz = len;
4759+
4760+ rbytes = 0;
4761+ /* todo: signal_pending? */
4762+ while (!rbytes || err == -EAGAIN || err == -EINTR) {
4763+ rbytes = vfsub_read_k(src, buf, sz, &src->f_pos);
4764+ err = rbytes;
4765+ }
4766+ if (unlikely(err < 0))
4767+ break;
4768+
4769+ all_zero = 0;
4770+ if (len >= rbytes && rbytes == blksize)
4771+ all_zero = !memcmp(buf, zp, rbytes);
4772+ if (!all_zero) {
4773+ wbytes = rbytes;
4774+ p = buf;
4775+ while (wbytes) {
4776+ size_t b;
4777+
4778+ b = vfsub_write_k(dst, p, wbytes, &dst->f_pos);
4779+ err = b;
4780+ /* todo: signal_pending? */
4781+ if (unlikely(err == -EAGAIN || err == -EINTR))
4782+ continue;
4783+ if (unlikely(err < 0))
4784+ break;
4785+ wbytes -= b;
4786+ p += b;
4787+ }
392086de
AM
4788+ if (unlikely(err < 0))
4789+ break;
1facf9fc 4790+ } else {
4791+ loff_t res;
4792+
4793+ AuLabel(hole);
4794+ res = vfsub_llseek(dst, rbytes, SEEK_CUR);
4795+ err = res;
4796+ if (unlikely(res < 0))
4797+ break;
4798+ }
4799+ len -= rbytes;
4800+ err = 0;
4801+ }
4802+
4803+ /* the last block may be a hole */
4804+ if (!err && all_zero) {
4805+ AuLabel(last hole);
4806+
4807+ err = 1;
2000de60 4808+ if (au_test_nfs(dst->f_path.dentry->d_sb)) {
1facf9fc 4809+ /* nfs requires this step to make last hole */
4810+ /* is this only nfs? */
4811+ do {
4812+ /* todo: signal_pending? */
4813+ err = vfsub_write_k(dst, "\0", 1, &dst->f_pos);
4814+ } while (err == -EAGAIN || err == -EINTR);
4815+ if (err == 1)
4816+ dst->f_pos--;
4817+ }
4818+
4819+ if (err == 1) {
4820+ ia = (void *)buf;
4821+ ia->ia_size = dst->f_pos;
4822+ ia->ia_valid = ATTR_SIZE | ATTR_FILE;
4823+ ia->ia_file = dst;
c06a8ce3 4824+ h_mtx = &file_inode(dst)->i_mutex;
1facf9fc 4825+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2);
523b37e3
AM
4826+ /* no delegation since it is just created */
4827+ err = vfsub_notify_change(&dst->f_path, ia,
4828+ /*delegated*/NULL);
1facf9fc 4829+ mutex_unlock(h_mtx);
4830+ }
4831+ }
4832+
4833+ return err;
4834+}
4835+
4836+int au_copy_file(struct file *dst, struct file *src, loff_t len)
4837+{
4838+ int err;
4839+ unsigned long blksize;
4840+ unsigned char do_kfree;
4841+ char *buf;
4842+
4843+ err = -ENOMEM;
2000de60 4844+ blksize = dst->f_path.dentry->d_sb->s_blocksize;
1facf9fc 4845+ if (!blksize || PAGE_SIZE < blksize)
4846+ blksize = PAGE_SIZE;
4847+ AuDbg("blksize %lu\n", blksize);
4848+ do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr *));
4849+ if (do_kfree)
4850+ buf = kmalloc(blksize, GFP_NOFS);
4851+ else
4852+ buf = (void *)__get_free_page(GFP_NOFS);
4853+ if (unlikely(!buf))
4854+ goto out;
4855+
4856+ if (len > (1 << 22))
4857+ AuDbg("copying a large file %lld\n", (long long)len);
4858+
4859+ src->f_pos = 0;
4860+ dst->f_pos = 0;
4861+ err = au_do_copy_file(dst, src, len, buf, blksize);
4862+ if (do_kfree)
4863+ kfree(buf);
4864+ else
4865+ free_page((unsigned long)buf);
4866+
4f0767ce 4867+out:
1facf9fc 4868+ return err;
4869+}
4870+
4871+/*
4872+ * to support a sparse file which is opened with O_APPEND,
4873+ * we need to close the file.
4874+ */
c2b27bf2 4875+static int au_cp_regular(struct au_cp_generic *cpg)
1facf9fc 4876+{
4877+ int err, i;
4878+ enum { SRC, DST };
4879+ struct {
4880+ aufs_bindex_t bindex;
4881+ unsigned int flags;
4882+ struct dentry *dentry;
392086de 4883+ int force_wr;
1facf9fc 4884+ struct file *file;
523b37e3 4885+ void *label;
1facf9fc 4886+ } *f, file[] = {
4887+ {
c2b27bf2 4888+ .bindex = cpg->bsrc,
1facf9fc 4889+ .flags = O_RDONLY | O_NOATIME | O_LARGEFILE,
523b37e3 4890+ .label = &&out
1facf9fc 4891+ },
4892+ {
c2b27bf2 4893+ .bindex = cpg->bdst,
1facf9fc 4894+ .flags = O_WRONLY | O_NOATIME | O_LARGEFILE,
392086de 4895+ .force_wr = !!au_ftest_cpup(cpg->flags, RWDST),
523b37e3 4896+ .label = &&out_src
1facf9fc 4897+ }
4898+ };
4899+ struct super_block *sb;
8cdd5066 4900+ struct task_struct *tsk = current;
1facf9fc 4901+
4902+ /* bsrc branch can be ro/rw. */
c2b27bf2 4903+ sb = cpg->dentry->d_sb;
1facf9fc 4904+ f = file;
4905+ for (i = 0; i < 2; i++, f++) {
c2b27bf2
AM
4906+ f->dentry = au_h_dptr(cpg->dentry, f->bindex);
4907+ f->file = au_h_open(cpg->dentry, f->bindex, f->flags,
392086de 4908+ /*file*/NULL, f->force_wr);
1facf9fc 4909+ err = PTR_ERR(f->file);
4910+ if (IS_ERR(f->file))
4911+ goto *f->label;
1facf9fc 4912+ }
4913+
4914+ /* try stopping to update while we copyup */
5527c038 4915+ IMustLock(d_inode(file[SRC].dentry));
c2b27bf2 4916+ err = au_copy_file(file[DST].file, file[SRC].file, cpg->len);
1facf9fc 4917+
8cdd5066
JR
4918+ /* i wonder if we had O_NO_DELAY_FPUT flag */
4919+ if (tsk->flags & PF_KTHREAD)
4920+ __fput_sync(file[DST].file);
4921+ else {
4922+ WARN(1, "%pD\nPlease report this warning to aufs-users ML",
4923+ file[DST].file);
4924+ fput(file[DST].file);
4925+ /*
4926+ * too bad.
4927+ * we have to call both since we don't know which place the file
4928+ * was added to.
4929+ */
4930+ task_work_run();
4931+ flush_delayed_fput();
4932+ }
1facf9fc 4933+ au_sbr_put(sb, file[DST].bindex);
523b37e3 4934+
4f0767ce 4935+out_src:
1facf9fc 4936+ fput(file[SRC].file);
4937+ au_sbr_put(sb, file[SRC].bindex);
4f0767ce 4938+out:
1facf9fc 4939+ return err;
4940+}
4941+
c2b27bf2 4942+static int au_do_cpup_regular(struct au_cp_generic *cpg,
86dc4139 4943+ struct au_cpup_reg_attr *h_src_attr)
1facf9fc 4944+{
4945+ int err, rerr;
4946+ loff_t l;
86dc4139 4947+ struct path h_path;
38d290e6 4948+ struct inode *h_src_inode, *h_dst_inode;
1facf9fc 4949+
4950+ err = 0;
5527c038 4951+ h_src_inode = au_h_iptr(d_inode(cpg->dentry), cpg->bsrc);
86dc4139 4952+ l = i_size_read(h_src_inode);
c2b27bf2
AM
4953+ if (cpg->len == -1 || l < cpg->len)
4954+ cpg->len = l;
4955+ if (cpg->len) {
86dc4139
AM
4956+ /* try stopping to update while we are referencing */
4957+ mutex_lock_nested(&h_src_inode->i_mutex, AuLsc_I_CHILD);
c2b27bf2 4958+ au_pin_hdir_unlock(cpg->pin);
1facf9fc 4959+
c2b27bf2
AM
4960+ h_path.dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
4961+ h_path.mnt = au_sbr_mnt(cpg->dentry->d_sb, cpg->bsrc);
86dc4139 4962+ h_src_attr->iflags = h_src_inode->i_flags;
5527c038
JR
4963+ if (!au_test_nfs(h_src_inode->i_sb))
4964+ err = vfs_getattr(&h_path, &h_src_attr->st);
4965+ else {
4966+ mutex_unlock(&h_src_inode->i_mutex);
4967+ err = vfs_getattr(&h_path, &h_src_attr->st);
4968+ mutex_lock_nested(&h_src_inode->i_mutex, AuLsc_I_CHILD);
4969+ }
86dc4139
AM
4970+ if (unlikely(err)) {
4971+ mutex_unlock(&h_src_inode->i_mutex);
4972+ goto out;
4973+ }
4974+ h_src_attr->valid = 1;
c2b27bf2 4975+ err = au_cp_regular(cpg);
86dc4139 4976+ mutex_unlock(&h_src_inode->i_mutex);
c2b27bf2 4977+ rerr = au_pin_hdir_relock(cpg->pin);
86dc4139
AM
4978+ if (!err && rerr)
4979+ err = rerr;
1facf9fc 4980+ }
38d290e6
JR
4981+ if (!err && (h_src_inode->i_state & I_LINKABLE)) {
4982+ h_path.dentry = au_h_dptr(cpg->dentry, cpg->bdst);
5527c038 4983+ h_dst_inode = d_inode(h_path.dentry);
38d290e6
JR
4984+ spin_lock(&h_dst_inode->i_lock);
4985+ h_dst_inode->i_state |= I_LINKABLE;
4986+ spin_unlock(&h_dst_inode->i_lock);
4987+ }
1facf9fc 4988+
4f0767ce 4989+out:
1facf9fc 4990+ return err;
4991+}
4992+
4993+static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src,
4994+ struct inode *h_dir)
4995+{
4996+ int err, symlen;
4997+ mm_segment_t old_fs;
b752ccd1
AM
4998+ union {
4999+ char *k;
5000+ char __user *u;
5001+ } sym;
5527c038
JR
5002+ struct inode *h_inode = d_inode(h_src);
5003+ const struct inode_operations *h_iop = h_inode->i_op;
1facf9fc 5004+
5005+ err = -ENOSYS;
5527c038 5006+ if (unlikely(!h_iop->readlink))
1facf9fc 5007+ goto out;
5008+
5009+ err = -ENOMEM;
537831f9 5010+ sym.k = (void *)__get_free_page(GFP_NOFS);
b752ccd1 5011+ if (unlikely(!sym.k))
1facf9fc 5012+ goto out;
5013+
9dbd164d 5014+ /* unnecessary to support mmap_sem since symlink is not mmap-able */
1facf9fc 5015+ old_fs = get_fs();
5016+ set_fs(KERNEL_DS);
5527c038 5017+ symlen = h_iop->readlink(h_src, sym.u, PATH_MAX);
1facf9fc 5018+ err = symlen;
5019+ set_fs(old_fs);
5020+
5021+ if (symlen > 0) {
b752ccd1
AM
5022+ sym.k[symlen] = 0;
5023+ err = vfsub_symlink(h_dir, h_path, sym.k);
1facf9fc 5024+ }
537831f9 5025+ free_page((unsigned long)sym.k);
1facf9fc 5026+
4f0767ce 5027+out:
1facf9fc 5028+ return err;
5029+}
5030+
8cdd5066
JR
5031+/*
5032+ * regardless 'acl' option, reset all ACL.
5033+ * All ACL will be copied up later from the original entry on the lower branch.
5034+ */
5035+static int au_reset_acl(struct inode *h_dir, struct path *h_path, umode_t mode)
5036+{
5037+ int err;
5038+ struct dentry *h_dentry;
5039+ struct inode *h_inode;
5040+
5041+ h_dentry = h_path->dentry;
5042+ h_inode = d_inode(h_dentry);
5043+ /* forget_all_cached_acls(h_inode)); */
5044+ err = vfsub_removexattr(h_dentry, XATTR_NAME_POSIX_ACL_ACCESS);
5045+ AuTraceErr(err);
5046+ if (err == -EOPNOTSUPP)
5047+ err = 0;
5048+ if (!err)
5049+ err = vfsub_acl_chmod(h_inode, mode);
5050+
5051+ AuTraceErr(err);
5052+ return err;
5053+}
5054+
5055+static int au_do_cpup_dir(struct au_cp_generic *cpg, struct dentry *dst_parent,
5056+ struct inode *h_dir, struct path *h_path)
5057+{
5058+ int err;
5059+ struct inode *dir, *inode;
5060+
5061+ err = vfsub_removexattr(h_path->dentry, XATTR_NAME_POSIX_ACL_DEFAULT);
5062+ AuTraceErr(err);
5063+ if (err == -EOPNOTSUPP)
5064+ err = 0;
5065+ if (unlikely(err))
5066+ goto out;
5067+
5068+ /*
5069+ * strange behaviour from the users view,
5070+ * particularry setattr case
5071+ */
5072+ dir = d_inode(dst_parent);
5073+ if (au_ibstart(dir) == cpg->bdst)
5074+ au_cpup_attr_nlink(dir, /*force*/1);
5075+ inode = d_inode(cpg->dentry);
5076+ au_cpup_attr_nlink(inode, /*force*/1);
5077+
5078+out:
5079+ return err;
5080+}
5081+
1facf9fc 5082+static noinline_for_stack
c2b27bf2 5083+int cpup_entry(struct au_cp_generic *cpg, struct dentry *dst_parent,
86dc4139 5084+ struct au_cpup_reg_attr *h_src_attr)
1facf9fc 5085+{
5086+ int err;
5087+ umode_t mode;
5088+ unsigned int mnt_flags;
076b876e 5089+ unsigned char isdir, isreg, force;
c2b27bf2 5090+ const unsigned char do_dt = !!au_ftest_cpup(cpg->flags, DTIME);
1facf9fc 5091+ struct au_dtime dt;
5092+ struct path h_path;
5093+ struct dentry *h_src, *h_dst, *h_parent;
8cdd5066 5094+ struct inode *h_inode, *h_dir;
1facf9fc 5095+ struct super_block *sb;
5096+
5097+ /* bsrc branch can be ro/rw. */
c2b27bf2 5098+ h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
5527c038
JR
5099+ h_inode = d_inode(h_src);
5100+ AuDebugOn(h_inode != au_h_iptr(d_inode(cpg->dentry), cpg->bsrc));
1facf9fc 5101+
5102+ /* try stopping to be referenced while we are creating */
c2b27bf2
AM
5103+ h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
5104+ if (au_ftest_cpup(cpg->flags, RENAME))
86dc4139
AM
5105+ AuDebugOn(strncmp(h_dst->d_name.name, AUFS_WH_PFX,
5106+ AUFS_WH_PFX_LEN));
1facf9fc 5107+ h_parent = h_dst->d_parent; /* dir inode is locked */
5527c038 5108+ h_dir = d_inode(h_parent);
1facf9fc 5109+ IMustLock(h_dir);
5110+ AuDebugOn(h_parent != h_dst->d_parent);
5111+
c2b27bf2
AM
5112+ sb = cpg->dentry->d_sb;
5113+ h_path.mnt = au_sbr_mnt(sb, cpg->bdst);
1facf9fc 5114+ if (do_dt) {
5115+ h_path.dentry = h_parent;
5116+ au_dtime_store(&dt, dst_parent, &h_path);
5117+ }
5118+ h_path.dentry = h_dst;
5119+
076b876e 5120+ isreg = 0;
1facf9fc 5121+ isdir = 0;
5122+ mode = h_inode->i_mode;
5123+ switch (mode & S_IFMT) {
5124+ case S_IFREG:
076b876e 5125+ isreg = 1;
8cdd5066 5126+ err = vfsub_create(h_dir, &h_path, S_IRUSR | S_IWUSR,
b4510431 5127+ /*want_excl*/true);
1facf9fc 5128+ if (!err)
c2b27bf2 5129+ err = au_do_cpup_regular(cpg, h_src_attr);
1facf9fc 5130+ break;
5131+ case S_IFDIR:
5132+ isdir = 1;
5133+ err = vfsub_mkdir(h_dir, &h_path, mode);
8cdd5066
JR
5134+ if (!err)
5135+ err = au_do_cpup_dir(cpg, dst_parent, h_dir, &h_path);
1facf9fc 5136+ break;
5137+ case S_IFLNK:
5138+ err = au_do_cpup_symlink(&h_path, h_src, h_dir);
5139+ break;
5140+ case S_IFCHR:
5141+ case S_IFBLK:
5142+ AuDebugOn(!capable(CAP_MKNOD));
5143+ /*FALLTHROUGH*/
5144+ case S_IFIFO:
5145+ case S_IFSOCK:
5146+ err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev);
5147+ break;
5148+ default:
5149+ AuIOErr("Unknown inode type 0%o\n", mode);
5150+ err = -EIO;
5151+ }
8cdd5066
JR
5152+ if (!err)
5153+ err = au_reset_acl(h_dir, &h_path, mode);
1facf9fc 5154+
5155+ mnt_flags = au_mntflags(sb);
5156+ if (!au_opt_test(mnt_flags, UDBA_NONE)
5157+ && !isdir
5158+ && au_opt_test(mnt_flags, XINO)
38d290e6
JR
5159+ && (h_inode->i_nlink == 1
5160+ || (h_inode->i_state & I_LINKABLE))
1facf9fc 5161+ /* todo: unnecessary? */
5527c038 5162+ /* && d_inode(cpg->dentry)->i_nlink == 1 */
c2b27bf2
AM
5163+ && cpg->bdst < cpg->bsrc
5164+ && !au_ftest_cpup(cpg->flags, KEEPLINO))
5165+ au_xino_write(sb, cpg->bsrc, h_inode->i_ino, /*ino*/0);
1facf9fc 5166+ /* ignore this error */
5167+
076b876e
AM
5168+ if (!err) {
5169+ force = 0;
5170+ if (isreg) {
5171+ force = !!cpg->len;
5172+ if (cpg->len == -1)
5173+ force = !!i_size_read(h_inode);
5174+ }
5175+ au_fhsm_wrote(sb, cpg->bdst, force);
5176+ }
5177+
1facf9fc 5178+ if (do_dt)
5179+ au_dtime_revert(&dt);
5180+ return err;
5181+}
5182+
392086de 5183+static int au_do_ren_after_cpup(struct au_cp_generic *cpg, struct path *h_path)
86dc4139
AM
5184+{
5185+ int err;
392086de 5186+ struct dentry *dentry, *h_dentry, *h_parent, *parent;
86dc4139 5187+ struct inode *h_dir;
392086de 5188+ aufs_bindex_t bdst;
86dc4139 5189+
392086de
AM
5190+ dentry = cpg->dentry;
5191+ bdst = cpg->bdst;
5192+ h_dentry = au_h_dptr(dentry, bdst);
5193+ if (!au_ftest_cpup(cpg->flags, OVERWRITE)) {
5194+ dget(h_dentry);
5195+ au_set_h_dptr(dentry, bdst, NULL);
5196+ err = au_lkup_neg(dentry, bdst, /*wh*/0);
5197+ if (!err)
5198+ h_path->dentry = dget(au_h_dptr(dentry, bdst));
86dc4139 5199+ au_set_h_dptr(dentry, bdst, h_dentry);
392086de
AM
5200+ } else {
5201+ err = 0;
5202+ parent = dget_parent(dentry);
5203+ h_parent = au_h_dptr(parent, bdst);
5204+ dput(parent);
5205+ h_path->dentry = vfsub_lkup_one(&dentry->d_name, h_parent);
5206+ if (IS_ERR(h_path->dentry))
5207+ err = PTR_ERR(h_path->dentry);
86dc4139 5208+ }
392086de
AM
5209+ if (unlikely(err))
5210+ goto out;
86dc4139 5211+
86dc4139 5212+ h_parent = h_dentry->d_parent; /* dir inode is locked */
5527c038 5213+ h_dir = d_inode(h_parent);
86dc4139 5214+ IMustLock(h_dir);
523b37e3
AM
5215+ AuDbg("%pd %pd\n", h_dentry, h_path->dentry);
5216+ /* no delegation since it is just created */
5217+ err = vfsub_rename(h_dir, h_dentry, h_dir, h_path, /*delegated*/NULL);
86dc4139
AM
5218+ dput(h_path->dentry);
5219+
5220+out:
5221+ return err;
5222+}
5223+
1facf9fc 5224+/*
5225+ * copyup the @dentry from @bsrc to @bdst.
5226+ * the caller must set the both of lower dentries.
5227+ * @len is for truncating when it is -1 copyup the entire file.
5228+ * in link/rename cases, @dst_parent may be different from the real one.
c2b27bf2 5229+ * basic->bsrc can be larger than basic->bdst.
1facf9fc 5230+ */
c2b27bf2 5231+static int au_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
1facf9fc 5232+{
5233+ int err, rerr;
5234+ aufs_bindex_t old_ibstart;
5235+ unsigned char isdir, plink;
1facf9fc 5236+ struct dentry *h_src, *h_dst, *h_parent;
5527c038 5237+ struct inode *dst_inode, *h_dir, *inode, *delegated, *src_inode;
1facf9fc 5238+ struct super_block *sb;
86dc4139 5239+ struct au_branch *br;
c2b27bf2
AM
5240+ /* to reuduce stack size */
5241+ struct {
5242+ struct au_dtime dt;
5243+ struct path h_path;
5244+ struct au_cpup_reg_attr h_src_attr;
5245+ } *a;
1facf9fc 5246+
c2b27bf2
AM
5247+ err = -ENOMEM;
5248+ a = kmalloc(sizeof(*a), GFP_NOFS);
5249+ if (unlikely(!a))
5250+ goto out;
5251+ a->h_src_attr.valid = 0;
1facf9fc 5252+
c2b27bf2
AM
5253+ sb = cpg->dentry->d_sb;
5254+ br = au_sbr(sb, cpg->bdst);
5255+ a->h_path.mnt = au_br_mnt(br);
5256+ h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
1facf9fc 5257+ h_parent = h_dst->d_parent; /* dir inode is locked */
5527c038 5258+ h_dir = d_inode(h_parent);
1facf9fc 5259+ IMustLock(h_dir);
5260+
c2b27bf2 5261+ h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
5527c038 5262+ inode = d_inode(cpg->dentry);
1facf9fc 5263+
5264+ if (!dst_parent)
c2b27bf2 5265+ dst_parent = dget_parent(cpg->dentry);
1facf9fc 5266+ else
5267+ dget(dst_parent);
5268+
5269+ plink = !!au_opt_test(au_mntflags(sb), PLINK);
c2b27bf2 5270+ dst_inode = au_h_iptr(inode, cpg->bdst);
1facf9fc 5271+ if (dst_inode) {
5272+ if (unlikely(!plink)) {
5273+ err = -EIO;
027c5e7a
AM
5274+ AuIOErr("hi%lu(i%lu) exists on b%d "
5275+ "but plink is disabled\n",
c2b27bf2
AM
5276+ dst_inode->i_ino, inode->i_ino, cpg->bdst);
5277+ goto out_parent;
1facf9fc 5278+ }
5279+
5280+ if (dst_inode->i_nlink) {
c2b27bf2 5281+ const int do_dt = au_ftest_cpup(cpg->flags, DTIME);
1facf9fc 5282+
c2b27bf2 5283+ h_src = au_plink_lkup(inode, cpg->bdst);
1facf9fc 5284+ err = PTR_ERR(h_src);
5285+ if (IS_ERR(h_src))
c2b27bf2 5286+ goto out_parent;
5527c038 5287+ if (unlikely(d_is_negative(h_src))) {
1facf9fc 5288+ err = -EIO;
79b8bda9 5289+ AuIOErr("i%lu exists on b%d "
027c5e7a 5290+ "but not pseudo-linked\n",
79b8bda9 5291+ inode->i_ino, cpg->bdst);
1facf9fc 5292+ dput(h_src);
c2b27bf2 5293+ goto out_parent;
1facf9fc 5294+ }
5295+
5296+ if (do_dt) {
c2b27bf2
AM
5297+ a->h_path.dentry = h_parent;
5298+ au_dtime_store(&a->dt, dst_parent, &a->h_path);
1facf9fc 5299+ }
86dc4139 5300+
c2b27bf2 5301+ a->h_path.dentry = h_dst;
523b37e3
AM
5302+ delegated = NULL;
5303+ err = vfsub_link(h_src, h_dir, &a->h_path, &delegated);
c2b27bf2 5304+ if (!err && au_ftest_cpup(cpg->flags, RENAME))
392086de 5305+ err = au_do_ren_after_cpup(cpg, &a->h_path);
1facf9fc 5306+ if (do_dt)
c2b27bf2 5307+ au_dtime_revert(&a->dt);
523b37e3
AM
5308+ if (unlikely(err == -EWOULDBLOCK)) {
5309+ pr_warn("cannot retry for NFSv4 delegation"
5310+ " for an internal link\n");
5311+ iput(delegated);
5312+ }
1facf9fc 5313+ dput(h_src);
c2b27bf2 5314+ goto out_parent;
1facf9fc 5315+ } else
5316+ /* todo: cpup_wh_file? */
5317+ /* udba work */
4a4d8108 5318+ au_update_ibrange(inode, /*do_put_zero*/1);
1facf9fc 5319+ }
5320+
86dc4139 5321+ isdir = S_ISDIR(inode->i_mode);
1facf9fc 5322+ old_ibstart = au_ibstart(inode);
c2b27bf2 5323+ err = cpup_entry(cpg, dst_parent, &a->h_src_attr);
1facf9fc 5324+ if (unlikely(err))
86dc4139 5325+ goto out_rev;
5527c038 5326+ dst_inode = d_inode(h_dst);
1facf9fc 5327+ mutex_lock_nested(&dst_inode->i_mutex, AuLsc_I_CHILD2);
86dc4139 5328+ /* todo: necessary? */
c2b27bf2 5329+ /* au_pin_hdir_unlock(cpg->pin); */
1facf9fc 5330+
c2b27bf2 5331+ err = cpup_iattr(cpg->dentry, cpg->bdst, h_src, &a->h_src_attr);
86dc4139
AM
5332+ if (unlikely(err)) {
5333+ /* todo: necessary? */
c2b27bf2 5334+ /* au_pin_hdir_relock(cpg->pin); */ /* ignore an error */
86dc4139
AM
5335+ mutex_unlock(&dst_inode->i_mutex);
5336+ goto out_rev;
5337+ }
5338+
c2b27bf2 5339+ if (cpg->bdst < old_ibstart) {
86dc4139 5340+ if (S_ISREG(inode->i_mode)) {
c2b27bf2 5341+ err = au_dy_iaop(inode, cpg->bdst, dst_inode);
86dc4139 5342+ if (unlikely(err)) {
c2b27bf2
AM
5343+ /* ignore an error */
5344+ /* au_pin_hdir_relock(cpg->pin); */
86dc4139
AM
5345+ mutex_unlock(&dst_inode->i_mutex);
5346+ goto out_rev;
4a4d8108 5347+ }
4a4d8108 5348+ }
c2b27bf2
AM
5349+ au_set_ibstart(inode, cpg->bdst);
5350+ } else
5351+ au_set_ibend(inode, cpg->bdst);
5352+ au_set_h_iptr(inode, cpg->bdst, au_igrab(dst_inode),
86dc4139
AM
5353+ au_hi_flags(inode, isdir));
5354+
5355+ /* todo: necessary? */
c2b27bf2 5356+ /* err = au_pin_hdir_relock(cpg->pin); */
86dc4139
AM
5357+ mutex_unlock(&dst_inode->i_mutex);
5358+ if (unlikely(err))
5359+ goto out_rev;
5360+
5527c038 5361+ src_inode = d_inode(h_src);
86dc4139 5362+ if (!isdir
5527c038
JR
5363+ && (src_inode->i_nlink > 1
5364+ || src_inode->i_state & I_LINKABLE)
86dc4139 5365+ && plink)
c2b27bf2 5366+ au_plink_append(inode, cpg->bdst, h_dst);
86dc4139 5367+
c2b27bf2
AM
5368+ if (au_ftest_cpup(cpg->flags, RENAME)) {
5369+ a->h_path.dentry = h_dst;
392086de 5370+ err = au_do_ren_after_cpup(cpg, &a->h_path);
86dc4139
AM
5371+ }
5372+ if (!err)
c2b27bf2 5373+ goto out_parent; /* success */
1facf9fc 5374+
5375+ /* revert */
4a4d8108 5376+out_rev:
c2b27bf2
AM
5377+ a->h_path.dentry = h_parent;
5378+ au_dtime_store(&a->dt, dst_parent, &a->h_path);
5379+ a->h_path.dentry = h_dst;
86dc4139 5380+ rerr = 0;
5527c038 5381+ if (d_is_positive(h_dst)) {
523b37e3
AM
5382+ if (!isdir) {
5383+ /* no delegation since it is just created */
5384+ rerr = vfsub_unlink(h_dir, &a->h_path,
5385+ /*delegated*/NULL, /*force*/0);
5386+ } else
c2b27bf2 5387+ rerr = vfsub_rmdir(h_dir, &a->h_path);
86dc4139 5388+ }
c2b27bf2 5389+ au_dtime_revert(&a->dt);
1facf9fc 5390+ if (rerr) {
5391+ AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr);
5392+ err = -EIO;
5393+ }
c2b27bf2 5394+out_parent:
1facf9fc 5395+ dput(dst_parent);
c2b27bf2
AM
5396+ kfree(a);
5397+out:
1facf9fc 5398+ return err;
5399+}
5400+
#if 0 /* reserved */
/* the arguments of au_cpup_single() packed for au_call_cpup_single() */
struct au_cpup_single_args {
	int *errp;			/* where the result is stored */
	struct au_cp_generic *cpg;
	struct dentry *dst_parent;
};

/*
 * call au_cpup_single() with the packed arguments, between
 * au_pin_hdir_acquire_nest() and au_pin_hdir_release() on cpg->pin.
 */
static void au_call_cpup_single(void *args)
{
	struct au_cpup_single_args *call = args;
	struct au_cp_generic *cpg = call->cpg;

	au_pin_hdir_acquire_nest(cpg->pin);
	*call->errp = au_cpup_single(cpg, call->dst_parent);
	au_pin_hdir_release(cpg->pin);
}
#endif
1facf9fc 5417+
53392da6
AM
5418+/*
5419+ * prevent SIGXFSZ in copy-up.
5420+ * testing CAP_MKNOD is for generic fs,
5421+ * but CAP_FSETID is for xfs only, currently.
5422+ */
86dc4139 5423+static int au_cpup_sio_test(struct au_pin *pin, umode_t mode)
53392da6
AM
5424+{
5425+ int do_sio;
86dc4139
AM
5426+ struct super_block *sb;
5427+ struct inode *h_dir;
53392da6
AM
5428+
5429+ do_sio = 0;
86dc4139 5430+ sb = au_pinned_parent(pin)->d_sb;
53392da6
AM
5431+ if (!au_wkq_test()
5432+ && (!au_sbi(sb)->si_plink_maint_pid
5433+ || au_plink_maint(sb, AuLock_NOPLM))) {
5434+ switch (mode & S_IFMT) {
5435+ case S_IFREG:
5436+ /* no condition about RLIMIT_FSIZE and the file size */
5437+ do_sio = 1;
5438+ break;
5439+ case S_IFCHR:
5440+ case S_IFBLK:
5441+ do_sio = !capable(CAP_MKNOD);
5442+ break;
5443+ }
5444+ if (!do_sio)
5445+ do_sio = ((mode & (S_ISUID | S_ISGID))
5446+ && !capable(CAP_FSETID));
86dc4139
AM
5447+ /* this workaround may be removed in the future */
5448+ if (!do_sio) {
5449+ h_dir = au_pinned_h_dir(pin);
5450+ do_sio = h_dir->i_mode & S_ISVTX;
5451+ }
53392da6
AM
5452+ }
5453+
5454+ return do_sio;
5455+}
5456+
#if 0 /* reserved */
/*
 * call au_cpup_single() either directly or, when au_cpup_sio_test() asks for
 * it, via the workqueue.
 * returns the result of au_cpup_single(), or the error of au_wkq_wait() when
 * that one fails.
 */
int au_sio_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
{
	int err, wkq_err;
	struct dentry *h_dentry;

	h_dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
	/* the pin lives in cpg, this function has no 'pin' of its own */
	if (!au_cpup_sio_test(cpg->pin, d_inode(h_dentry)->i_mode))
		err = au_cpup_single(cpg, dst_parent);
	else {
		struct au_cpup_single_args args = {
			.errp		= &err,
			.cpg		= cpg,
			.dst_parent	= dst_parent
		};
		wkq_err = au_wkq_wait(au_call_cpup_single, &args);
		if (unlikely(wkq_err))
			err = wkq_err;
	}

	return err;
}
#endif
1facf9fc 5480+
5481+/*
5482+ * copyup the @dentry from the first active lower branch to @bdst,
5483+ * using au_cpup_single().
5484+ */
c2b27bf2 5485+static int au_cpup_simple(struct au_cp_generic *cpg)
1facf9fc 5486+{
5487+ int err;
c2b27bf2
AM
5488+ unsigned int flags_orig;
5489+ struct dentry *dentry;
5490+
5491+ AuDebugOn(cpg->bsrc < 0);
1facf9fc 5492+
c2b27bf2 5493+ dentry = cpg->dentry;
86dc4139 5494+ DiMustWriteLock(dentry);
1facf9fc 5495+
c2b27bf2 5496+ err = au_lkup_neg(dentry, cpg->bdst, /*wh*/1);
1facf9fc 5497+ if (!err) {
c2b27bf2
AM
5498+ flags_orig = cpg->flags;
5499+ au_fset_cpup(cpg->flags, RENAME);
5500+ err = au_cpup_single(cpg, NULL);
5501+ cpg->flags = flags_orig;
1facf9fc 5502+ if (!err)
5503+ return 0; /* success */
5504+
5505+ /* revert */
c2b27bf2
AM
5506+ au_set_h_dptr(dentry, cpg->bdst, NULL);
5507+ au_set_dbstart(dentry, cpg->bsrc);
1facf9fc 5508+ }
5509+
5510+ return err;
5511+}
5512+
5513+struct au_cpup_simple_args {
5514+ int *errp;
c2b27bf2 5515+ struct au_cp_generic *cpg;
1facf9fc 5516+};
5517+
5518+static void au_call_cpup_simple(void *args)
5519+{
5520+ struct au_cpup_simple_args *a = args;
86dc4139 5521+
c2b27bf2
AM
5522+ au_pin_hdir_acquire_nest(a->cpg->pin);
5523+ *a->errp = au_cpup_simple(a->cpg);
5524+ au_pin_hdir_release(a->cpg->pin);
1facf9fc 5525+}
5526+
c2b27bf2 5527+static int au_do_sio_cpup_simple(struct au_cp_generic *cpg)
1facf9fc 5528+{
5529+ int err, wkq_err;
c2b27bf2
AM
5530+ struct dentry *dentry, *parent;
5531+ struct file *h_file;
1facf9fc 5532+ struct inode *h_dir;
5533+
c2b27bf2
AM
5534+ dentry = cpg->dentry;
5535+ h_file = NULL;
5536+ if (au_ftest_cpup(cpg->flags, HOPEN)) {
5537+ AuDebugOn(cpg->bsrc < 0);
392086de 5538+ h_file = au_h_open_pre(dentry, cpg->bsrc, /*force_wr*/0);
c2b27bf2
AM
5539+ err = PTR_ERR(h_file);
5540+ if (IS_ERR(h_file))
5541+ goto out;
5542+ }
5543+
1facf9fc 5544+ parent = dget_parent(dentry);
5527c038 5545+ h_dir = au_h_iptr(d_inode(parent), cpg->bdst);
53392da6 5546+ if (!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE)
5527c038 5547+ && !au_cpup_sio_test(cpg->pin, d_inode(dentry)->i_mode))
c2b27bf2 5548+ err = au_cpup_simple(cpg);
1facf9fc 5549+ else {
5550+ struct au_cpup_simple_args args = {
5551+ .errp = &err,
c2b27bf2 5552+ .cpg = cpg
1facf9fc 5553+ };
5554+ wkq_err = au_wkq_wait(au_call_cpup_simple, &args);
5555+ if (unlikely(wkq_err))
5556+ err = wkq_err;
5557+ }
5558+
5559+ dput(parent);
c2b27bf2
AM
5560+ if (h_file)
5561+ au_h_open_post(dentry, cpg->bsrc, h_file);
5562+
5563+out:
1facf9fc 5564+ return err;
5565+}
5566+
c2b27bf2 5567+int au_sio_cpup_simple(struct au_cp_generic *cpg)
367653fa 5568+{
c2b27bf2
AM
5569+ aufs_bindex_t bsrc, bend;
5570+ struct dentry *dentry, *h_dentry;
367653fa 5571+
c2b27bf2
AM
5572+ if (cpg->bsrc < 0) {
5573+ dentry = cpg->dentry;
5574+ bend = au_dbend(dentry);
5575+ for (bsrc = cpg->bdst + 1; bsrc <= bend; bsrc++) {
5576+ h_dentry = au_h_dptr(dentry, bsrc);
5577+ if (h_dentry) {
5527c038 5578+ AuDebugOn(d_is_negative(h_dentry));
c2b27bf2
AM
5579+ break;
5580+ }
5581+ }
5582+ AuDebugOn(bsrc > bend);
5583+ cpg->bsrc = bsrc;
367653fa 5584+ }
c2b27bf2
AM
5585+ AuDebugOn(cpg->bsrc <= cpg->bdst);
5586+ return au_do_sio_cpup_simple(cpg);
5587+}
367653fa 5588+
c2b27bf2
AM
5589+int au_sio_cpdown_simple(struct au_cp_generic *cpg)
5590+{
5591+ AuDebugOn(cpg->bdst <= cpg->bsrc);
5592+ return au_do_sio_cpup_simple(cpg);
367653fa
AM
5593+}
5594+
1facf9fc 5595+/* ---------------------------------------------------------------------- */
5596+
5597+/*
5598+ * copyup the deleted file for writing.
5599+ */
c2b27bf2
AM
5600+static int au_do_cpup_wh(struct au_cp_generic *cpg, struct dentry *wh_dentry,
5601+ struct file *file)
1facf9fc 5602+{
5603+ int err;
c2b27bf2
AM
5604+ unsigned int flags_orig;
5605+ aufs_bindex_t bsrc_orig;
1facf9fc 5606+ struct dentry *h_d_dst, *h_d_start;
c2b27bf2 5607+ struct au_dinfo *dinfo;
4a4d8108 5608+ struct au_hdentry *hdp;
1facf9fc 5609+
c2b27bf2 5610+ dinfo = au_di(cpg->dentry);
1308ab2a 5611+ AuRwMustWriteLock(&dinfo->di_rwsem);
5612+
c2b27bf2
AM
5613+ bsrc_orig = cpg->bsrc;
5614+ cpg->bsrc = dinfo->di_bstart;
4a4d8108 5615+ hdp = dinfo->di_hdentry;
c2b27bf2
AM
5616+ h_d_dst = hdp[0 + cpg->bdst].hd_dentry;
5617+ dinfo->di_bstart = cpg->bdst;
5618+ hdp[0 + cpg->bdst].hd_dentry = wh_dentry;
86dc4139 5619+ h_d_start = NULL;
027c5e7a 5620+ if (file) {
c2b27bf2 5621+ h_d_start = hdp[0 + cpg->bsrc].hd_dentry;
2000de60 5622+ hdp[0 + cpg->bsrc].hd_dentry = au_hf_top(file)->f_path.dentry;
027c5e7a 5623+ }
c2b27bf2
AM
5624+ flags_orig = cpg->flags;
5625+ cpg->flags = !AuCpup_DTIME;
5626+ err = au_cpup_single(cpg, /*h_parent*/NULL);
5627+ cpg->flags = flags_orig;
027c5e7a
AM
5628+ if (file) {
5629+ if (!err)
5630+ err = au_reopen_nondir(file);
c2b27bf2 5631+ hdp[0 + cpg->bsrc].hd_dentry = h_d_start;
1facf9fc 5632+ }
c2b27bf2
AM
5633+ hdp[0 + cpg->bdst].hd_dentry = h_d_dst;
5634+ dinfo->di_bstart = cpg->bsrc;
5635+ cpg->bsrc = bsrc_orig;
1facf9fc 5636+
5637+ return err;
5638+}
5639+
c2b27bf2 5640+static int au_cpup_wh(struct au_cp_generic *cpg, struct file *file)
1facf9fc 5641+{
5642+ int err;
c2b27bf2 5643+ aufs_bindex_t bdst;
1facf9fc 5644+ struct au_dtime dt;
c2b27bf2 5645+ struct dentry *dentry, *parent, *h_parent, *wh_dentry;
1facf9fc 5646+ struct au_branch *br;
5647+ struct path h_path;
5648+
c2b27bf2
AM
5649+ dentry = cpg->dentry;
5650+ bdst = cpg->bdst;
1facf9fc 5651+ br = au_sbr(dentry->d_sb, bdst);
5652+ parent = dget_parent(dentry);
5653+ h_parent = au_h_dptr(parent, bdst);
5654+ wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
5655+ err = PTR_ERR(wh_dentry);
5656+ if (IS_ERR(wh_dentry))
5657+ goto out;
5658+
5659+ h_path.dentry = h_parent;
86dc4139 5660+ h_path.mnt = au_br_mnt(br);
1facf9fc 5661+ au_dtime_store(&dt, parent, &h_path);
c2b27bf2 5662+ err = au_do_cpup_wh(cpg, wh_dentry, file);
1facf9fc 5663+ if (unlikely(err))
5664+ goto out_wh;
5665+
5666+ dget(wh_dentry);
5667+ h_path.dentry = wh_dentry;
2000de60 5668+ if (!d_is_dir(wh_dentry)) {
523b37e3 5669+ /* no delegation since it is just created */
5527c038 5670+ err = vfsub_unlink(d_inode(h_parent), &h_path,
523b37e3
AM
5671+ /*delegated*/NULL, /*force*/0);
5672+ } else
5527c038 5673+ err = vfsub_rmdir(d_inode(h_parent), &h_path);
1facf9fc 5674+ if (unlikely(err)) {
523b37e3
AM
5675+ AuIOErr("failed remove copied-up tmp file %pd(%d)\n",
5676+ wh_dentry, err);
1facf9fc 5677+ err = -EIO;
5678+ }
5679+ au_dtime_revert(&dt);
5527c038 5680+ au_set_hi_wh(d_inode(dentry), bdst, wh_dentry);
1facf9fc 5681+
4f0767ce 5682+out_wh:
1facf9fc 5683+ dput(wh_dentry);
4f0767ce 5684+out:
1facf9fc 5685+ dput(parent);
5686+ return err;
5687+}
5688+
5689+struct au_cpup_wh_args {
5690+ int *errp;
c2b27bf2 5691+ struct au_cp_generic *cpg;
1facf9fc 5692+ struct file *file;
5693+};
5694+
5695+static void au_call_cpup_wh(void *args)
5696+{
5697+ struct au_cpup_wh_args *a = args;
86dc4139 5698+
c2b27bf2
AM
5699+ au_pin_hdir_acquire_nest(a->cpg->pin);
5700+ *a->errp = au_cpup_wh(a->cpg, a->file);
5701+ au_pin_hdir_release(a->cpg->pin);
1facf9fc 5702+}
5703+
c2b27bf2 5704+int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file)
1facf9fc 5705+{
5706+ int err, wkq_err;
c2b27bf2 5707+ aufs_bindex_t bdst;
c1595e42 5708+ struct dentry *dentry, *parent, *h_orph, *h_parent;
86dc4139 5709+ struct inode *dir, *h_dir, *h_tmpdir;
1facf9fc 5710+ struct au_wbr *wbr;
c2b27bf2 5711+ struct au_pin wh_pin, *pin_orig;
1facf9fc 5712+
c2b27bf2
AM
5713+ dentry = cpg->dentry;
5714+ bdst = cpg->bdst;
1facf9fc 5715+ parent = dget_parent(dentry);
5527c038 5716+ dir = d_inode(parent);
1facf9fc 5717+ h_orph = NULL;
5718+ h_parent = NULL;
5719+ h_dir = au_igrab(au_h_iptr(dir, bdst));
5720+ h_tmpdir = h_dir;
c2b27bf2 5721+ pin_orig = NULL;
1facf9fc 5722+ if (!h_dir->i_nlink) {
5723+ wbr = au_sbr(dentry->d_sb, bdst)->br_wbr;
5724+ h_orph = wbr->wbr_orph;
5725+
5726+ h_parent = dget(au_h_dptr(parent, bdst));
1facf9fc 5727+ au_set_h_dptr(parent, bdst, dget(h_orph));
5527c038 5728+ h_tmpdir = d_inode(h_orph);
1facf9fc 5729+ au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0);
5730+
dece6358 5731+ mutex_lock_nested(&h_tmpdir->i_mutex, AuLsc_I_PARENT3);
4a4d8108 5732+ /* todo: au_h_open_pre()? */
86dc4139 5733+
c2b27bf2 5734+ pin_orig = cpg->pin;
86dc4139 5735+ au_pin_init(&wh_pin, dentry, bdst, AuLsc_DI_PARENT,
c2b27bf2
AM
5736+ AuLsc_I_PARENT3, cpg->pin->udba, AuPin_DI_LOCKED);
5737+ cpg->pin = &wh_pin;
1facf9fc 5738+ }
5739+
53392da6 5740+ if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE)
5527c038 5741+ && !au_cpup_sio_test(cpg->pin, d_inode(dentry)->i_mode))
c2b27bf2 5742+ err = au_cpup_wh(cpg, file);
1facf9fc 5743+ else {
5744+ struct au_cpup_wh_args args = {
5745+ .errp = &err,
c2b27bf2
AM
5746+ .cpg = cpg,
5747+ .file = file
1facf9fc 5748+ };
5749+ wkq_err = au_wkq_wait(au_call_cpup_wh, &args);
5750+ if (unlikely(wkq_err))
5751+ err = wkq_err;
5752+ }
5753+
5754+ if (h_orph) {
5755+ mutex_unlock(&h_tmpdir->i_mutex);
4a4d8108 5756+ /* todo: au_h_open_post()? */
1facf9fc 5757+ au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0);
1facf9fc 5758+ au_set_h_dptr(parent, bdst, h_parent);
c2b27bf2
AM
5759+ AuDebugOn(!pin_orig);
5760+ cpg->pin = pin_orig;
1facf9fc 5761+ }
5762+ iput(h_dir);
5763+ dput(parent);
5764+
5765+ return err;
5766+}
5767+
5768+/* ---------------------------------------------------------------------- */
5769+
5770+/*
5771+ * generic routine for both of copy-up and copy-down.
5772+ */
5773+/* cf. revalidate function in file.c */
5774+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
5775+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 5776+ struct au_pin *pin,
1facf9fc 5777+ struct dentry *h_parent, void *arg),
5778+ void *arg)
5779+{
5780+ int err;
5781+ struct au_pin pin;
5527c038 5782+ struct dentry *d, *parent, *h_parent, *real_parent, *h_dentry;
1facf9fc 5783+
5784+ err = 0;
5785+ parent = dget_parent(dentry);
5786+ if (IS_ROOT(parent))
5787+ goto out;
5788+
5789+ au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2,
5790+ au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE);
5791+
5792+ /* do not use au_dpage */
5793+ real_parent = parent;
5794+ while (1) {
5795+ dput(parent);
5796+ parent = dget_parent(dentry);
5797+ h_parent = au_h_dptr(parent, bdst);
5798+ if (h_parent)
5799+ goto out; /* success */
5800+
5801+ /* find top dir which is necessary to cpup */
5802+ do {
5803+ d = parent;
5804+ dput(parent);
5805+ parent = dget_parent(d);
5806+ di_read_lock_parent3(parent, !AuLock_IR);
5807+ h_parent = au_h_dptr(parent, bdst);
5808+ di_read_unlock(parent, !AuLock_IR);
5809+ } while (!h_parent);
5810+
5811+ if (d != real_parent)
5812+ di_write_lock_child3(d);
5813+
5814+ /* somebody else might create while we were sleeping */
5527c038
JR
5815+ h_dentry = au_h_dptr(d, bdst);
5816+ if (!h_dentry || d_is_negative(h_dentry)) {
5817+ if (h_dentry)
1facf9fc 5818+ au_update_dbstart(d);
5819+
5820+ au_pin_set_dentry(&pin, d);
5821+ err = au_do_pin(&pin);
5822+ if (!err) {
86dc4139 5823+ err = cp(d, bdst, &pin, h_parent, arg);
1facf9fc 5824+ au_unpin(&pin);
5825+ }
5826+ }
5827+
5828+ if (d != real_parent)
5829+ di_write_unlock(d);
5830+ if (unlikely(err))
5831+ break;
5832+ }
5833+
4f0767ce 5834+out:
1facf9fc 5835+ dput(parent);
5836+ return err;
5837+}
5838+
5839+static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 5840+ struct au_pin *pin,
2000de60 5841+ struct dentry *h_parent __maybe_unused,
1facf9fc 5842+ void *arg __maybe_unused)
5843+{
c2b27bf2
AM
5844+ struct au_cp_generic cpg = {
5845+ .dentry = dentry,
5846+ .bdst = bdst,
5847+ .bsrc = -1,
5848+ .len = 0,
5849+ .pin = pin,
5850+ .flags = AuCpup_DTIME
5851+ };
5852+ return au_sio_cpup_simple(&cpg);
1facf9fc 5853+}
5854+
5855+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
5856+{
5857+ return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL);
5858+}
5859+
5860+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
5861+{
5862+ int err;
5863+ struct dentry *parent;
5864+ struct inode *dir;
5865+
5866+ parent = dget_parent(dentry);
5527c038 5867+ dir = d_inode(parent);
1facf9fc 5868+ err = 0;
5869+ if (au_h_iptr(dir, bdst))
5870+ goto out;
5871+
5872+ di_read_unlock(parent, AuLock_IR);
5873+ di_write_lock_parent(parent);
5874+ /* someone else might change our inode while we were sleeping */
5875+ if (!au_h_iptr(dir, bdst))
5876+ err = au_cpup_dirs(dentry, bdst);
5877+ di_downgrade_lock(parent, AuLock_IR);
5878+
4f0767ce 5879+out:
1facf9fc 5880+ dput(parent);
5881+ return err;
5882+}
7f207e10
AM
5883diff -urN /usr/share/empty/fs/aufs/cpup.h linux/fs/aufs/cpup.h
5884--- /usr/share/empty/fs/aufs/cpup.h 1970-01-01 01:00:00.000000000 +0100
8cdd5066 5885+++ linux/fs/aufs/cpup.h 2016-02-28 11:26:32.569971135 +0100
523b37e3 5886@@ -0,0 +1,94 @@
1facf9fc 5887+/*
8cdd5066 5888+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 5889+ *
5890+ * This program, aufs is free software; you can redistribute it and/or modify
5891+ * it under the terms of the GNU General Public License as published by
5892+ * the Free Software Foundation; either version 2 of the License, or
5893+ * (at your option) any later version.
dece6358
AM
5894+ *
5895+ * This program is distributed in the hope that it will be useful,
5896+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5897+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5898+ * GNU General Public License for more details.
5899+ *
5900+ * You should have received a copy of the GNU General Public License
523b37e3 5901+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 5902+ */
5903+
5904+/*
5905+ * copy-up/down functions
5906+ */
5907+
5908+#ifndef __AUFS_CPUP_H__
5909+#define __AUFS_CPUP_H__
5910+
5911+#ifdef __KERNEL__
5912+
dece6358 5913+#include <linux/path.h>
1facf9fc 5914+
dece6358
AM
5915+struct inode;
5916+struct file;
86dc4139 5917+struct au_pin;
dece6358 5918+
86dc4139 5919+void au_cpup_attr_flags(struct inode *dst, unsigned int iflags);
1facf9fc 5920+void au_cpup_attr_timesizes(struct inode *inode);
5921+void au_cpup_attr_nlink(struct inode *inode, int force);
5922+void au_cpup_attr_changeable(struct inode *inode);
5923+void au_cpup_igen(struct inode *inode, struct inode *h_inode);
5924+void au_cpup_attr_all(struct inode *inode, int force);
5925+
5926+/* ---------------------------------------------------------------------- */
5927+
c2b27bf2
AM
5928+struct au_cp_generic {
5929+ struct dentry *dentry;
5930+ aufs_bindex_t bdst, bsrc;
5931+ loff_t len;
5932+ struct au_pin *pin;
5933+ unsigned int flags;
5934+};
5935+
/* cpup flags, one bit each */
#define AuCpup_DTIME		(1 << 0)	/* do dtime_store/revert */
#define AuCpup_KEEPLINO		(1 << 1)	/* do not clear the lower xino,
						   for link(2) */
#define AuCpup_RENAME		(1 << 2)	/* rename after cpup */
#define AuCpup_HOPEN		(1 << 3)	/* call h_open_pre/post() in
						   cpup */
#define AuCpup_OVERWRITE	(1 << 4)	/* allow overwriting the
						   existing entry */
#define AuCpup_RWDST		(1 << 5)	/* force write target even if
						   the branch is marked as RO */

/* test/set/clear AuCpup_<name> in the lvalue 'flags' */
#define au_ftest_cpup(flags, name)	((flags) & AuCpup_##name)
#define au_fset_cpup(flags, name) \
	do { \
		(flags) |= AuCpup_##name; \
	} while (0)
#define au_fclr_cpup(flags, name) \
	do { \
		(flags) &= ~AuCpup_##name; \
	} while (0)
1facf9fc 5953+
5954+int au_copy_file(struct file *dst, struct file *src, loff_t len);
c2b27bf2
AM
5955+int au_sio_cpup_simple(struct au_cp_generic *cpg);
5956+int au_sio_cpdown_simple(struct au_cp_generic *cpg);
5957+int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file);
1facf9fc 5958+
5959+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
5960+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 5961+ struct au_pin *pin,
1facf9fc 5962+ struct dentry *h_parent, void *arg),
5963+ void *arg);
5964+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
5965+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
5966+
5967+/* ---------------------------------------------------------------------- */
5968+
5969+/* keep timestamps when copyup */
5970+struct au_dtime {
5971+ struct dentry *dt_dentry;
5972+ struct path dt_h_path;
5973+ struct timespec dt_atime, dt_mtime;
5974+};
5975+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
5976+ struct path *h_path);
5977+void au_dtime_revert(struct au_dtime *dt);
5978+
5979+#endif /* __KERNEL__ */
5980+#endif /* __AUFS_CPUP_H__ */
7f207e10
AM
5981diff -urN /usr/share/empty/fs/aufs/dbgaufs.c linux/fs/aufs/dbgaufs.c
5982--- /usr/share/empty/fs/aufs/dbgaufs.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 5983+++ linux/fs/aufs/dbgaufs.c 2016-02-28 11:26:32.569971135 +0100
523b37e3 5984@@ -0,0 +1,432 @@
1facf9fc 5985+/*
8cdd5066 5986+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 5987+ *
5988+ * This program, aufs is free software; you can redistribute it and/or modify
5989+ * it under the terms of the GNU General Public License as published by
5990+ * the Free Software Foundation; either version 2 of the License, or
5991+ * (at your option) any later version.
dece6358
AM
5992+ *
5993+ * This program is distributed in the hope that it will be useful,
5994+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5995+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5996+ * GNU General Public License for more details.
5997+ *
5998+ * You should have received a copy of the GNU General Public License
523b37e3 5999+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6000+ */
6001+
6002+/*
6003+ * debugfs interface
6004+ */
6005+
6006+#include <linux/debugfs.h>
6007+#include "aufs.h"
6008+
6009+#ifndef CONFIG_SYSFS
6010+#error DEBUG_FS depends upon SYSFS
6011+#endif
6012+
6013+static struct dentry *dbgaufs;
6014+static const mode_t dbgaufs_mode = S_IRUSR | S_IRGRP | S_IROTH;
6015+
/*
 * text buffer attached to an opened XINO debugfs file.
 * a 64bit unsigned long takes at most 20 decimal digits, and there is
 * room for four such numbers.
 */
struct dbgaufs_arg {
	int	n;		/* length of the text in a[] */
	char	a[20 * 4];
};
6021+
6022+/*
6023+ * common function for all XINO files
6024+ */
6025+static int dbgaufs_xi_release(struct inode *inode __maybe_unused,
6026+ struct file *file)
6027+{
6028+ kfree(file->private_data);
6029+ return 0;
6030+}
6031+
6032+static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt)
6033+{
6034+ int err;
6035+ struct kstat st;
6036+ struct dbgaufs_arg *p;
6037+
6038+ err = -ENOMEM;
6039+ p = kmalloc(sizeof(*p), GFP_NOFS);
6040+ if (unlikely(!p))
6041+ goto out;
6042+
6043+ err = 0;
6044+ p->n = 0;
6045+ file->private_data = p;
6046+ if (!xf)
6047+ goto out;
6048+
c06a8ce3 6049+ err = vfs_getattr(&xf->f_path, &st);
1facf9fc 6050+ if (!err) {
6051+ if (do_fcnt)
6052+ p->n = snprintf
6053+ (p->a, sizeof(p->a), "%ld, %llux%lu %lld\n",
6054+ (long)file_count(xf), st.blocks, st.blksize,
6055+ (long long)st.size);
6056+ else
6057+ p->n = snprintf(p->a, sizeof(p->a), "%llux%lu %lld\n",
6058+ st.blocks, st.blksize,
6059+ (long long)st.size);
6060+ AuDebugOn(p->n >= sizeof(p->a));
6061+ } else {
6062+ p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err);
6063+ err = 0;
6064+ }
6065+
4f0767ce 6066+out:
1facf9fc 6067+ return err;
6068+
6069+}
6070+
6071+static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf,
6072+ size_t count, loff_t *ppos)
6073+{
6074+ struct dbgaufs_arg *p;
6075+
6076+ p = file->private_data;
6077+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
6078+}
6079+
6080+/* ---------------------------------------------------------------------- */
6081+
86dc4139
AM
/* header of the page which holds the text of the "plink" file */
struct dbgaufs_plink_arg {
	int	n;	/* length of the text in a[] */
	char	a[];
};
6086+
6087+static int dbgaufs_plink_release(struct inode *inode __maybe_unused,
6088+ struct file *file)
6089+{
6090+ free_page((unsigned long)file->private_data);
6091+ return 0;
6092+}
6093+
6094+static int dbgaufs_plink_open(struct inode *inode, struct file *file)
6095+{
6096+ int err, i, limit;
6097+ unsigned long n, sum;
6098+ struct dbgaufs_plink_arg *p;
6099+ struct au_sbinfo *sbinfo;
6100+ struct super_block *sb;
6101+ struct au_sphlhead *sphl;
6102+
6103+ err = -ENOMEM;
6104+ p = (void *)get_zeroed_page(GFP_NOFS);
6105+ if (unlikely(!p))
6106+ goto out;
6107+
6108+ err = -EFBIG;
6109+ sbinfo = inode->i_private;
6110+ sb = sbinfo->si_sb;
6111+ si_noflush_read_lock(sb);
6112+ if (au_opt_test(au_mntflags(sb), PLINK)) {
6113+ limit = PAGE_SIZE - sizeof(p->n);
6114+
6115+ /* the number of buckets */
6116+ n = snprintf(p->a + p->n, limit, "%d\n", AuPlink_NHASH);
6117+ p->n += n;
6118+ limit -= n;
6119+
6120+ sum = 0;
6121+ for (i = 0, sphl = sbinfo->si_plink;
6122+ i < AuPlink_NHASH;
6123+ i++, sphl++) {
6124+ n = au_sphl_count(sphl);
6125+ sum += n;
6126+
6127+ n = snprintf(p->a + p->n, limit, "%lu ", n);
6128+ p->n += n;
6129+ limit -= n;
6130+ if (unlikely(limit <= 0))
6131+ goto out_free;
6132+ }
6133+ p->a[p->n - 1] = '\n';
6134+
6135+ /* the sum of plinks */
6136+ n = snprintf(p->a + p->n, limit, "%lu\n", sum);
6137+ p->n += n;
6138+ limit -= n;
6139+ if (unlikely(limit <= 0))
6140+ goto out_free;
6141+ } else {
6142+#define str "1\n0\n0\n"
6143+ p->n = sizeof(str) - 1;
6144+ strcpy(p->a, str);
6145+#undef str
6146+ }
6147+ si_read_unlock(sb);
6148+
6149+ err = 0;
6150+ file->private_data = p;
6151+ goto out; /* success */
6152+
6153+out_free:
6154+ free_page((unsigned long)p);
6155+out:
6156+ return err;
6157+}
6158+
6159+static ssize_t dbgaufs_plink_read(struct file *file, char __user *buf,
6160+ size_t count, loff_t *ppos)
6161+{
6162+ struct dbgaufs_plink_arg *p;
6163+
6164+ p = file->private_data;
6165+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
6166+}
6167+
6168+static const struct file_operations dbgaufs_plink_fop = {
6169+ .owner = THIS_MODULE,
6170+ .open = dbgaufs_plink_open,
6171+ .release = dbgaufs_plink_release,
6172+ .read = dbgaufs_plink_read
6173+};
6174+
6175+/* ---------------------------------------------------------------------- */
6176+
1facf9fc 6177+static int dbgaufs_xib_open(struct inode *inode, struct file *file)
6178+{
6179+ int err;
6180+ struct au_sbinfo *sbinfo;
6181+ struct super_block *sb;
6182+
6183+ sbinfo = inode->i_private;
6184+ sb = sbinfo->si_sb;
6185+ si_noflush_read_lock(sb);
6186+ err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0);
6187+ si_read_unlock(sb);
6188+ return err;
6189+}
6190+
6191+static const struct file_operations dbgaufs_xib_fop = {
4a4d8108 6192+ .owner = THIS_MODULE,
1facf9fc 6193+ .open = dbgaufs_xib_open,
6194+ .release = dbgaufs_xi_release,
6195+ .read = dbgaufs_xi_read
6196+};
6197+
6198+/* ---------------------------------------------------------------------- */
6199+
6200+#define DbgaufsXi_PREFIX "xi"
6201+
6202+static int dbgaufs_xino_open(struct inode *inode, struct file *file)
6203+{
6204+ int err;
6205+ long l;
6206+ struct au_sbinfo *sbinfo;
6207+ struct super_block *sb;
6208+ struct file *xf;
6209+ struct qstr *name;
6210+
6211+ err = -ENOENT;
6212+ xf = NULL;
2000de60 6213+ name = &file->f_path.dentry->d_name;
1facf9fc 6214+ if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX)
6215+ || memcmp(name->name, DbgaufsXi_PREFIX,
6216+ sizeof(DbgaufsXi_PREFIX) - 1)))
6217+ goto out;
9dbd164d 6218+ err = kstrtol(name->name + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l);
1facf9fc 6219+ if (unlikely(err))
6220+ goto out;
6221+
6222+ sbinfo = inode->i_private;
6223+ sb = sbinfo->si_sb;
6224+ si_noflush_read_lock(sb);
6225+ if (l <= au_sbend(sb)) {
6226+ xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file;
6227+ err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1);
6228+ } else
6229+ err = -ENOENT;
6230+ si_read_unlock(sb);
6231+
4f0767ce 6232+out:
1facf9fc 6233+ return err;
6234+}
6235+
6236+static const struct file_operations dbgaufs_xino_fop = {
4a4d8108 6237+ .owner = THIS_MODULE,
1facf9fc 6238+ .open = dbgaufs_xino_open,
6239+ .release = dbgaufs_xi_release,
6240+ .read = dbgaufs_xi_read
6241+};
6242+
6243+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
6244+{
6245+ aufs_bindex_t bend;
6246+ struct au_branch *br;
6247+ struct au_xino_file *xi;
6248+
6249+ if (!au_sbi(sb)->si_dbgaufs)
6250+ return;
6251+
6252+ bend = au_sbend(sb);
6253+ for (; bindex <= bend; bindex++) {
6254+ br = au_sbr(sb, bindex);
6255+ xi = &br->br_xino;
c06a8ce3
AM
6256+ debugfs_remove(xi->xi_dbgaufs);
6257+ xi->xi_dbgaufs = NULL;
1facf9fc 6258+ }
6259+}
6260+
6261+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
6262+{
6263+ struct au_sbinfo *sbinfo;
6264+ struct dentry *parent;
6265+ struct au_branch *br;
6266+ struct au_xino_file *xi;
6267+ aufs_bindex_t bend;
6268+ char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" bindex NULL */
6269+
6270+ sbinfo = au_sbi(sb);
6271+ parent = sbinfo->si_dbgaufs;
6272+ if (!parent)
6273+ return;
6274+
6275+ bend = au_sbend(sb);
6276+ for (; bindex <= bend; bindex++) {
6277+ snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex);
6278+ br = au_sbr(sb, bindex);
6279+ xi = &br->br_xino;
6280+ AuDebugOn(xi->xi_dbgaufs);
6281+ xi->xi_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent,
6282+ sbinfo, &dbgaufs_xino_fop);
6283+ /* ignore an error */
6284+ if (unlikely(!xi->xi_dbgaufs))
6285+ AuWarn1("failed %s under debugfs\n", name);
6286+ }
6287+}
6288+
6289+/* ---------------------------------------------------------------------- */
6290+
#ifdef CONFIG_AUFS_EXPORT
/* ->open() of the "xigen" file: shows sbinfo->si_xigen without a file count */
static int dbgaufs_xigen_open(struct inode *inode, struct file *file)
{
	int err;
	struct au_sbinfo *sbinfo = inode->i_private;
	struct super_block *sb = sbinfo->si_sb;

	si_noflush_read_lock(sb);
	err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0);
	si_read_unlock(sb);

	return err;
}

static const struct file_operations dbgaufs_xigen_fop = {
	.owner		= THIS_MODULE,
	.open		= dbgaufs_xigen_open,
	.read		= dbgaufs_xi_read,
	.release	= dbgaufs_xi_release
};

/*
 * Create the "xigen" file under the debugfs dir of the superblock.
 * Returns zero, or -EIO when debugfs_create_file() returns NULL.
 */
static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
{
	struct dentry *xigen;

	/*
	 * This function is a dynamic '__init' function actually,
	 * so the tiny check for si_rwsem is unnecessary.
	 */
	/* AuRwMustWriteLock(&sbinfo->si_rwsem); */

	xigen = debugfs_create_file("xigen", dbgaufs_mode, sbinfo->si_dbgaufs,
				    sbinfo, &dbgaufs_xigen_fop);
	sbinfo->si_dbgaufs_xigen = xigen;

	return xigen ? 0 : -EIO;
}
#else
/* without CONFIG_AUFS_EXPORT there is no "xigen" file to create */
static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
{
	return 0;
}
#endif /* CONFIG_AUFS_EXPORT */
6338+
6339+/* ---------------------------------------------------------------------- */
6340+
6341+void dbgaufs_si_fin(struct au_sbinfo *sbinfo)
6342+{
dece6358 6343+ /*
7e9cd9fe 6344+ * This function is a dynamic '__fin' function actually,
dece6358
AM
6345+ * so the tiny check for si_rwsem is unnecessary.
6346+ */
6347+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
6348+
1facf9fc 6349+ debugfs_remove_recursive(sbinfo->si_dbgaufs);
6350+ sbinfo->si_dbgaufs = NULL;
6351+ kobject_put(&sbinfo->si_kobj);
6352+}
6353+
6354+int dbgaufs_si_init(struct au_sbinfo *sbinfo)
6355+{
6356+ int err;
6357+ char name[SysaufsSiNameLen];
6358+
dece6358 6359+ /*
c1595e42 6360+ * This function is a dynamic '__init' function actually,
dece6358
AM
6361+ * so the tiny check for si_rwsem is unnecessary.
6362+ */
6363+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
6364+
1facf9fc 6365+ err = -ENOENT;
6366+ if (!dbgaufs) {
6367+ AuErr1("/debug/aufs is uninitialized\n");
6368+ goto out;
6369+ }
6370+
6371+ err = -EIO;
6372+ sysaufs_name(sbinfo, name);
6373+ sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs);
6374+ if (unlikely(!sbinfo->si_dbgaufs))
6375+ goto out;
6376+ kobject_get(&sbinfo->si_kobj);
6377+
6378+ sbinfo->si_dbgaufs_xib = debugfs_create_file
6379+ ("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
6380+ &dbgaufs_xib_fop);
6381+ if (unlikely(!sbinfo->si_dbgaufs_xib))
6382+ goto out_dir;
6383+
86dc4139
AM
6384+ sbinfo->si_dbgaufs_plink = debugfs_create_file
6385+ ("plink", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
6386+ &dbgaufs_plink_fop);
6387+ if (unlikely(!sbinfo->si_dbgaufs_plink))
6388+ goto out_dir;
6389+
1facf9fc 6390+ err = dbgaufs_xigen_init(sbinfo);
6391+ if (!err)
6392+ goto out; /* success */
6393+
4f0767ce 6394+out_dir:
1facf9fc 6395+ dbgaufs_si_fin(sbinfo);
4f0767ce 6396+out:
1facf9fc 6397+ return err;
6398+}
6399+
6400+/* ---------------------------------------------------------------------- */
6401+
6402+void dbgaufs_fin(void)
6403+{
6404+ debugfs_remove(dbgaufs);
6405+}
6406+
6407+int __init dbgaufs_init(void)
6408+{
6409+ int err;
6410+
6411+ err = -EIO;
6412+ dbgaufs = debugfs_create_dir(AUFS_NAME, NULL);
6413+ if (dbgaufs)
6414+ err = 0;
6415+ return err;
6416+}
7f207e10
AM
6417diff -urN /usr/share/empty/fs/aufs/dbgaufs.h linux/fs/aufs/dbgaufs.h
6418--- /usr/share/empty/fs/aufs/dbgaufs.h 1970-01-01 01:00:00.000000000 +0100
8cdd5066 6419+++ linux/fs/aufs/dbgaufs.h 2016-02-28 11:26:32.569971135 +0100
523b37e3 6420@@ -0,0 +1,48 @@
1facf9fc 6421+/*
8cdd5066 6422+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 6423+ *
6424+ * This program, aufs is free software; you can redistribute it and/or modify
6425+ * it under the terms of the GNU General Public License as published by
6426+ * the Free Software Foundation; either version 2 of the License, or
6427+ * (at your option) any later version.
dece6358
AM
6428+ *
6429+ * This program is distributed in the hope that it will be useful,
6430+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6431+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6432+ * GNU General Public License for more details.
6433+ *
6434+ * You should have received a copy of the GNU General Public License
523b37e3 6435+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6436+ */
6437+
6438+/*
6439+ * debugfs interface
6440+ */
6441+
6442+#ifndef __DBGAUFS_H__
6443+#define __DBGAUFS_H__
6444+
6445+#ifdef __KERNEL__
6446+
dece6358 6447+struct super_block;
1facf9fc 6448+struct au_sbinfo;
dece6358 6449+
1facf9fc 6450+#ifdef CONFIG_DEBUG_FS
6451+/* dbgaufs.c */
6452+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
6453+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
6454+void dbgaufs_si_fin(struct au_sbinfo *sbinfo);
6455+int dbgaufs_si_init(struct au_sbinfo *sbinfo);
6456+void dbgaufs_fin(void);
6457+int __init dbgaufs_init(void);
1facf9fc 6458+#else
4a4d8108
AM
6459+AuStubVoid(dbgaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
6460+AuStubVoid(dbgaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
6461+AuStubVoid(dbgaufs_si_fin, struct au_sbinfo *sbinfo)
6462+AuStubInt0(dbgaufs_si_init, struct au_sbinfo *sbinfo)
6463+AuStubVoid(dbgaufs_fin, void)
6464+AuStubInt0(__init dbgaufs_init, void)
1facf9fc 6465+#endif /* CONFIG_DEBUG_FS */
6466+
6467+#endif /* __KERNEL__ */
6468+#endif /* __DBGAUFS_H__ */
7f207e10
AM
6469diff -urN /usr/share/empty/fs/aufs/dcsub.c linux/fs/aufs/dcsub.c
6470--- /usr/share/empty/fs/aufs/dcsub.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 6471+++ linux/fs/aufs/dcsub.c 2016-02-28 11:26:32.569971135 +0100
c1595e42 6472@@ -0,0 +1,224 @@
1facf9fc 6473+/*
8cdd5066 6474+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 6475+ *
6476+ * This program, aufs is free software; you can redistribute it and/or modify
6477+ * it under the terms of the GNU General Public License as published by
6478+ * the Free Software Foundation; either version 2 of the License, or
6479+ * (at your option) any later version.
dece6358
AM
6480+ *
6481+ * This program is distributed in the hope that it will be useful,
6482+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6483+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6484+ * GNU General Public License for more details.
6485+ *
6486+ * You should have received a copy of the GNU General Public License
523b37e3 6487+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6488+ */
6489+
6490+/*
6491+ * sub-routines for dentry cache
6492+ */
6493+
6494+#include "aufs.h"
6495+
6496+static void au_dpage_free(struct au_dpage *dpage)
6497+{
6498+ int i;
6499+ struct dentry **p;
6500+
6501+ p = dpage->dentries;
6502+ for (i = 0; i < dpage->ndentry; i++)
6503+ dput(*p++);
6504+ free_page((unsigned long)dpage->dentries);
6505+}
6506+
6507+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp)
6508+{
6509+ int err;
6510+ void *p;
6511+
6512+ err = -ENOMEM;
6513+ dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp);
6514+ if (unlikely(!dpages->dpages))
6515+ goto out;
6516+
6517+ p = (void *)__get_free_page(gfp);
6518+ if (unlikely(!p))
6519+ goto out_dpages;
6520+
6521+ dpages->dpages[0].ndentry = 0;
6522+ dpages->dpages[0].dentries = p;
6523+ dpages->ndpage = 1;
6524+ return 0; /* success */
6525+
4f0767ce 6526+out_dpages:
1facf9fc 6527+ kfree(dpages->dpages);
4f0767ce 6528+out:
1facf9fc 6529+ return err;
6530+}
6531+
6532+void au_dpages_free(struct au_dcsub_pages *dpages)
6533+{
6534+ int i;
6535+ struct au_dpage *p;
6536+
6537+ p = dpages->dpages;
6538+ for (i = 0; i < dpages->ndpage; i++)
6539+ au_dpage_free(p++);
6540+ kfree(dpages->dpages);
6541+}
6542+
6543+static int au_dpages_append(struct au_dcsub_pages *dpages,
6544+ struct dentry *dentry, gfp_t gfp)
6545+{
6546+ int err, sz;
6547+ struct au_dpage *dpage;
6548+ void *p;
6549+
6550+ dpage = dpages->dpages + dpages->ndpage - 1;
6551+ sz = PAGE_SIZE / sizeof(dentry);
6552+ if (unlikely(dpage->ndentry >= sz)) {
6553+ AuLabel(new dpage);
6554+ err = -ENOMEM;
6555+ sz = dpages->ndpage * sizeof(*dpages->dpages);
6556+ p = au_kzrealloc(dpages->dpages, sz,
6557+ sz + sizeof(*dpages->dpages), gfp);
6558+ if (unlikely(!p))
6559+ goto out;
6560+
6561+ dpages->dpages = p;
6562+ dpage = dpages->dpages + dpages->ndpage;
6563+ p = (void *)__get_free_page(gfp);
6564+ if (unlikely(!p))
6565+ goto out;
6566+
6567+ dpage->ndentry = 0;
6568+ dpage->dentries = p;
6569+ dpages->ndpage++;
6570+ }
6571+
c1595e42 6572+ AuDebugOn(au_dcount(dentry) <= 0);
027c5e7a 6573+ dpage->dentries[dpage->ndentry++] = dget_dlock(dentry);
1facf9fc 6574+ return 0; /* success */
6575+
4f0767ce 6576+out:
1facf9fc 6577+ return err;
6578+}
6579+
c1595e42
JR
6580+/* todo: BAD approach */
6581+/* copied from linux/fs/dcache.c */
6582+enum d_walk_ret {
6583+ D_WALK_CONTINUE,
6584+ D_WALK_QUIT,
6585+ D_WALK_NORETRY,
6586+ D_WALK_SKIP,
6587+};
6588+
6589+extern void d_walk(struct dentry *parent, void *data,
6590+ enum d_walk_ret (*enter)(void *, struct dentry *),
6591+ void (*finish)(void *));
6592+
6593+struct ac_dpages_arg {
1facf9fc 6594+ int err;
c1595e42
JR
6595+ struct au_dcsub_pages *dpages;
6596+ struct super_block *sb;
6597+ au_dpages_test test;
6598+ void *arg;
6599+};
1facf9fc 6600+
c1595e42
JR
6601+static enum d_walk_ret au_call_dpages_append(void *_arg, struct dentry *dentry)
6602+{
6603+ enum d_walk_ret ret;
6604+ struct ac_dpages_arg *arg = _arg;
1facf9fc 6605+
c1595e42
JR
6606+ ret = D_WALK_CONTINUE;
6607+ if (dentry->d_sb == arg->sb
6608+ && !IS_ROOT(dentry)
6609+ && au_dcount(dentry) > 0
6610+ && au_di(dentry)
6611+ && (!arg->test || arg->test(dentry, arg->arg))) {
6612+ arg->err = au_dpages_append(arg->dpages, dentry, GFP_ATOMIC);
6613+ if (unlikely(arg->err))
6614+ ret = D_WALK_QUIT;
1facf9fc 6615+ }
6616+
c1595e42
JR
6617+ return ret;
6618+}
027c5e7a 6619+
c1595e42
JR
6620+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
6621+ au_dpages_test test, void *arg)
6622+{
6623+ struct ac_dpages_arg args = {
6624+ .err = 0,
6625+ .dpages = dpages,
6626+ .sb = root->d_sb,
6627+ .test = test,
6628+ .arg = arg
6629+ };
027c5e7a 6630+
c1595e42
JR
6631+ d_walk(root, &args, au_call_dpages_append, NULL);
6632+
6633+ return args.err;
1facf9fc 6634+}
6635+
6636+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
6637+ int do_include, au_dpages_test test, void *arg)
6638+{
6639+ int err;
6640+
6641+ err = 0;
027c5e7a
AM
6642+ write_seqlock(&rename_lock);
6643+ spin_lock(&dentry->d_lock);
6644+ if (do_include
c1595e42 6645+ && au_dcount(dentry) > 0
027c5e7a 6646+ && (!test || test(dentry, arg)))
1facf9fc 6647+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
027c5e7a
AM
6648+ spin_unlock(&dentry->d_lock);
6649+ if (unlikely(err))
6650+ goto out;
6651+
6652+ /*
523b37e3 6653+ * RCU for vfsmount is unnecessary since this is a traverse in a single
027c5e7a
AM
6654+ * mount
6655+ */
1facf9fc 6656+ while (!IS_ROOT(dentry)) {
027c5e7a
AM
6657+ dentry = dentry->d_parent; /* rename_lock is locked */
6658+ spin_lock(&dentry->d_lock);
c1595e42 6659+ if (au_dcount(dentry) > 0
027c5e7a 6660+ && (!test || test(dentry, arg)))
1facf9fc 6661+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
027c5e7a
AM
6662+ spin_unlock(&dentry->d_lock);
6663+ if (unlikely(err))
6664+ break;
1facf9fc 6665+ }
6666+
4f0767ce 6667+out:
027c5e7a 6668+ write_sequnlock(&rename_lock);
1facf9fc 6669+ return err;
6670+}
6671+
027c5e7a
AM
6672+static inline int au_dcsub_dpages_aufs(struct dentry *dentry, void *arg)
6673+{
6674+ return au_di(dentry) && dentry->d_sb == arg;
6675+}
6676+
6677+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
6678+ struct dentry *dentry, int do_include)
6679+{
6680+ return au_dcsub_pages_rev(dpages, dentry, do_include,
6681+ au_dcsub_dpages_aufs, dentry->d_sb);
6682+}
6683+
4a4d8108 6684+int au_test_subdir(struct dentry *d1, struct dentry *d2)
1facf9fc 6685+{
4a4d8108
AM
6686+ struct path path[2] = {
6687+ {
6688+ .dentry = d1
6689+ },
6690+ {
6691+ .dentry = d2
6692+ }
6693+ };
1facf9fc 6694+
4a4d8108 6695+ return path_is_under(path + 0, path + 1);
1facf9fc 6696+}
7f207e10
AM
6697diff -urN /usr/share/empty/fs/aufs/dcsub.h linux/fs/aufs/dcsub.h
6698--- /usr/share/empty/fs/aufs/dcsub.h 1970-01-01 01:00:00.000000000 +0100
8cdd5066 6699+++ linux/fs/aufs/dcsub.h 2016-02-28 11:26:32.569971135 +0100
5527c038 6700@@ -0,0 +1,136 @@
1facf9fc 6701+/*
8cdd5066 6702+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 6703+ *
6704+ * This program, aufs is free software; you can redistribute it and/or modify
6705+ * it under the terms of the GNU General Public License as published by
6706+ * the Free Software Foundation; either version 2 of the License, or
6707+ * (at your option) any later version.
dece6358
AM
6708+ *
6709+ * This program is distributed in the hope that it will be useful,
6710+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6711+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6712+ * GNU General Public License for more details.
6713+ *
6714+ * You should have received a copy of the GNU General Public License
523b37e3 6715+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6716+ */
6717+
6718+/*
6719+ * sub-routines for dentry cache
6720+ */
6721+
6722+#ifndef __AUFS_DCSUB_H__
6723+#define __AUFS_DCSUB_H__
6724+
6725+#ifdef __KERNEL__
6726+
7f207e10 6727+#include <linux/dcache.h>
027c5e7a 6728+#include <linux/fs.h>
dece6358 6729+
/* one page worth of dentry pointers */
struct au_dpage {
	int		ndentry;	/* the number of used slots */
	struct dentry	**dentries;
};
6734+
/* growable array of au_dpage */
struct au_dcsub_pages {
	int		ndpage;		/* the number of elements in dpages */
	struct au_dpage	*dpages;
};
6739+
6740+/* ---------------------------------------------------------------------- */
6741+
7f207e10 6742+/* dcsub.c */
1facf9fc 6743+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp);
6744+void au_dpages_free(struct au_dcsub_pages *dpages);
6745+typedef int (*au_dpages_test)(struct dentry *dentry, void *arg);
6746+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
6747+ au_dpages_test test, void *arg);
6748+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
6749+ int do_include, au_dpages_test test, void *arg);
027c5e7a
AM
6750+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
6751+ struct dentry *dentry, int do_include);
4a4d8108 6752+int au_test_subdir(struct dentry *d1, struct dentry *d2);
1facf9fc 6753+
7f207e10
AM
6754+/* ---------------------------------------------------------------------- */
6755+
523b37e3
AM
6756+/*
6757+ * todo: in linux-3.13, several similar (but faster) helpers are added to
6758+ * include/linux/dcache.h. Try them (in the future).
6759+ */
6760+
027c5e7a
AM
6761+static inline int au_d_hashed_positive(struct dentry *d)
6762+{
6763+ int err;
5527c038 6764+ struct inode *inode = d_inode(d);
076b876e 6765+
027c5e7a 6766+ err = 0;
5527c038
JR
6767+ if (unlikely(d_unhashed(d)
6768+ || d_is_negative(d)
6769+ || !inode->i_nlink))
027c5e7a
AM
6770+ err = -ENOENT;
6771+ return err;
6772+}
6773+
38d290e6
JR
6774+static inline int au_d_linkable(struct dentry *d)
6775+{
6776+ int err;
5527c038 6777+ struct inode *inode = d_inode(d);
076b876e 6778+
38d290e6
JR
6779+ err = au_d_hashed_positive(d);
6780+ if (err
5527c038 6781+ && d_is_positive(d)
38d290e6
JR
6782+ && (inode->i_state & I_LINKABLE))
6783+ err = 0;
6784+ return err;
6785+}
6786+
027c5e7a
AM
6787+static inline int au_d_alive(struct dentry *d)
6788+{
6789+ int err;
6790+ struct inode *inode;
076b876e 6791+
027c5e7a
AM
6792+ err = 0;
6793+ if (!IS_ROOT(d))
6794+ err = au_d_hashed_positive(d);
6795+ else {
5527c038
JR
6796+ inode = d_inode(d);
6797+ if (unlikely(d_unlinked(d)
6798+ || d_is_negative(d)
6799+ || !inode->i_nlink))
027c5e7a
AM
6800+ err = -ENOENT;
6801+ }
6802+ return err;
6803+}
6804+
6805+static inline int au_alive_dir(struct dentry *d)
7f207e10 6806+{
027c5e7a 6807+ int err;
076b876e 6808+
027c5e7a 6809+ err = au_d_alive(d);
5527c038 6810+ if (unlikely(err || IS_DEADDIR(d_inode(d))))
027c5e7a
AM
6811+ err = -ENOENT;
6812+ return err;
7f207e10
AM
6813+}
6814+
38d290e6
JR
6815+static inline int au_qstreq(struct qstr *a, struct qstr *b)
6816+{
6817+ return a->len == b->len
6818+ && !memcmp(a->name, b->name, a->len);
6819+}
6820+
7e9cd9fe
AM
6821+/*
6822+ * by the commit
6823+ * 360f547 2015-01-25 dcache: let the dentry count go down to zero without
6824+ * taking d_lock
6825+ * the type of d_lockref.count became int, but the inlined function d_count()
6826+ * still returns unsigned int.
6827+ * I don't know why. Maybe it is for the sake of all d_count() users?
6828+ * Anyway au_dcount() lives on.
6829+ */
c1595e42
JR
6830+static inline int au_dcount(struct dentry *d)
6831+{
6832+ return (int)d_count(d);
6833+}
6834+
1facf9fc 6835+#endif /* __KERNEL__ */
6836+#endif /* __AUFS_DCSUB_H__ */
7f207e10
AM
6837diff -urN /usr/share/empty/fs/aufs/debug.c linux/fs/aufs/debug.c
6838--- /usr/share/empty/fs/aufs/debug.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 6839+++ linux/fs/aufs/debug.c 2016-02-28 11:26:32.569971135 +0100
be52b249 6840@@ -0,0 +1,438 @@
1facf9fc 6841+/*
8cdd5066 6842+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 6843+ *
6844+ * This program, aufs is free software; you can redistribute it and/or modify
6845+ * it under the terms of the GNU General Public License as published by
6846+ * the Free Software Foundation; either version 2 of the License, or
6847+ * (at your option) any later version.
dece6358
AM
6848+ *
6849+ * This program is distributed in the hope that it will be useful,
6850+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6851+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6852+ * GNU General Public License for more details.
6853+ *
6854+ * You should have received a copy of the GNU General Public License
523b37e3 6855+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6856+ */
6857+
6858+/*
6859+ * debug print functions
6860+ */
6861+
6862+#include "aufs.h"
6863+
392086de
AM
6864+/* Returns 0, or -errno. arg is in kp->arg. */
6865+static int param_atomic_t_set(const char *val, const struct kernel_param *kp)
6866+{
6867+ int err, n;
6868+
6869+ err = kstrtoint(val, 0, &n);
6870+ if (!err) {
6871+ if (n > 0)
6872+ au_debug_on();
6873+ else
6874+ au_debug_off();
6875+ }
6876+ return err;
6877+}
6878+
6879+/* Returns length written or -errno. Buffer is 4k (ie. be short!) */
6880+static int param_atomic_t_get(char *buffer, const struct kernel_param *kp)
6881+{
6882+ atomic_t *a;
6883+
6884+ a = kp->arg;
6885+ return sprintf(buffer, "%d", atomic_read(a));
6886+}
6887+
6888+static struct kernel_param_ops param_ops_atomic_t = {
6889+ .set = param_atomic_t_set,
6890+ .get = param_atomic_t_get
6891+ /* void (*free)(void *arg) */
6892+};
6893+
6894+atomic_t aufs_debug = ATOMIC_INIT(0);
1facf9fc 6895+MODULE_PARM_DESC(debug, "debug print");
392086de 6896+module_param_named(debug, aufs_debug, atomic_t, S_IRUGO | S_IWUSR | S_IWGRP);
1facf9fc 6897+
c1595e42 6898+DEFINE_MUTEX(au_dbg_mtx); /* just to serialize the dbg msgs */
1facf9fc 6899+char *au_plevel = KERN_DEBUG;
e49829fe
JR
6900+#define dpri(fmt, ...) do { \
6901+ if ((au_plevel \
6902+ && strcmp(au_plevel, KERN_DEBUG)) \
6903+ || au_debug_test()) \
6904+ printk("%s" fmt, au_plevel, ##__VA_ARGS__); \
1facf9fc 6905+} while (0)
6906+
6907+/* ---------------------------------------------------------------------- */
6908+
6909+void au_dpri_whlist(struct au_nhash *whlist)
6910+{
6911+ unsigned long ul, n;
6912+ struct hlist_head *head;
c06a8ce3 6913+ struct au_vdir_wh *pos;
1facf9fc 6914+
6915+ n = whlist->nh_num;
6916+ head = whlist->nh_head;
6917+ for (ul = 0; ul < n; ul++) {
c06a8ce3 6918+ hlist_for_each_entry(pos, head, wh_hash)
1facf9fc 6919+ dpri("b%d, %.*s, %d\n",
c06a8ce3
AM
6920+ pos->wh_bindex,
6921+ pos->wh_str.len, pos->wh_str.name,
6922+ pos->wh_str.len);
1facf9fc 6923+ head++;
6924+ }
6925+}
6926+
6927+void au_dpri_vdir(struct au_vdir *vdir)
6928+{
6929+ unsigned long ul;
6930+ union au_vdir_deblk_p p;
6931+ unsigned char *o;
6932+
6933+ if (!vdir || IS_ERR(vdir)) {
6934+ dpri("err %ld\n", PTR_ERR(vdir));
6935+ return;
6936+ }
6937+
6938+ dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %lu\n",
6939+ vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk,
6940+ vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version);
6941+ for (ul = 0; ul < vdir->vd_nblk; ul++) {
6942+ p.deblk = vdir->vd_deblk[ul];
6943+ o = p.deblk;
6944+ dpri("[%lu]: %p\n", ul, o);
6945+ }
6946+}
6947+
53392da6 6948+static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode, int hn,
1facf9fc 6949+ struct dentry *wh)
6950+{
6951+ char *n = NULL;
6952+ int l = 0;
6953+
6954+ if (!inode || IS_ERR(inode)) {
6955+ dpri("i%d: err %ld\n", bindex, PTR_ERR(inode));
6956+ return -1;
6957+ }
6958+
c2b27bf2 6959+ /* the type of i_blocks depends upon CONFIG_LBDAF */
1facf9fc 6960+ BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long)
6961+ && sizeof(inode->i_blocks) != sizeof(u64));
6962+ if (wh) {
6963+ n = (void *)wh->d_name.name;
6964+ l = wh->d_name.len;
6965+ }
6966+
53392da6
AM
6967+ dpri("i%d: %p, i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu,"
6968+ " hn %d, ct %lld, np %lu, st 0x%lx, f 0x%x, v %llu, g %x%s%.*s\n",
6969+ bindex, inode,
1facf9fc 6970+ inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??",
6971+ atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode,
6972+ i_size_read(inode), (unsigned long long)inode->i_blocks,
53392da6 6973+ hn, (long long)timespec_to_ns(&inode->i_ctime) & 0x0ffff,
1facf9fc 6974+ inode->i_mapping ? inode->i_mapping->nrpages : 0,
b752ccd1
AM
6975+ inode->i_state, inode->i_flags, inode->i_version,
6976+ inode->i_generation,
1facf9fc 6977+ l ? ", wh " : "", l, n);
6978+ return 0;
6979+}
6980+
6981+void au_dpri_inode(struct inode *inode)
6982+{
6983+ struct au_iinfo *iinfo;
6984+ aufs_bindex_t bindex;
53392da6 6985+ int err, hn;
1facf9fc 6986+
53392da6 6987+ err = do_pri_inode(-1, inode, -1, NULL);
1facf9fc 6988+ if (err || !au_test_aufs(inode->i_sb))
6989+ return;
6990+
6991+ iinfo = au_ii(inode);
6992+ if (!iinfo)
6993+ return;
6994+ dpri("i-1: bstart %d, bend %d, gen %d\n",
537831f9 6995+ iinfo->ii_bstart, iinfo->ii_bend, au_iigen(inode, NULL));
1facf9fc 6996+ if (iinfo->ii_bstart < 0)
6997+ return;
53392da6
AM
6998+ hn = 0;
6999+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; bindex++) {
7000+ hn = !!au_hn(iinfo->ii_hinode + bindex);
7001+ do_pri_inode(bindex, iinfo->ii_hinode[0 + bindex].hi_inode, hn,
1facf9fc 7002+ iinfo->ii_hinode[0 + bindex].hi_whdentry);
53392da6 7003+ }
1facf9fc 7004+}
7005+
2cbb1c4b
JR
7006+void au_dpri_dalias(struct inode *inode)
7007+{
7008+ struct dentry *d;
7009+
7010+ spin_lock(&inode->i_lock);
c1595e42 7011+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias)
2cbb1c4b
JR
7012+ au_dpri_dentry(d);
7013+ spin_unlock(&inode->i_lock);
7014+}
7015+
1facf9fc 7016+static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry)
7017+{
7018+ struct dentry *wh = NULL;
53392da6 7019+ int hn;
076b876e 7020+ struct au_iinfo *iinfo;
1facf9fc 7021+
7022+ if (!dentry || IS_ERR(dentry)) {
7023+ dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry));
7024+ return -1;
7025+ }
7026+ /* do not call dget_parent() here */
027c5e7a 7027+ /* note: access d_xxx without d_lock */
523b37e3
AM
7028+ dpri("d%d: %p, %pd2?, %s, cnt %d, flags 0x%x, %shashed\n",
7029+ bindex, dentry, dentry,
1facf9fc 7030+ dentry->d_sb ? au_sbtype(dentry->d_sb) : "??",
c1595e42 7031+ au_dcount(dentry), dentry->d_flags,
523b37e3 7032+ d_unhashed(dentry) ? "un" : "");
53392da6 7033+ hn = -1;
5527c038
JR
7034+ if (bindex >= 0
7035+ && d_is_positive(dentry)
7036+ && au_test_aufs(dentry->d_sb)) {
7037+ iinfo = au_ii(d_inode(dentry));
53392da6
AM
7038+ if (iinfo) {
7039+ hn = !!au_hn(iinfo->ii_hinode + bindex);
1facf9fc 7040+ wh = iinfo->ii_hinode[0 + bindex].hi_whdentry;
53392da6 7041+ }
1facf9fc 7042+ }
5527c038 7043+ do_pri_inode(bindex, d_inode(dentry), hn, wh);
1facf9fc 7044+ return 0;
7045+}
7046+
7047+void au_dpri_dentry(struct dentry *dentry)
7048+{
7049+ struct au_dinfo *dinfo;
7050+ aufs_bindex_t bindex;
7051+ int err;
4a4d8108 7052+ struct au_hdentry *hdp;
1facf9fc 7053+
7054+ err = do_pri_dentry(-1, dentry);
7055+ if (err || !au_test_aufs(dentry->d_sb))
7056+ return;
7057+
7058+ dinfo = au_di(dentry);
7059+ if (!dinfo)
7060+ return;
38d290e6 7061+ dpri("d-1: bstart %d, bend %d, bwh %d, bdiropq %d, gen %d, tmp %d\n",
1facf9fc 7062+ dinfo->di_bstart, dinfo->di_bend,
38d290e6
JR
7063+ dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry),
7064+ dinfo->di_tmpfile);
1facf9fc 7065+ if (dinfo->di_bstart < 0)
7066+ return;
4a4d8108 7067+ hdp = dinfo->di_hdentry;
1facf9fc 7068+ for (bindex = dinfo->di_bstart; bindex <= dinfo->di_bend; bindex++)
4a4d8108 7069+ do_pri_dentry(bindex, hdp[0 + bindex].hd_dentry);
1facf9fc 7070+}
7071+
7072+static int do_pri_file(aufs_bindex_t bindex, struct file *file)
7073+{
7074+ char a[32];
7075+
7076+ if (!file || IS_ERR(file)) {
7077+ dpri("f%d: err %ld\n", bindex, PTR_ERR(file));
7078+ return -1;
7079+ }
7080+ a[0] = 0;
7081+ if (bindex < 0
b912730e 7082+ && !IS_ERR_OR_NULL(file->f_path.dentry)
2000de60 7083+ && au_test_aufs(file->f_path.dentry->d_sb)
1facf9fc 7084+ && au_fi(file))
e49829fe 7085+ snprintf(a, sizeof(a), ", gen %d, mmapped %d",
2cbb1c4b 7086+ au_figen(file), atomic_read(&au_fi(file)->fi_mmapped));
b752ccd1 7087+ dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, v %llu, pos %llu%s\n",
1facf9fc 7088+ bindex, file->f_mode, file->f_flags, (long)file_count(file),
b752ccd1 7089+ file->f_version, file->f_pos, a);
b912730e 7090+ if (!IS_ERR_OR_NULL(file->f_path.dentry))
2000de60 7091+ do_pri_dentry(bindex, file->f_path.dentry);
1facf9fc 7092+ return 0;
7093+}
7094+
7095+void au_dpri_file(struct file *file)
7096+{
7097+ struct au_finfo *finfo;
4a4d8108
AM
7098+ struct au_fidir *fidir;
7099+ struct au_hfile *hfile;
1facf9fc 7100+ aufs_bindex_t bindex;
7101+ int err;
7102+
7103+ err = do_pri_file(-1, file);
2000de60 7104+ if (err
b912730e 7105+ || IS_ERR_OR_NULL(file->f_path.dentry)
2000de60 7106+ || !au_test_aufs(file->f_path.dentry->d_sb))
1facf9fc 7107+ return;
7108+
7109+ finfo = au_fi(file);
7110+ if (!finfo)
7111+ return;
4a4d8108 7112+ if (finfo->fi_btop < 0)
1facf9fc 7113+ return;
4a4d8108
AM
7114+ fidir = finfo->fi_hdir;
7115+ if (!fidir)
7116+ do_pri_file(finfo->fi_btop, finfo->fi_htop.hf_file);
7117+ else
e49829fe
JR
7118+ for (bindex = finfo->fi_btop;
7119+ bindex >= 0 && bindex <= fidir->fd_bbot;
4a4d8108
AM
7120+ bindex++) {
7121+ hfile = fidir->fd_hfile + bindex;
7122+ do_pri_file(bindex, hfile ? hfile->hf_file : NULL);
7123+ }
1facf9fc 7124+}
7125+
7126+static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br)
7127+{
7128+ struct vfsmount *mnt;
7129+ struct super_block *sb;
7130+
7131+ if (!br || IS_ERR(br))
7132+ goto out;
86dc4139 7133+ mnt = au_br_mnt(br);
1facf9fc 7134+ if (!mnt || IS_ERR(mnt))
7135+ goto out;
7136+ sb = mnt->mnt_sb;
7137+ if (!sb || IS_ERR(sb))
7138+ goto out;
7139+
1e00d052 7140+ dpri("s%d: {perm 0x%x, id %d, cnt %d, wbr %p}, "
b752ccd1 7141+ "%s, dev 0x%02x%02x, flags 0x%lx, cnt %d, active %d, "
1facf9fc 7142+ "xino %d\n",
1e00d052
AM
7143+ bindex, br->br_perm, br->br_id, atomic_read(&br->br_count),
7144+ br->br_wbr, au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev),
b752ccd1 7145+ sb->s_flags, sb->s_count,
1facf9fc 7146+ atomic_read(&sb->s_active), !!br->br_xino.xi_file);
7147+ return 0;
7148+
4f0767ce 7149+out:
1facf9fc 7150+ dpri("s%d: err %ld\n", bindex, PTR_ERR(br));
7151+ return -1;
7152+}
7153+
7154+void au_dpri_sb(struct super_block *sb)
7155+{
7156+ struct au_sbinfo *sbinfo;
7157+ aufs_bindex_t bindex;
7158+ int err;
7159+ /* to reduce stack size */
7160+ struct {
7161+ struct vfsmount mnt;
7162+ struct au_branch fake;
7163+ } *a;
7164+
7165+ /* this function can be called from magic sysrq */
7166+ a = kzalloc(sizeof(*a), GFP_ATOMIC);
7167+ if (unlikely(!a)) {
7168+ dpri("no memory\n");
7169+ return;
7170+ }
7171+
7172+ a->mnt.mnt_sb = sb;
86dc4139 7173+ a->fake.br_path.mnt = &a->mnt;
1facf9fc 7174+ atomic_set(&a->fake.br_count, 0);
7175+ smp_mb(); /* atomic_set */
7176+ err = do_pri_br(-1, &a->fake);
7177+ kfree(a);
7178+ dpri("dev 0x%x\n", sb->s_dev);
7179+ if (err || !au_test_aufs(sb))
7180+ return;
7181+
7182+ sbinfo = au_sbi(sb);
7183+ if (!sbinfo)
7184+ return;
7185+ dpri("nw %d, gen %u, kobj %d\n",
7186+ atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation,
7187+ atomic_read(&sbinfo->si_kobj.kref.refcount));
7188+ for (bindex = 0; bindex <= sbinfo->si_bend; bindex++)
7189+ do_pri_br(bindex, sbinfo->si_branch[0 + bindex]);
7190+}
7191+
7192+/* ---------------------------------------------------------------------- */
7193+
027c5e7a
AM
7194+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line)
7195+{
5527c038 7196+ struct inode *h_inode, *inode = d_inode(dentry);
027c5e7a
AM
7197+ struct dentry *h_dentry;
7198+ aufs_bindex_t bindex, bend, bi;
7199+
7200+ if (!inode /* || au_di(dentry)->di_lsc == AuLsc_DI_TMP */)
7201+ return;
7202+
7203+ bend = au_dbend(dentry);
7204+ bi = au_ibend(inode);
7205+ if (bi < bend)
7206+ bend = bi;
7207+ bindex = au_dbstart(dentry);
7208+ bi = au_ibstart(inode);
7209+ if (bi > bindex)
7210+ bindex = bi;
7211+
7212+ for (; bindex <= bend; bindex++) {
7213+ h_dentry = au_h_dptr(dentry, bindex);
7214+ if (!h_dentry)
7215+ continue;
7216+ h_inode = au_h_iptr(inode, bindex);
5527c038 7217+ if (unlikely(h_inode != d_inode(h_dentry))) {
392086de 7218+ au_debug_on();
027c5e7a
AM
7219+ AuDbg("b%d, %s:%d\n", bindex, func, line);
7220+ AuDbgDentry(dentry);
7221+ AuDbgInode(inode);
392086de 7222+ au_debug_off();
027c5e7a
AM
7223+ BUG();
7224+ }
7225+ }
7226+}
7227+
1facf9fc 7228+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen)
7229+{
7230+ int err, i, j;
7231+ struct au_dcsub_pages dpages;
7232+ struct au_dpage *dpage;
7233+ struct dentry **dentries;
7234+
7235+ err = au_dpages_init(&dpages, GFP_NOFS);
7236+ AuDebugOn(err);
027c5e7a 7237+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/1);
1facf9fc 7238+ AuDebugOn(err);
7239+ for (i = dpages.ndpage - 1; !err && i >= 0; i--) {
7240+ dpage = dpages.dpages + i;
7241+ dentries = dpage->dentries;
7242+ for (j = dpage->ndentry - 1; !err && j >= 0; j--)
027c5e7a 7243+ AuDebugOn(au_digen_test(dentries[j], sigen));
1facf9fc 7244+ }
7245+ au_dpages_free(&dpages);
7246+}
7247+
1facf9fc 7248+void au_dbg_verify_kthread(void)
7249+{
53392da6 7250+ if (au_wkq_test()) {
1facf9fc 7251+ au_dbg_blocked();
1e00d052
AM
7252+ /*
7253+ * It may be recursive, but udba=notify between two aufs mounts,
7254+ * where a single ro branch is shared, is not a problem.
7255+ */
7256+ /* WARN_ON(1); */
1facf9fc 7257+ }
7258+}
7259+
7260+/* ---------------------------------------------------------------------- */
7261+
1facf9fc 7262+int __init au_debug_init(void)
7263+{
7264+ aufs_bindex_t bindex;
7265+ struct au_vdir_destr destr;
7266+
7267+ bindex = -1;
7268+ AuDebugOn(bindex >= 0);
7269+
7270+ destr.len = -1;
7271+ AuDebugOn(destr.len < NAME_MAX);
7272+
7273+#ifdef CONFIG_4KSTACKS
0c3ec466 7274+ pr_warn("CONFIG_4KSTACKS is defined.\n");
1facf9fc 7275+#endif
7276+
1facf9fc 7277+ return 0;
7278+}
7f207e10
AM
7279diff -urN /usr/share/empty/fs/aufs/debug.h linux/fs/aufs/debug.h
7280--- /usr/share/empty/fs/aufs/debug.h 1970-01-01 01:00:00.000000000 +0100
8cdd5066 7281+++ linux/fs/aufs/debug.h 2016-02-28 11:26:32.569971135 +0100
5527c038 7282@@ -0,0 +1,225 @@
1facf9fc 7283+/*
8cdd5066 7284+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 7285+ *
7286+ * This program, aufs is free software; you can redistribute it and/or modify
7287+ * it under the terms of the GNU General Public License as published by
7288+ * the Free Software Foundation; either version 2 of the License, or
7289+ * (at your option) any later version.
dece6358
AM
7290+ *
7291+ * This program is distributed in the hope that it will be useful,
7292+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7293+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7294+ * GNU General Public License for more details.
7295+ *
7296+ * You should have received a copy of the GNU General Public License
523b37e3 7297+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 7298+ */
7299+
7300+/*
7301+ * debug print functions
7302+ */
7303+
7304+#ifndef __AUFS_DEBUG_H__
7305+#define __AUFS_DEBUG_H__
7306+
7307+#ifdef __KERNEL__
7308+
392086de 7309+#include <linux/atomic.h>
4a4d8108
AM
7310+#include <linux/module.h>
7311+#include <linux/kallsyms.h>
1facf9fc 7312+#include <linux/sysrq.h>
4a4d8108 7313+
1facf9fc 7314+#ifdef CONFIG_AUFS_DEBUG
7315+#define AuDebugOn(a) BUG_ON(a)
7316+
7317+/* module parameter */
392086de
AM
7318+extern atomic_t aufs_debug;
7319+static inline void au_debug_on(void)
1facf9fc 7320+{
392086de
AM
7321+ atomic_inc(&aufs_debug);
7322+}
7323+static inline void au_debug_off(void)
7324+{
7325+ atomic_dec_if_positive(&aufs_debug);
1facf9fc 7326+}
7327+
7328+static inline int au_debug_test(void)
7329+{
392086de 7330+ return atomic_read(&aufs_debug) > 0;
1facf9fc 7331+}
7332+#else
7333+#define AuDebugOn(a) do {} while (0)
392086de
AM
7334+AuStubVoid(au_debug_on, void)
7335+AuStubVoid(au_debug_off, void)
4a4d8108 7336+AuStubInt0(au_debug_test, void)
1facf9fc 7337+#endif /* CONFIG_AUFS_DEBUG */
7338+
392086de
AM
7339+#define param_check_atomic_t(name, p) __param_check(name, p, atomic_t)
7340+
1facf9fc 7341+/* ---------------------------------------------------------------------- */
7342+
7343+/* debug print */
7344+
4a4d8108 7345+#define AuDbg(fmt, ...) do { \
1facf9fc 7346+ if (au_debug_test()) \
4a4d8108 7347+ pr_debug("DEBUG: " fmt, ##__VA_ARGS__); \
1facf9fc 7348+} while (0)
4a4d8108
AM
7349+#define AuLabel(l) AuDbg(#l "\n")
7350+#define AuIOErr(fmt, ...) pr_err("I/O Error, " fmt, ##__VA_ARGS__)
7351+#define AuWarn1(fmt, ...) do { \
1facf9fc 7352+ static unsigned char _c; \
7353+ if (!_c++) \
0c3ec466 7354+ pr_warn(fmt, ##__VA_ARGS__); \
1facf9fc 7355+} while (0)
7356+
4a4d8108 7357+#define AuErr1(fmt, ...) do { \
1facf9fc 7358+ static unsigned char _c; \
7359+ if (!_c++) \
4a4d8108 7360+ pr_err(fmt, ##__VA_ARGS__); \
1facf9fc 7361+} while (0)
7362+
4a4d8108 7363+#define AuIOErr1(fmt, ...) do { \
1facf9fc 7364+ static unsigned char _c; \
7365+ if (!_c++) \
4a4d8108 7366+ AuIOErr(fmt, ##__VA_ARGS__); \
1facf9fc 7367+} while (0)
7368+
7369+#define AuUnsupportMsg "This operation is not supported." \
7370+ " Please report this application to aufs-users ML."
4a4d8108
AM
7371+#define AuUnsupport(fmt, ...) do { \
7372+ pr_err(AuUnsupportMsg "\n" fmt, ##__VA_ARGS__); \
1facf9fc 7373+ dump_stack(); \
7374+} while (0)
7375+
7376+#define AuTraceErr(e) do { \
7377+ if (unlikely((e) < 0)) \
7378+ AuDbg("err %d\n", (int)(e)); \
7379+} while (0)
7380+
7381+#define AuTraceErrPtr(p) do { \
7382+ if (IS_ERR(p)) \
7383+ AuDbg("err %ld\n", PTR_ERR(p)); \
7384+} while (0)
7385+
7386+/* dirty macros for debug print, use with "%.*s" and caution */
7387+#define AuLNPair(qstr) (qstr)->len, (qstr)->name
1facf9fc 7388+
7389+/* ---------------------------------------------------------------------- */
7390+
dece6358 7391+struct dentry;
1facf9fc 7392+#ifdef CONFIG_AUFS_DEBUG
c1595e42 7393+extern struct mutex au_dbg_mtx;
1facf9fc 7394+extern char *au_plevel;
7395+struct au_nhash;
7396+void au_dpri_whlist(struct au_nhash *whlist);
7397+struct au_vdir;
7398+void au_dpri_vdir(struct au_vdir *vdir);
dece6358 7399+struct inode;
1facf9fc 7400+void au_dpri_inode(struct inode *inode);
2cbb1c4b 7401+void au_dpri_dalias(struct inode *inode);
1facf9fc 7402+void au_dpri_dentry(struct dentry *dentry);
dece6358 7403+struct file;
1facf9fc 7404+void au_dpri_file(struct file *filp);
dece6358 7405+struct super_block;
1facf9fc 7406+void au_dpri_sb(struct super_block *sb);
7407+
027c5e7a
AM
7408+#define au_dbg_verify_dinode(d) __au_dbg_verify_dinode(d, __func__, __LINE__)
7409+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line);
1facf9fc 7410+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen);
1facf9fc 7411+void au_dbg_verify_kthread(void);
7412+
7413+int __init au_debug_init(void);
7e9cd9fe 7414+
1facf9fc 7415+#define AuDbgWhlist(w) do { \
c1595e42 7416+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7417+ AuDbg(#w "\n"); \
7418+ au_dpri_whlist(w); \
c1595e42 7419+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7420+} while (0)
7421+
7422+#define AuDbgVdir(v) do { \
c1595e42 7423+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7424+ AuDbg(#v "\n"); \
7425+ au_dpri_vdir(v); \
c1595e42 7426+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7427+} while (0)
7428+
7429+#define AuDbgInode(i) do { \
c1595e42 7430+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7431+ AuDbg(#i "\n"); \
7432+ au_dpri_inode(i); \
c1595e42 7433+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7434+} while (0)
7435+
2cbb1c4b 7436+#define AuDbgDAlias(i) do { \
c1595e42 7437+ mutex_lock(&au_dbg_mtx); \
2cbb1c4b
JR
7438+ AuDbg(#i "\n"); \
7439+ au_dpri_dalias(i); \
c1595e42 7440+ mutex_unlock(&au_dbg_mtx); \
2cbb1c4b
JR
7441+} while (0)
7442+
1facf9fc 7443+#define AuDbgDentry(d) do { \
c1595e42 7444+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7445+ AuDbg(#d "\n"); \
7446+ au_dpri_dentry(d); \
c1595e42 7447+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7448+} while (0)
7449+
7450+#define AuDbgFile(f) do { \
c1595e42 7451+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7452+ AuDbg(#f "\n"); \
7453+ au_dpri_file(f); \
c1595e42 7454+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7455+} while (0)
7456+
7457+#define AuDbgSb(sb) do { \
c1595e42 7458+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7459+ AuDbg(#sb "\n"); \
7460+ au_dpri_sb(sb); \
c1595e42 7461+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7462+} while (0)
7463+
4a4d8108
AM
7464+#define AuDbgSym(addr) do { \
7465+ char sym[KSYM_SYMBOL_LEN]; \
7466+ sprint_symbol(sym, (unsigned long)addr); \
7467+ AuDbg("%s\n", sym); \
7468+} while (0)
1facf9fc 7469+#else
027c5e7a 7470+AuStubVoid(au_dbg_verify_dinode, struct dentry *dentry)
4a4d8108
AM
7471+AuStubVoid(au_dbg_verify_gen, struct dentry *parent, unsigned int sigen)
7472+AuStubVoid(au_dbg_verify_kthread, void)
7473+AuStubInt0(__init au_debug_init, void)
1facf9fc 7474+
1facf9fc 7475+#define AuDbgWhlist(w) do {} while (0)
7476+#define AuDbgVdir(v) do {} while (0)
7477+#define AuDbgInode(i) do {} while (0)
2cbb1c4b 7478+#define AuDbgDAlias(i) do {} while (0)
1facf9fc 7479+#define AuDbgDentry(d) do {} while (0)
7480+#define AuDbgFile(f) do {} while (0)
7481+#define AuDbgSb(sb) do {} while (0)
4a4d8108 7482+#define AuDbgSym(addr) do {} while (0)
1facf9fc 7483+#endif /* CONFIG_AUFS_DEBUG */
7484+
7485+/* ---------------------------------------------------------------------- */
7486+
7487+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
7488+int __init au_sysrq_init(void);
7489+void au_sysrq_fin(void);
7490+
7491+#ifdef CONFIG_HW_CONSOLE
7492+#define au_dbg_blocked() do { \
7493+ WARN_ON(1); \
0c5527e5 7494+ handle_sysrq('w'); \
1facf9fc 7495+} while (0)
7496+#else
4a4d8108 7497+AuStubVoid(au_dbg_blocked, void)
1facf9fc 7498+#endif
7499+
7500+#else
4a4d8108
AM
7501+AuStubInt0(__init au_sysrq_init, void)
7502+AuStubVoid(au_sysrq_fin, void)
7503+AuStubVoid(au_dbg_blocked, void)
1facf9fc 7504+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
7505+
7506+#endif /* __KERNEL__ */
7507+#endif /* __AUFS_DEBUG_H__ */
7f207e10
AM
7508diff -urN /usr/share/empty/fs/aufs/dentry.c linux/fs/aufs/dentry.c
7509--- /usr/share/empty/fs/aufs/dentry.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 7510+++ linux/fs/aufs/dentry.c 2016-02-28 11:26:32.569971135 +0100
79b8bda9 7511@@ -0,0 +1,1136 @@
1facf9fc 7512+/*
8cdd5066 7513+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 7514+ *
7515+ * This program, aufs is free software; you can redistribute it and/or modify
7516+ * it under the terms of the GNU General Public License as published by
7517+ * the Free Software Foundation; either version 2 of the License, or
7518+ * (at your option) any later version.
dece6358
AM
7519+ *
7520+ * This program is distributed in the hope that it will be useful,
7521+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7522+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7523+ * GNU General Public License for more details.
7524+ *
7525+ * You should have received a copy of the GNU General Public License
523b37e3 7526+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 7527+ */
7528+
7529+/*
7530+ * lookup and dentry operations
7531+ */
7532+
dece6358 7533+#include <linux/namei.h>
1facf9fc 7534+#include "aufs.h"
7535+
1facf9fc 7536+#define AuLkup_ALLOW_NEG 1
076b876e 7537+#define AuLkup_IGNORE_PERM (1 << 1)
1facf9fc 7538+#define au_ftest_lkup(flags, name) ((flags) & AuLkup_##name)
7f207e10
AM
7539+#define au_fset_lkup(flags, name) \
7540+ do { (flags) |= AuLkup_##name; } while (0)
7541+#define au_fclr_lkup(flags, name) \
7542+ do { (flags) &= ~AuLkup_##name; } while (0)
1facf9fc 7543+
7544+struct au_do_lookup_args {
7545+ unsigned int flags;
7546+ mode_t type;
1facf9fc 7547+};
7548+
7549+/*
7550+ * returns positive/negative dentry, NULL or an error.
7551+ * NULL means whiteout-ed or not-found.
7552+ */
7553+static struct dentry*
7554+au_do_lookup(struct dentry *h_parent, struct dentry *dentry,
7555+ aufs_bindex_t bindex, struct qstr *wh_name,
7556+ struct au_do_lookup_args *args)
7557+{
7558+ struct dentry *h_dentry;
2000de60 7559+ struct inode *h_inode;
1facf9fc 7560+ struct au_branch *br;
7561+ int wh_found, opq;
7562+ unsigned char wh_able;
7563+ const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG);
076b876e
AM
7564+ const unsigned char ignore_perm = !!au_ftest_lkup(args->flags,
7565+ IGNORE_PERM);
1facf9fc 7566+
1facf9fc 7567+ wh_found = 0;
7568+ br = au_sbr(dentry->d_sb, bindex);
7569+ wh_able = !!au_br_whable(br->br_perm);
7570+ if (wh_able)
076b876e 7571+ wh_found = au_wh_test(h_parent, wh_name, /*try_sio*/0);
1facf9fc 7572+ h_dentry = ERR_PTR(wh_found);
7573+ if (!wh_found)
7574+ goto real_lookup;
7575+ if (unlikely(wh_found < 0))
7576+ goto out;
7577+
7578+ /* We found a whiteout */
7579+ /* au_set_dbend(dentry, bindex); */
7580+ au_set_dbwh(dentry, bindex);
7581+ if (!allow_neg)
7582+ return NULL; /* success */
7583+
4f0767ce 7584+real_lookup:
076b876e
AM
7585+ if (!ignore_perm)
7586+ h_dentry = vfsub_lkup_one(&dentry->d_name, h_parent);
7587+ else
7588+ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent);
2000de60
JR
7589+ if (IS_ERR(h_dentry)) {
7590+ if (PTR_ERR(h_dentry) == -ENAMETOOLONG
7591+ && !allow_neg)
7592+ h_dentry = NULL;
1facf9fc 7593+ goto out;
2000de60 7594+ }
1facf9fc 7595+
5527c038
JR
7596+ h_inode = d_inode(h_dentry);
7597+ if (d_is_negative(h_dentry)) {
1facf9fc 7598+ if (!allow_neg)
7599+ goto out_neg;
7600+ } else if (wh_found
7601+ || (args->type && args->type != (h_inode->i_mode & S_IFMT)))
7602+ goto out_neg;
7603+
7604+ if (au_dbend(dentry) <= bindex)
7605+ au_set_dbend(dentry, bindex);
7606+ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
7607+ au_set_dbstart(dentry, bindex);
7608+ au_set_h_dptr(dentry, bindex, h_dentry);
7609+
2000de60
JR
7610+ if (!d_is_dir(h_dentry)
7611+ || !wh_able
5527c038 7612+ || (d_really_is_positive(dentry) && !d_is_dir(dentry)))
1facf9fc 7613+ goto out; /* success */
7614+
7615+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
076b876e 7616+ opq = au_diropq_test(h_dentry);
1facf9fc 7617+ mutex_unlock(&h_inode->i_mutex);
7618+ if (opq > 0)
7619+ au_set_dbdiropq(dentry, bindex);
7620+ else if (unlikely(opq < 0)) {
7621+ au_set_h_dptr(dentry, bindex, NULL);
7622+ h_dentry = ERR_PTR(opq);
7623+ }
7624+ goto out;
7625+
4f0767ce 7626+out_neg:
1facf9fc 7627+ dput(h_dentry);
7628+ h_dentry = NULL;
4f0767ce 7629+out:
1facf9fc 7630+ return h_dentry;
7631+}
7632+
dece6358
AM
7633+static int au_test_shwh(struct super_block *sb, const struct qstr *name)
7634+{
7635+ if (unlikely(!au_opt_test(au_mntflags(sb), SHWH)
7636+ && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)))
7637+ return -EPERM;
7638+ return 0;
7639+}
7640+
1facf9fc 7641+/*
7642+ * returns the number of lower positive dentries,
7643+ * otherwise an error.
7644+ * can be called at unlinking with @type set to zero.
7645+ */
537831f9 7646+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type)
1facf9fc 7647+{
7648+ int npositive, err;
7649+ aufs_bindex_t bindex, btail, bdiropq;
076b876e 7650+ unsigned char isdir, dirperm1;
1facf9fc 7651+ struct qstr whname;
7652+ struct au_do_lookup_args args = {
b4510431 7653+ .flags = 0,
537831f9 7654+ .type = type
1facf9fc 7655+ };
7656+ const struct qstr *name = &dentry->d_name;
7657+ struct dentry *parent;
076b876e 7658+ struct super_block *sb;
1facf9fc 7659+
076b876e
AM
7660+ sb = dentry->d_sb;
7661+ err = au_test_shwh(sb, name);
dece6358 7662+ if (unlikely(err))
1facf9fc 7663+ goto out;
7664+
7665+ err = au_wh_name_alloc(&whname, name);
7666+ if (unlikely(err))
7667+ goto out;
7668+
2000de60 7669+ isdir = !!d_is_dir(dentry);
1facf9fc 7670+ if (!type)
7671+ au_fset_lkup(args.flags, ALLOW_NEG);
076b876e 7672+ dirperm1 = !!au_opt_test(au_mntflags(sb), DIRPERM1);
1facf9fc 7673+
7674+ npositive = 0;
4a4d8108 7675+ parent = dget_parent(dentry);
1facf9fc 7676+ btail = au_dbtaildir(parent);
7677+ for (bindex = bstart; bindex <= btail; bindex++) {
7678+ struct dentry *h_parent, *h_dentry;
7679+ struct inode *h_inode, *h_dir;
7680+
7681+ h_dentry = au_h_dptr(dentry, bindex);
7682+ if (h_dentry) {
5527c038 7683+ if (d_is_positive(h_dentry))
1facf9fc 7684+ npositive++;
7685+ if (type != S_IFDIR)
7686+ break;
7687+ continue;
7688+ }
7689+ h_parent = au_h_dptr(parent, bindex);
2000de60 7690+ if (!h_parent || !d_is_dir(h_parent))
1facf9fc 7691+ continue;
7692+
5527c038 7693+ h_dir = d_inode(h_parent);
1facf9fc 7694+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
7695+ h_dentry = au_do_lookup(h_parent, dentry, bindex, &whname,
7696+ &args);
7697+ mutex_unlock(&h_dir->i_mutex);
7698+ err = PTR_ERR(h_dentry);
7699+ if (IS_ERR(h_dentry))
4a4d8108 7700+ goto out_parent;
2000de60
JR
7701+ if (h_dentry)
7702+ au_fclr_lkup(args.flags, ALLOW_NEG);
076b876e
AM
7703+ if (dirperm1)
7704+ au_fset_lkup(args.flags, IGNORE_PERM);
1facf9fc 7705+
79b8bda9 7706+ if (au_dbwh(dentry) == bindex)
1facf9fc 7707+ break;
7708+ if (!h_dentry)
7709+ continue;
5527c038 7710+ if (d_is_negative(h_dentry))
1facf9fc 7711+ continue;
5527c038 7712+ h_inode = d_inode(h_dentry);
1facf9fc 7713+ npositive++;
7714+ if (!args.type)
7715+ args.type = h_inode->i_mode & S_IFMT;
7716+ if (args.type != S_IFDIR)
7717+ break;
7718+ else if (isdir) {
7719+ /* the type of lower may be different */
7720+ bdiropq = au_dbdiropq(dentry);
7721+ if (bdiropq >= 0 && bdiropq <= bindex)
7722+ break;
7723+ }
7724+ }
7725+
7726+ if (npositive) {
7727+ AuLabel(positive);
7728+ au_update_dbstart(dentry);
7729+ }
7730+ err = npositive;
076b876e 7731+ if (unlikely(!au_opt_test(au_mntflags(sb), UDBA_NONE)
027c5e7a 7732+ && au_dbstart(dentry) < 0)) {
1facf9fc 7733+ err = -EIO;
523b37e3
AM
7734+ AuIOErr("both of real entry and whiteout found, %pd, err %d\n",
7735+ dentry, err);
027c5e7a 7736+ }
1facf9fc 7737+
4f0767ce 7738+out_parent:
4a4d8108 7739+ dput(parent);
1facf9fc 7740+ kfree(whname.name);
4f0767ce 7741+out:
1facf9fc 7742+ return err;
7743+}
7744+
076b876e 7745+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent)
1facf9fc 7746+{
7747+ struct dentry *dentry;
7748+ int wkq_err;
7749+
5527c038 7750+ if (!au_test_h_perm_sio(d_inode(parent), MAY_EXEC))
b4510431 7751+ dentry = vfsub_lkup_one(name, parent);
1facf9fc 7752+ else {
b4510431
AM
7753+ struct vfsub_lkup_one_args args = {
7754+ .errp = &dentry,
7755+ .name = name,
7756+ .parent = parent
1facf9fc 7757+ };
7758+
b4510431 7759+ wkq_err = au_wkq_wait(vfsub_call_lkup_one, &args);
1facf9fc 7760+ if (unlikely(wkq_err))
7761+ dentry = ERR_PTR(wkq_err);
7762+ }
7763+
7764+ return dentry;
7765+}
7766+
7767+/*
7768+ * lookup @dentry on @bindex which should be negative.
7769+ */
86dc4139 7770+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh)
1facf9fc 7771+{
7772+ int err;
7773+ struct dentry *parent, *h_parent, *h_dentry;
86dc4139 7774+ struct au_branch *br;
1facf9fc 7775+
1facf9fc 7776+ parent = dget_parent(dentry);
7777+ h_parent = au_h_dptr(parent, bindex);
86dc4139
AM
7778+ br = au_sbr(dentry->d_sb, bindex);
7779+ if (wh)
7780+ h_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
7781+ else
076b876e 7782+ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent);
1facf9fc 7783+ err = PTR_ERR(h_dentry);
7784+ if (IS_ERR(h_dentry))
7785+ goto out;
5527c038 7786+ if (unlikely(d_is_positive(h_dentry))) {
1facf9fc 7787+ err = -EIO;
523b37e3 7788+ AuIOErr("%pd should be negative on b%d.\n", h_dentry, bindex);
1facf9fc 7789+ dput(h_dentry);
7790+ goto out;
7791+ }
7792+
4a4d8108 7793+ err = 0;
1facf9fc 7794+ if (bindex < au_dbstart(dentry))
7795+ au_set_dbstart(dentry, bindex);
7796+ if (au_dbend(dentry) < bindex)
7797+ au_set_dbend(dentry, bindex);
7798+ au_set_h_dptr(dentry, bindex, h_dentry);
1facf9fc 7799+
4f0767ce 7800+out:
1facf9fc 7801+ dput(parent);
7802+ return err;
7803+}
7804+
7805+/* ---------------------------------------------------------------------- */
7806+
7807+/* subset of struct inode */
7808+struct au_iattr {
7809+ unsigned long i_ino;
7810+ /* unsigned int i_nlink; */
0c3ec466
AM
7811+ kuid_t i_uid;
7812+ kgid_t i_gid;
1facf9fc 7813+ u64 i_version;
7814+/*
7815+ loff_t i_size;
7816+ blkcnt_t i_blocks;
7817+*/
7818+ umode_t i_mode;
7819+};
7820+
7821+static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode)
7822+{
7823+ ia->i_ino = h_inode->i_ino;
7824+ /* ia->i_nlink = h_inode->i_nlink; */
7825+ ia->i_uid = h_inode->i_uid;
7826+ ia->i_gid = h_inode->i_gid;
7827+ ia->i_version = h_inode->i_version;
7828+/*
7829+ ia->i_size = h_inode->i_size;
7830+ ia->i_blocks = h_inode->i_blocks;
7831+*/
7832+ ia->i_mode = (h_inode->i_mode & S_IFMT);
7833+}
7834+
7835+static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode)
7836+{
7837+ return ia->i_ino != h_inode->i_ino
7838+ /* || ia->i_nlink != h_inode->i_nlink */
0c3ec466 7839+ || !uid_eq(ia->i_uid, h_inode->i_uid)
2dfbb274 7840+ || !gid_eq(ia->i_gid, h_inode->i_gid)
1facf9fc 7841+ || ia->i_version != h_inode->i_version
7842+/*
7843+ || ia->i_size != h_inode->i_size
7844+ || ia->i_blocks != h_inode->i_blocks
7845+*/
7846+ || ia->i_mode != (h_inode->i_mode & S_IFMT);
7847+}
7848+
7849+static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent,
7850+ struct au_branch *br)
7851+{
7852+ int err;
7853+ struct au_iattr ia;
7854+ struct inode *h_inode;
7855+ struct dentry *h_d;
7856+ struct super_block *h_sb;
7857+
7858+ err = 0;
7859+ memset(&ia, -1, sizeof(ia));
7860+ h_sb = h_dentry->d_sb;
5527c038
JR
7861+ h_inode = NULL;
7862+ if (d_is_positive(h_dentry)) {
7863+ h_inode = d_inode(h_dentry);
1facf9fc 7864+ au_iattr_save(&ia, h_inode);
5527c038 7865+ } else if (au_test_nfs(h_sb) || au_test_fuse(h_sb))
1facf9fc 7866+ /* nfs d_revalidate may return 0 for negative dentry */
7867+ /* fuse d_revalidate always return 0 for negative dentry */
7868+ goto out;
7869+
7870+ /* main purpose is namei.c:cached_lookup() and d_revalidate */
b4510431 7871+ h_d = vfsub_lkup_one(&h_dentry->d_name, h_parent);
1facf9fc 7872+ err = PTR_ERR(h_d);
7873+ if (IS_ERR(h_d))
7874+ goto out;
7875+
7876+ err = 0;
7877+ if (unlikely(h_d != h_dentry
5527c038 7878+ || d_inode(h_d) != h_inode
1facf9fc 7879+ || (h_inode && au_iattr_test(&ia, h_inode))))
7880+ err = au_busy_or_stale();
7881+ dput(h_d);
7882+
4f0767ce 7883+out:
1facf9fc 7884+ AuTraceErr(err);
7885+ return err;
7886+}
7887+
7888+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
7889+ struct dentry *h_parent, struct au_branch *br)
7890+{
7891+ int err;
7892+
7893+ err = 0;
027c5e7a
AM
7894+ if (udba == AuOpt_UDBA_REVAL
7895+ && !au_test_fs_remote(h_dentry->d_sb)) {
1facf9fc 7896+ IMustLock(h_dir);
5527c038 7897+ err = (d_inode(h_dentry->d_parent) != h_dir);
027c5e7a 7898+ } else if (udba != AuOpt_UDBA_NONE)
1facf9fc 7899+ err = au_h_verify_dentry(h_dentry, h_parent, br);
7900+
7901+ return err;
7902+}
7903+
7904+/* ---------------------------------------------------------------------- */
7905+
027c5e7a 7906+static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent)
1facf9fc 7907+{
027c5e7a 7908+ int err;
1facf9fc 7909+ aufs_bindex_t new_bindex, bindex, bend, bwh, bdiropq;
027c5e7a
AM
7910+ struct au_hdentry tmp, *p, *q;
7911+ struct au_dinfo *dinfo;
7912+ struct super_block *sb;
1facf9fc 7913+
027c5e7a 7914+ DiMustWriteLock(dentry);
1308ab2a 7915+
027c5e7a
AM
7916+ sb = dentry->d_sb;
7917+ dinfo = au_di(dentry);
1facf9fc 7918+ bend = dinfo->di_bend;
7919+ bwh = dinfo->di_bwh;
7920+ bdiropq = dinfo->di_bdiropq;
027c5e7a 7921+ p = dinfo->di_hdentry + dinfo->di_bstart;
1facf9fc 7922+ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++, p++) {
027c5e7a 7923+ if (!p->hd_dentry)
1facf9fc 7924+ continue;
7925+
027c5e7a
AM
7926+ new_bindex = au_br_index(sb, p->hd_id);
7927+ if (new_bindex == bindex)
1facf9fc 7928+ continue;
1facf9fc 7929+
1facf9fc 7930+ if (dinfo->di_bwh == bindex)
7931+ bwh = new_bindex;
7932+ if (dinfo->di_bdiropq == bindex)
7933+ bdiropq = new_bindex;
7934+ if (new_bindex < 0) {
7935+ au_hdput(p);
7936+ p->hd_dentry = NULL;
7937+ continue;
7938+ }
7939+
7940+ /* swap two lower dentries, and loop again */
7941+ q = dinfo->di_hdentry + new_bindex;
7942+ tmp = *q;
7943+ *q = *p;
7944+ *p = tmp;
7945+ if (tmp.hd_dentry) {
7946+ bindex--;
7947+ p--;
7948+ }
7949+ }
7950+
1facf9fc 7951+ dinfo->di_bwh = -1;
7952+ if (bwh >= 0 && bwh <= au_sbend(sb) && au_sbr_whable(sb, bwh))
7953+ dinfo->di_bwh = bwh;
7954+
7955+ dinfo->di_bdiropq = -1;
7956+ if (bdiropq >= 0
7957+ && bdiropq <= au_sbend(sb)
7958+ && au_sbr_whable(sb, bdiropq))
7959+ dinfo->di_bdiropq = bdiropq;
7960+
027c5e7a
AM
7961+ err = -EIO;
7962+ dinfo->di_bstart = -1;
7963+ dinfo->di_bend = -1;
1facf9fc 7964+ bend = au_dbend(parent);
7965+ p = dinfo->di_hdentry;
7966+ for (bindex = 0; bindex <= bend; bindex++, p++)
7967+ if (p->hd_dentry) {
7968+ dinfo->di_bstart = bindex;
7969+ break;
7970+ }
7971+
027c5e7a
AM
7972+ if (dinfo->di_bstart >= 0) {
7973+ p = dinfo->di_hdentry + bend;
7974+ for (bindex = bend; bindex >= 0; bindex--, p--)
7975+ if (p->hd_dentry) {
7976+ dinfo->di_bend = bindex;
7977+ err = 0;
7978+ break;
7979+ }
7980+ }
7981+
7982+ return err;
1facf9fc 7983+}
7984+
027c5e7a 7985+static void au_do_hide(struct dentry *dentry)
1facf9fc 7986+{
027c5e7a 7987+ struct inode *inode;
1facf9fc 7988+
5527c038
JR
7989+ if (d_really_is_positive(dentry)) {
7990+ inode = d_inode(dentry);
7991+ if (!d_is_dir(dentry)) {
027c5e7a
AM
7992+ if (inode->i_nlink && !d_unhashed(dentry))
7993+ drop_nlink(inode);
7994+ } else {
7995+ clear_nlink(inode);
7996+ /* stop next lookup */
7997+ inode->i_flags |= S_DEAD;
7998+ }
7999+ smp_mb(); /* necessary? */
8000+ }
8001+ d_drop(dentry);
8002+}
1308ab2a 8003+
027c5e7a
AM
8004+static int au_hide_children(struct dentry *parent)
8005+{
8006+ int err, i, j, ndentry;
8007+ struct au_dcsub_pages dpages;
8008+ struct au_dpage *dpage;
8009+ struct dentry *dentry;
1facf9fc 8010+
027c5e7a 8011+ err = au_dpages_init(&dpages, GFP_NOFS);
1facf9fc 8012+ if (unlikely(err))
8013+ goto out;
027c5e7a
AM
8014+ err = au_dcsub_pages(&dpages, parent, NULL, NULL);
8015+ if (unlikely(err))
8016+ goto out_dpages;
1facf9fc 8017+
027c5e7a
AM
8018+ /* in reverse order */
8019+ for (i = dpages.ndpage - 1; i >= 0; i--) {
8020+ dpage = dpages.dpages + i;
8021+ ndentry = dpage->ndentry;
8022+ for (j = ndentry - 1; j >= 0; j--) {
8023+ dentry = dpage->dentries[j];
8024+ if (dentry != parent)
8025+ au_do_hide(dentry);
8026+ }
8027+ }
1facf9fc 8028+
027c5e7a
AM
8029+out_dpages:
8030+ au_dpages_free(&dpages);
4f0767ce 8031+out:
027c5e7a 8032+ return err;
1facf9fc 8033+}
8034+
/*
 * Hide @dentry.  For a directory its children are hidden first; a failure
 * there is reported via AuIOErr() and otherwise ignored.
 */
static void au_hide(struct dentry *dentry)
{
	int rc;

	AuDbgDentry(dentry);
	if (!d_is_dir(dentry)) {
		au_do_hide(dentry);
		return;
	}

	/* shrink_dcache_parent(dentry); */
	rc = au_hide_children(dentry);
	if (unlikely(rc))
		AuIOErr("%pd, failed hiding children, ignored %d\n",
			dentry, rc);
	au_do_hide(dentry);
}
1facf9fc 8049+
027c5e7a
AM
8050+/*
8051+ * By adding a dirty branch, a cached dentry may be affected in various ways.
8052+ *
8053+ * a dirty branch is added
8054+ * - on the top of layers
8055+ * - in the middle of layers
8056+ * - to the bottom of layers
8057+ *
8058+ * on the added branch there exists
8059+ * - a whiteout
8060+ * - a diropq
8061+ * - a same named entry
8062+ * + exist
8063+ * * negative --> positive
8064+ * * positive --> positive
8065+ * - type is unchanged
8066+ * - type is changed
8067+ * + doesn't exist
8068+ * * negative --> negative
8069+ * * positive --> negative (rejected by au_br_del() for non-dir case)
8070+ * - none
8071+ */
8072+static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo,
8073+ struct au_dinfo *tmp)
8074+{
8075+ int err;
8076+ aufs_bindex_t bindex, bend;
8077+ struct {
8078+ struct dentry *dentry;
8079+ struct inode *inode;
8080+ mode_t mode;
be52b249
AM
8081+ } orig_h, tmp_h = {
8082+ .dentry = NULL
8083+ };
027c5e7a
AM
8084+ struct au_hdentry *hd;
8085+ struct inode *inode, *h_inode;
8086+ struct dentry *h_dentry;
8087+
8088+ err = 0;
8089+ AuDebugOn(dinfo->di_bstart < 0);
027c5e7a 8090+ orig_h.mode = 0;
5527c038
JR
8091+ orig_h.dentry = dinfo->di_hdentry[dinfo->di_bstart].hd_dentry;
8092+ orig_h.inode = NULL;
8093+ if (d_is_positive(orig_h.dentry)) {
8094+ orig_h.inode = d_inode(orig_h.dentry);
027c5e7a 8095+ orig_h.mode = orig_h.inode->i_mode & S_IFMT;
5527c038 8096+ }
027c5e7a
AM
8097+ if (tmp->di_bstart >= 0) {
8098+ tmp_h.dentry = tmp->di_hdentry[tmp->di_bstart].hd_dentry;
5527c038
JR
8099+ if (d_is_positive(tmp_h.dentry)) {
8100+ tmp_h.inode = d_inode(tmp_h.dentry);
027c5e7a 8101+ tmp_h.mode = tmp_h.inode->i_mode & S_IFMT;
5527c038 8102+ }
027c5e7a
AM
8103+ }
8104+
5527c038
JR
8105+ inode = NULL;
8106+ if (d_really_is_positive(dentry))
8107+ inode = d_inode(dentry);
027c5e7a
AM
8108+ if (!orig_h.inode) {
8109+ AuDbg("nagative originally\n");
8110+ if (inode) {
8111+ au_hide(dentry);
8112+ goto out;
8113+ }
8114+ AuDebugOn(inode);
8115+ AuDebugOn(dinfo->di_bstart != dinfo->di_bend);
8116+ AuDebugOn(dinfo->di_bdiropq != -1);
8117+
8118+ if (!tmp_h.inode) {
8119+ AuDbg("negative --> negative\n");
8120+ /* should have only one negative lower */
8121+ if (tmp->di_bstart >= 0
8122+ && tmp->di_bstart < dinfo->di_bstart) {
8123+ AuDebugOn(tmp->di_bstart != tmp->di_bend);
8124+ AuDebugOn(dinfo->di_bstart != dinfo->di_bend);
8125+ au_set_h_dptr(dentry, dinfo->di_bstart, NULL);
8126+ au_di_cp(dinfo, tmp);
8127+ hd = tmp->di_hdentry + tmp->di_bstart;
8128+ au_set_h_dptr(dentry, tmp->di_bstart,
8129+ dget(hd->hd_dentry));
8130+ }
8131+ au_dbg_verify_dinode(dentry);
8132+ } else {
8133+ AuDbg("negative --> positive\n");
8134+ /*
8135+ * similar to the behaviour of creating with bypassing
8136+ * aufs.
8137+ * unhash it in order to force an error in the
8138+ * succeeding create operation.
8139+ * we should not set S_DEAD here.
8140+ */
8141+ d_drop(dentry);
8142+ /* au_di_swap(tmp, dinfo); */
8143+ au_dbg_verify_dinode(dentry);
8144+ }
8145+ } else {
8146+ AuDbg("positive originally\n");
8147+ /* inode may be NULL */
8148+ AuDebugOn(inode && (inode->i_mode & S_IFMT) != orig_h.mode);
8149+ if (!tmp_h.inode) {
8150+ AuDbg("positive --> negative\n");
8151+ /* or bypassing aufs */
8152+ au_hide(dentry);
8153+ if (tmp->di_bwh >= 0 && tmp->di_bwh <= dinfo->di_bstart)
8154+ dinfo->di_bwh = tmp->di_bwh;
8155+ if (inode)
8156+ err = au_refresh_hinode_self(inode);
8157+ au_dbg_verify_dinode(dentry);
8158+ } else if (orig_h.mode == tmp_h.mode) {
8159+ AuDbg("positive --> positive, same type\n");
8160+ if (!S_ISDIR(orig_h.mode)
8161+ && dinfo->di_bstart > tmp->di_bstart) {
8162+ /*
8163+ * similar to the behaviour of removing and
8164+ * creating.
8165+ */
8166+ au_hide(dentry);
8167+ if (inode)
8168+ err = au_refresh_hinode_self(inode);
8169+ au_dbg_verify_dinode(dentry);
8170+ } else {
8171+ /* fill empty slots */
8172+ if (dinfo->di_bstart > tmp->di_bstart)
8173+ dinfo->di_bstart = tmp->di_bstart;
8174+ if (dinfo->di_bend < tmp->di_bend)
8175+ dinfo->di_bend = tmp->di_bend;
8176+ dinfo->di_bwh = tmp->di_bwh;
8177+ dinfo->di_bdiropq = tmp->di_bdiropq;
8178+ hd = tmp->di_hdentry;
8179+ bend = dinfo->di_bend;
8180+ for (bindex = tmp->di_bstart; bindex <= bend;
8181+ bindex++) {
8182+ if (au_h_dptr(dentry, bindex))
8183+ continue;
8184+ h_dentry = hd[bindex].hd_dentry;
8185+ if (!h_dentry)
8186+ continue;
5527c038
JR
8187+ AuDebugOn(d_is_negative(h_dentry));
8188+ h_inode = d_inode(h_dentry);
027c5e7a
AM
8189+ AuDebugOn(orig_h.mode
8190+ != (h_inode->i_mode
8191+ & S_IFMT));
8192+ au_set_h_dptr(dentry, bindex,
8193+ dget(h_dentry));
8194+ }
8195+ err = au_refresh_hinode(inode, dentry);
8196+ au_dbg_verify_dinode(dentry);
8197+ }
8198+ } else {
8199+ AuDbg("positive --> positive, different type\n");
8200+ /* similar to the behaviour of removing and creating */
8201+ au_hide(dentry);
8202+ if (inode)
8203+ err = au_refresh_hinode_self(inode);
8204+ au_dbg_verify_dinode(dentry);
8205+ }
8206+ }
8207+
8208+out:
8209+ return err;
8210+}
8211+
79b8bda9
AM
8212+void au_refresh_dop(struct dentry *dentry, int force_reval)
8213+{
8214+ const struct dentry_operations *dop
8215+ = force_reval ? &aufs_dop : dentry->d_sb->s_d_op;
8216+ static const unsigned int mask
8217+ = DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE;
8218+
8219+ BUILD_BUG_ON(sizeof(mask) != sizeof(dentry->d_flags));
8220+
8221+ if (dentry->d_op == dop)
8222+ return;
8223+
8224+ AuDbg("%pd\n", dentry);
8225+ spin_lock(&dentry->d_lock);
8226+ if (dop == &aufs_dop)
8227+ dentry->d_flags |= mask;
8228+ else
8229+ dentry->d_flags &= ~mask;
8230+ dentry->d_op = dop;
8231+ spin_unlock(&dentry->d_lock);
8232+}
8233+
027c5e7a
AM
8234+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent)
8235+{
8236+ int err, ebrange;
8237+ unsigned int sigen;
8238+ struct au_dinfo *dinfo, *tmp;
8239+ struct super_block *sb;
8240+ struct inode *inode;
8241+
8242+ DiMustWriteLock(dentry);
8243+ AuDebugOn(IS_ROOT(dentry));
5527c038 8244+ AuDebugOn(d_really_is_negative(parent));
027c5e7a
AM
8245+
8246+ sb = dentry->d_sb;
027c5e7a
AM
8247+ sigen = au_sigen(sb);
8248+ err = au_digen_test(parent, sigen);
8249+ if (unlikely(err))
8250+ goto out;
8251+
8252+ dinfo = au_di(dentry);
8253+ err = au_di_realloc(dinfo, au_sbend(sb) + 1);
8254+ if (unlikely(err))
8255+ goto out;
8256+ ebrange = au_dbrange_test(dentry);
8257+ if (!ebrange)
8258+ ebrange = au_do_refresh_hdentry(dentry, parent);
8259+
38d290e6 8260+ if (d_unhashed(dentry) || ebrange /* || dinfo->di_tmpfile */) {
027c5e7a 8261+ AuDebugOn(au_dbstart(dentry) < 0 && au_dbend(dentry) >= 0);
5527c038
JR
8262+ if (d_really_is_positive(dentry)) {
8263+ inode = d_inode(dentry);
027c5e7a 8264+ err = au_refresh_hinode_self(inode);
5527c038 8265+ }
027c5e7a
AM
8266+ au_dbg_verify_dinode(dentry);
8267+ if (!err)
8268+ goto out_dgen; /* success */
8269+ goto out;
8270+ }
8271+
8272+ /* temporary dinfo */
8273+ AuDbgDentry(dentry);
8274+ err = -ENOMEM;
8275+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
8276+ if (unlikely(!tmp))
8277+ goto out;
8278+ au_di_swap(tmp, dinfo);
8279+ /* returns the number of positive dentries */
8280+ /*
8281+ * if current working dir is removed, it returns an error.
8282+ * but the dentry is legal.
8283+ */
537831f9 8284+ err = au_lkup_dentry(dentry, /*bstart*/0, /*type*/0);
027c5e7a
AM
8285+ AuDbgDentry(dentry);
8286+ au_di_swap(tmp, dinfo);
8287+ if (err == -ENOENT)
8288+ err = 0;
8289+ if (err >= 0) {
8290+ /* compare/refresh by dinfo */
8291+ AuDbgDentry(dentry);
8292+ err = au_refresh_by_dinfo(dentry, dinfo, tmp);
8293+ au_dbg_verify_dinode(dentry);
8294+ AuTraceErr(err);
8295+ }
8296+ au_rw_write_unlock(&tmp->di_rwsem);
8297+ au_di_free(tmp);
8298+ if (unlikely(err))
8299+ goto out;
8300+
8301+out_dgen:
8302+ au_update_digen(dentry);
8303+out:
8304+ if (unlikely(err && !(dentry->d_flags & DCACHE_NFSFS_RENAMED))) {
523b37e3 8305+ AuIOErr("failed refreshing %pd, %d\n", dentry, err);
027c5e7a
AM
8306+ AuDbgDentry(dentry);
8307+ }
8308+ AuTraceErr(err);
8309+ return err;
8310+}
8311+
b4510431
AM
8312+static int au_do_h_d_reval(struct dentry *h_dentry, unsigned int flags,
8313+ struct dentry *dentry, aufs_bindex_t bindex)
027c5e7a
AM
8314+{
8315+ int err, valid;
027c5e7a
AM
8316+
8317+ err = 0;
8318+ if (!(h_dentry->d_flags & DCACHE_OP_REVALIDATE))
8319+ goto out;
027c5e7a
AM
8320+
8321+ AuDbg("b%d\n", bindex);
b4510431
AM
8322+ /*
8323+ * gave up supporting LOOKUP_CREATE/OPEN for lower fs,
8324+ * due to whiteout and branch permission.
8325+ */
8326+ flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE
8327+ | LOOKUP_FOLLOW | LOOKUP_EXCL);
8328+ /* it may return tri-state */
8329+ valid = h_dentry->d_op->d_revalidate(h_dentry, flags);
1facf9fc 8330+
8331+ if (unlikely(valid < 0))
8332+ err = valid;
8333+ else if (!valid)
8334+ err = -EINVAL;
8335+
4f0767ce 8336+out:
1facf9fc 8337+ AuTraceErr(err);
8338+ return err;
8339+}
8340+
8341+/* todo: remove this */
8342+static int h_d_revalidate(struct dentry *dentry, struct inode *inode,
b4510431 8343+ unsigned int flags, int do_udba)
1facf9fc 8344+{
8345+ int err;
8346+ umode_t mode, h_mode;
8347+ aufs_bindex_t bindex, btail, bstart, ibs, ibe;
38d290e6 8348+ unsigned char plus, unhashed, is_root, h_plus, h_nfs, tmpfile;
4a4d8108 8349+ struct inode *h_inode, *h_cached_inode;
1facf9fc 8350+ struct dentry *h_dentry;
8351+ struct qstr *name, *h_name;
8352+
8353+ err = 0;
8354+ plus = 0;
8355+ mode = 0;
1facf9fc 8356+ ibs = -1;
8357+ ibe = -1;
8358+ unhashed = !!d_unhashed(dentry);
8359+ is_root = !!IS_ROOT(dentry);
8360+ name = &dentry->d_name;
38d290e6 8361+ tmpfile = au_di(dentry)->di_tmpfile;
1facf9fc 8362+
8363+ /*
7f207e10
AM
8364+ * Theoretically, REVAL test should be unnecessary in case of
8365+ * {FS,I}NOTIFY.
8366+ * But {fs,i}notify doesn't fire some necessary events,
1facf9fc 8367+ * IN_ATTRIB for atime/nlink/pageio
1facf9fc 8368+ * Let's do REVAL test too.
8369+ */
8370+ if (do_udba && inode) {
8371+ mode = (inode->i_mode & S_IFMT);
8372+ plus = (inode->i_nlink > 0);
1facf9fc 8373+ ibs = au_ibstart(inode);
8374+ ibe = au_ibend(inode);
8375+ }
8376+
8377+ bstart = au_dbstart(dentry);
8378+ btail = bstart;
8379+ if (inode && S_ISDIR(inode->i_mode))
8380+ btail = au_dbtaildir(dentry);
8381+ for (bindex = bstart; bindex <= btail; bindex++) {
8382+ h_dentry = au_h_dptr(dentry, bindex);
8383+ if (!h_dentry)
8384+ continue;
8385+
523b37e3
AM
8386+ AuDbg("b%d, %pd\n", bindex, h_dentry);
8387+ h_nfs = !!au_test_nfs(h_dentry->d_sb);
027c5e7a 8388+ spin_lock(&h_dentry->d_lock);
1facf9fc 8389+ h_name = &h_dentry->d_name;
8390+ if (unlikely(do_udba
8391+ && !is_root
523b37e3
AM
8392+ && ((!h_nfs
8393+ && (unhashed != !!d_unhashed(h_dentry)
38d290e6
JR
8394+ || (!tmpfile
8395+ && !au_qstreq(name, h_name))
8396+ ))
523b37e3
AM
8397+ || (h_nfs
8398+ && !(flags & LOOKUP_OPEN)
8399+ && (h_dentry->d_flags
8400+ & DCACHE_NFSFS_RENAMED)))
1facf9fc 8401+ )) {
38d290e6
JR
8402+ int h_unhashed;
8403+
8404+ h_unhashed = d_unhashed(h_dentry);
027c5e7a 8405+ spin_unlock(&h_dentry->d_lock);
38d290e6
JR
8406+ AuDbg("unhash 0x%x 0x%x, %pd %pd\n",
8407+ unhashed, h_unhashed, dentry, h_dentry);
1facf9fc 8408+ goto err;
8409+ }
027c5e7a 8410+ spin_unlock(&h_dentry->d_lock);
1facf9fc 8411+
b4510431 8412+ err = au_do_h_d_reval(h_dentry, flags, dentry, bindex);
1facf9fc 8413+ if (unlikely(err))
8414+ /* do not goto err, to keep the errno */
8415+ break;
8416+
8417+ /* todo: plink too? */
8418+ if (!do_udba)
8419+ continue;
8420+
8421+ /* UDBA tests */
5527c038 8422+ if (unlikely(!!inode != d_is_positive(h_dentry)))
1facf9fc 8423+ goto err;
8424+
5527c038
JR
8425+ h_inode = NULL;
8426+ if (d_is_positive(h_dentry))
8427+ h_inode = d_inode(h_dentry);
1facf9fc 8428+ h_plus = plus;
8429+ h_mode = mode;
8430+ h_cached_inode = h_inode;
8431+ if (h_inode) {
8432+ h_mode = (h_inode->i_mode & S_IFMT);
8433+ h_plus = (h_inode->i_nlink > 0);
8434+ }
8435+ if (inode && ibs <= bindex && bindex <= ibe)
8436+ h_cached_inode = au_h_iptr(inode, bindex);
8437+
523b37e3 8438+ if (!h_nfs) {
38d290e6 8439+ if (unlikely(plus != h_plus && !tmpfile))
523b37e3
AM
8440+ goto err;
8441+ } else {
8442+ if (unlikely(!(h_dentry->d_flags & DCACHE_NFSFS_RENAMED)
8443+ && !is_root
8444+ && !IS_ROOT(h_dentry)
8445+ && unhashed != d_unhashed(h_dentry)))
8446+ goto err;
8447+ }
8448+ if (unlikely(mode != h_mode
1facf9fc 8449+ || h_cached_inode != h_inode))
8450+ goto err;
8451+ continue;
8452+
f6b6e03d 8453+err:
1facf9fc 8454+ err = -EINVAL;
8455+ break;
8456+ }
8457+
523b37e3 8458+ AuTraceErr(err);
1facf9fc 8459+ return err;
8460+}
8461+
027c5e7a 8462+/* todo: consolidate with do_refresh() and au_reval_for_attr() */
1facf9fc 8463+static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen)
8464+{
8465+ int err;
8466+ struct dentry *parent;
1facf9fc 8467+
027c5e7a 8468+ if (!au_digen_test(dentry, sigen))
1facf9fc 8469+ return 0;
8470+
8471+ parent = dget_parent(dentry);
8472+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 8473+ AuDebugOn(au_digen_test(parent, sigen));
1facf9fc 8474+ au_dbg_verify_gen(parent, sigen);
027c5e7a 8475+ err = au_refresh_dentry(dentry, parent);
1facf9fc 8476+ di_read_unlock(parent, AuLock_IR);
8477+ dput(parent);
027c5e7a 8478+ AuTraceErr(err);
1facf9fc 8479+ return err;
8480+}
8481+
8482+int au_reval_dpath(struct dentry *dentry, unsigned int sigen)
8483+{
8484+ int err;
8485+ struct dentry *d, *parent;
1facf9fc 8486+
027c5e7a 8487+ if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR))
1facf9fc 8488+ return simple_reval_dpath(dentry, sigen);
8489+
8490+ /* slow loop, keep it simple and stupid */
8491+ /* cf: au_cpup_dirs() */
8492+ err = 0;
8493+ parent = NULL;
027c5e7a 8494+ while (au_digen_test(dentry, sigen)) {
1facf9fc 8495+ d = dentry;
8496+ while (1) {
8497+ dput(parent);
8498+ parent = dget_parent(d);
027c5e7a 8499+ if (!au_digen_test(parent, sigen))
1facf9fc 8500+ break;
8501+ d = parent;
8502+ }
8503+
1facf9fc 8504+ if (d != dentry)
027c5e7a 8505+ di_write_lock_child2(d);
1facf9fc 8506+
8507+ /* someone might update our dentry while we were sleeping */
027c5e7a
AM
8508+ if (au_digen_test(d, sigen)) {
8509+ /*
8510+ * todo: consolidate with simple_reval_dpath(),
8511+ * do_refresh() and au_reval_for_attr().
8512+ */
1facf9fc 8513+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 8514+ err = au_refresh_dentry(d, parent);
1facf9fc 8515+ di_read_unlock(parent, AuLock_IR);
8516+ }
8517+
8518+ if (d != dentry)
8519+ di_write_unlock(d);
8520+ dput(parent);
8521+ if (unlikely(err))
8522+ break;
8523+ }
8524+
8525+ return err;
8526+}
8527+
8528+/*
8529+ * if valid returns 1, otherwise 0.
8530+ */
b4510431 8531+static int aufs_d_revalidate(struct dentry *dentry, unsigned int flags)
1facf9fc 8532+{
8533+ int valid, err;
8534+ unsigned int sigen;
8535+ unsigned char do_udba;
8536+ struct super_block *sb;
8537+ struct inode *inode;
8538+
027c5e7a 8539+ /* todo: support rcu-walk? */
b4510431 8540+ if (flags & LOOKUP_RCU)
027c5e7a
AM
8541+ return -ECHILD;
8542+
8543+ valid = 0;
8544+ if (unlikely(!au_di(dentry)))
8545+ goto out;
8546+
e49829fe 8547+ valid = 1;
1facf9fc 8548+ sb = dentry->d_sb;
e49829fe
JR
8549+ /*
8550+ * todo: very ugly
8551+ * i_mutex of parent dir may be held,
8552+ * but we should not return 'invalid' due to busy.
8553+ */
8554+ err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW | AuLock_NOPLM);
8555+ if (unlikely(err)) {
8556+ valid = err;
027c5e7a 8557+ AuTraceErr(err);
e49829fe
JR
8558+ goto out;
8559+ }
5527c038
JR
8560+ inode = NULL;
8561+ if (d_really_is_positive(dentry))
8562+ inode = d_inode(dentry);
c1595e42
JR
8563+ if (unlikely(inode && is_bad_inode(inode))) {
8564+ err = -EINVAL;
8565+ AuTraceErr(err);
8566+ goto out_dgrade;
8567+ }
027c5e7a
AM
8568+ if (unlikely(au_dbrange_test(dentry))) {
8569+ err = -EINVAL;
8570+ AuTraceErr(err);
8571+ goto out_dgrade;
1facf9fc 8572+ }
027c5e7a
AM
8573+
8574+ sigen = au_sigen(sb);
8575+ if (au_digen_test(dentry, sigen)) {
1facf9fc 8576+ AuDebugOn(IS_ROOT(dentry));
027c5e7a
AM
8577+ err = au_reval_dpath(dentry, sigen);
8578+ if (unlikely(err)) {
8579+ AuTraceErr(err);
1facf9fc 8580+ goto out_dgrade;
027c5e7a 8581+ }
1facf9fc 8582+ }
8583+ di_downgrade_lock(dentry, AuLock_IR);
8584+
1facf9fc 8585+ err = -EINVAL;
c1595e42 8586+ if (!(flags & (LOOKUP_OPEN | LOOKUP_EMPTY))
523b37e3 8587+ && inode
38d290e6 8588+ && !(inode->i_state && I_LINKABLE)
79b8bda9
AM
8589+ && (IS_DEADDIR(inode) || !inode->i_nlink)) {
8590+ AuTraceErr(err);
027c5e7a 8591+ goto out_inval;
79b8bda9 8592+ }
027c5e7a 8593+
1facf9fc 8594+ do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE);
8595+ if (do_udba && inode) {
8596+ aufs_bindex_t bstart = au_ibstart(inode);
027c5e7a 8597+ struct inode *h_inode;
1facf9fc 8598+
027c5e7a
AM
8599+ if (bstart >= 0) {
8600+ h_inode = au_h_iptr(inode, bstart);
79b8bda9
AM
8601+ if (h_inode && au_test_higen(inode, h_inode)) {
8602+ AuTraceErr(err);
027c5e7a 8603+ goto out_inval;
79b8bda9 8604+ }
027c5e7a 8605+ }
1facf9fc 8606+ }
8607+
b4510431 8608+ err = h_d_revalidate(dentry, inode, flags, do_udba);
027c5e7a 8609+ if (unlikely(!err && do_udba && au_dbstart(dentry) < 0)) {
1facf9fc 8610+ err = -EIO;
523b37e3
AM
8611+ AuDbg("both of real entry and whiteout found, %p, err %d\n",
8612+ dentry, err);
027c5e7a 8613+ }
e49829fe 8614+ goto out_inval;
1facf9fc 8615+
4f0767ce 8616+out_dgrade:
1facf9fc 8617+ di_downgrade_lock(dentry, AuLock_IR);
e49829fe 8618+out_inval:
1facf9fc 8619+ aufs_read_unlock(dentry, AuLock_IR);
8620+ AuTraceErr(err);
8621+ valid = !err;
e49829fe 8622+out:
027c5e7a 8623+ if (!valid) {
523b37e3 8624+ AuDbg("%pd invalid, %d\n", dentry, valid);
027c5e7a
AM
8625+ d_drop(dentry);
8626+ }
1facf9fc 8627+ return valid;
8628+}
8629+
/*
 * ->d_release() of aufs.
 * Nothing to do for a dentry without aufs private data (au_di() is NULL).
 */
static void aufs_d_release(struct dentry *dentry)
{
	if (!au_di(dentry))
		return;

	au_di_fin(dentry);
	au_hn_di_reinit(dentry);
}
8637+
4a4d8108 8638+const struct dentry_operations aufs_dop = {
c06a8ce3
AM
8639+ .d_revalidate = aufs_d_revalidate,
8640+ .d_weak_revalidate = aufs_d_revalidate,
8641+ .d_release = aufs_d_release
1facf9fc 8642+};
79b8bda9
AM
8643+
8644+/* aufs_dop without d_revalidate */
8645+const struct dentry_operations aufs_dop_noreval = {
8646+ .d_release = aufs_d_release
8647+};
7f207e10
AM
8648diff -urN /usr/share/empty/fs/aufs/dentry.h linux/fs/aufs/dentry.h
8649--- /usr/share/empty/fs/aufs/dentry.h 1970-01-01 01:00:00.000000000 +0100
8cdd5066 8650+++ linux/fs/aufs/dentry.h 2016-02-28 11:26:32.569971135 +0100
79b8bda9 8651@@ -0,0 +1,234 @@
1facf9fc 8652+/*
8cdd5066 8653+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 8654+ *
8655+ * This program, aufs is free software; you can redistribute it and/or modify
8656+ * it under the terms of the GNU General Public License as published by
8657+ * the Free Software Foundation; either version 2 of the License, or
8658+ * (at your option) any later version.
dece6358
AM
8659+ *
8660+ * This program is distributed in the hope that it will be useful,
8661+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8662+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8663+ * GNU General Public License for more details.
8664+ *
8665+ * You should have received a copy of the GNU General Public License
523b37e3 8666+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 8667+ */
8668+
8669+/*
8670+ * lookup and dentry operations
8671+ */
8672+
8673+#ifndef __AUFS_DENTRY_H__
8674+#define __AUFS_DENTRY_H__
8675+
8676+#ifdef __KERNEL__
8677+
dece6358 8678+#include <linux/dcache.h>
1facf9fc 8679+#include "rwsem.h"
8680+
1facf9fc 8681+struct au_hdentry {
8682+ struct dentry *hd_dentry;
027c5e7a 8683+ aufs_bindex_t hd_id;
1facf9fc 8684+};
8685+
8686+struct au_dinfo {
8687+ atomic_t di_generation;
8688+
dece6358 8689+ struct au_rwsem di_rwsem;
1facf9fc 8690+ aufs_bindex_t di_bstart, di_bend, di_bwh, di_bdiropq;
38d290e6 8691+ unsigned char di_tmpfile; /* to allow the different name */
1facf9fc 8692+ struct au_hdentry *di_hdentry;
4a4d8108 8693+} ____cacheline_aligned_in_smp;
1facf9fc 8694+
8695+/* ---------------------------------------------------------------------- */
8696+
8697+/* dentry.c */
79b8bda9 8698+extern const struct dentry_operations aufs_dop, aufs_dop_noreval;
1facf9fc 8699+struct au_branch;
076b876e 8700+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent);
1facf9fc 8701+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
8702+ struct dentry *h_parent, struct au_branch *br);
8703+
537831f9 8704+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type);
86dc4139 8705+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh);
027c5e7a 8706+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent);
1facf9fc 8707+int au_reval_dpath(struct dentry *dentry, unsigned int sigen);
79b8bda9 8708+void au_refresh_dop(struct dentry *dentry, int force_reval);
1facf9fc 8709+
8710+/* dinfo.c */
4a4d8108 8711+void au_di_init_once(void *_di);
027c5e7a
AM
8712+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc);
8713+void au_di_free(struct au_dinfo *dinfo);
8714+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b);
8715+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src);
4a4d8108
AM
8716+int au_di_init(struct dentry *dentry);
8717+void au_di_fin(struct dentry *dentry);
1facf9fc 8718+int au_di_realloc(struct au_dinfo *dinfo, int nbr);
8719+
8720+void di_read_lock(struct dentry *d, int flags, unsigned int lsc);
8721+void di_read_unlock(struct dentry *d, int flags);
8722+void di_downgrade_lock(struct dentry *d, int flags);
8723+void di_write_lock(struct dentry *d, unsigned int lsc);
8724+void di_write_unlock(struct dentry *d);
8725+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir);
8726+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir);
8727+void di_write_unlock2(struct dentry *d1, struct dentry *d2);
8728+
8729+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex);
2cbb1c4b 8730+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex);
1facf9fc 8731+aufs_bindex_t au_dbtail(struct dentry *dentry);
8732+aufs_bindex_t au_dbtaildir(struct dentry *dentry);
8733+
8734+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
8735+ struct dentry *h_dentry);
027c5e7a
AM
8736+int au_digen_test(struct dentry *dentry, unsigned int sigen);
8737+int au_dbrange_test(struct dentry *dentry);
1facf9fc 8738+void au_update_digen(struct dentry *dentry);
8739+void au_update_dbrange(struct dentry *dentry, int do_put_zero);
8740+void au_update_dbstart(struct dentry *dentry);
8741+void au_update_dbend(struct dentry *dentry);
8742+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry);
8743+
8744+/* ---------------------------------------------------------------------- */
8745+
8746+static inline struct au_dinfo *au_di(struct dentry *dentry)
8747+{
8748+ return dentry->d_fsdata;
8749+}
8750+
8751+/* ---------------------------------------------------------------------- */
8752+
8753+/* lock subclass for dinfo */
8754+enum {
8755+ AuLsc_DI_CHILD, /* child first */
4a4d8108 8756+ AuLsc_DI_CHILD2, /* rename(2), link(2), and cpup at hnotify */
1facf9fc 8757+ AuLsc_DI_CHILD3, /* copyup dirs */
8758+ AuLsc_DI_PARENT,
8759+ AuLsc_DI_PARENT2,
027c5e7a
AM
8760+ AuLsc_DI_PARENT3,
8761+ AuLsc_DI_TMP /* temp for replacing dinfo */
1facf9fc 8762+};
8763+
8764+/*
8765+ * di_read_lock_child, di_write_lock_child,
8766+ * di_read_lock_child2, di_write_lock_child2,
8767+ * di_read_lock_child3, di_write_lock_child3,
8768+ * di_read_lock_parent, di_write_lock_parent,
8769+ * di_read_lock_parent2, di_write_lock_parent2,
8770+ * di_read_lock_parent3, di_write_lock_parent3,
8771+ */
8772+#define AuReadLockFunc(name, lsc) \
8773+static inline void di_read_lock_##name(struct dentry *d, int flags) \
8774+{ di_read_lock(d, flags, AuLsc_DI_##lsc); }
8775+
8776+#define AuWriteLockFunc(name, lsc) \
8777+static inline void di_write_lock_##name(struct dentry *d) \
8778+{ di_write_lock(d, AuLsc_DI_##lsc); }
8779+
8780+#define AuRWLockFuncs(name, lsc) \
8781+ AuReadLockFunc(name, lsc) \
8782+ AuWriteLockFunc(name, lsc)
8783+
8784+AuRWLockFuncs(child, CHILD);
8785+AuRWLockFuncs(child2, CHILD2);
8786+AuRWLockFuncs(child3, CHILD3);
8787+AuRWLockFuncs(parent, PARENT);
8788+AuRWLockFuncs(parent2, PARENT2);
8789+AuRWLockFuncs(parent3, PARENT3);
8790+
8791+#undef AuReadLockFunc
8792+#undef AuWriteLockFunc
8793+#undef AuRWLockFuncs
8794+
8795+#define DiMustNoWaiters(d) AuRwMustNoWaiters(&au_di(d)->di_rwsem)
dece6358
AM
8796+#define DiMustAnyLock(d) AuRwMustAnyLock(&au_di(d)->di_rwsem)
8797+#define DiMustWriteLock(d) AuRwMustWriteLock(&au_di(d)->di_rwsem)
1facf9fc 8798+
8799+/* ---------------------------------------------------------------------- */
8800+
8801+/* todo: memory barrier? */
8802+static inline unsigned int au_digen(struct dentry *d)
8803+{
8804+ return atomic_read(&au_di(d)->di_generation);
8805+}
8806+
8807+static inline void au_h_dentry_init(struct au_hdentry *hdentry)
8808+{
8809+ hdentry->hd_dentry = NULL;
8810+}
8811+
8812+static inline void au_hdput(struct au_hdentry *hd)
8813+{
4a4d8108
AM
8814+ if (hd)
8815+ dput(hd->hd_dentry);
1facf9fc 8816+}
8817+
8818+static inline aufs_bindex_t au_dbstart(struct dentry *dentry)
8819+{
1308ab2a 8820+ DiMustAnyLock(dentry);
1facf9fc 8821+ return au_di(dentry)->di_bstart;
8822+}
8823+
8824+static inline aufs_bindex_t au_dbend(struct dentry *dentry)
8825+{
1308ab2a 8826+ DiMustAnyLock(dentry);
1facf9fc 8827+ return au_di(dentry)->di_bend;
8828+}
8829+
8830+static inline aufs_bindex_t au_dbwh(struct dentry *dentry)
8831+{
1308ab2a 8832+ DiMustAnyLock(dentry);
1facf9fc 8833+ return au_di(dentry)->di_bwh;
8834+}
8835+
8836+static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry)
8837+{
1308ab2a 8838+ DiMustAnyLock(dentry);
1facf9fc 8839+ return au_di(dentry)->di_bdiropq;
8840+}
8841+
8842+/* todo: hard/soft set? */
8843+static inline void au_set_dbstart(struct dentry *dentry, aufs_bindex_t bindex)
8844+{
1308ab2a 8845+ DiMustWriteLock(dentry);
1facf9fc 8846+ au_di(dentry)->di_bstart = bindex;
8847+}
8848+
8849+static inline void au_set_dbend(struct dentry *dentry, aufs_bindex_t bindex)
8850+{
1308ab2a 8851+ DiMustWriteLock(dentry);
1facf9fc 8852+ au_di(dentry)->di_bend = bindex;
8853+}
8854+
8855+static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex)
8856+{
1308ab2a 8857+ DiMustWriteLock(dentry);
1facf9fc 8858+ /* dbwh can be outside of bstart - bend range */
8859+ au_di(dentry)->di_bwh = bindex;
8860+}
8861+
8862+static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex)
8863+{
1308ab2a 8864+ DiMustWriteLock(dentry);
1facf9fc 8865+ au_di(dentry)->di_bdiropq = bindex;
8866+}
8867+
8868+/* ---------------------------------------------------------------------- */
8869+
4a4d8108 8870+#ifdef CONFIG_AUFS_HNOTIFY
1facf9fc 8871+static inline void au_digen_dec(struct dentry *d)
8872+{
e49829fe 8873+ atomic_dec(&au_di(d)->di_generation);
1facf9fc 8874+}
8875+
4a4d8108 8876+static inline void au_hn_di_reinit(struct dentry *dentry)
1facf9fc 8877+{
8878+ dentry->d_fsdata = NULL;
8879+}
8880+#else
4a4d8108
AM
8881+AuStubVoid(au_hn_di_reinit, struct dentry *dentry __maybe_unused)
8882+#endif /* CONFIG_AUFS_HNOTIFY */
1facf9fc 8883+
8884+#endif /* __KERNEL__ */
8885+#endif /* __AUFS_DENTRY_H__ */
7f207e10
AM
8886diff -urN /usr/share/empty/fs/aufs/dinfo.c linux/fs/aufs/dinfo.c
8887--- /usr/share/empty/fs/aufs/dinfo.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 8888+++ linux/fs/aufs/dinfo.c 2016-02-28 11:26:32.569971135 +0100
5527c038 8889@@ -0,0 +1,550 @@
1facf9fc 8890+/*
8cdd5066 8891+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 8892+ *
8893+ * This program, aufs is free software; you can redistribute it and/or modify
8894+ * it under the terms of the GNU General Public License as published by
8895+ * the Free Software Foundation; either version 2 of the License, or
8896+ * (at your option) any later version.
dece6358
AM
8897+ *
8898+ * This program is distributed in the hope that it will be useful,
8899+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8900+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8901+ * GNU General Public License for more details.
8902+ *
8903+ * You should have received a copy of the GNU General Public License
523b37e3 8904+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 8905+ */
8906+
8907+/*
8908+ * dentry private data
8909+ */
8910+
8911+#include "aufs.h"
8912+
e49829fe 8913+void au_di_init_once(void *_dinfo)
4a4d8108 8914+{
e49829fe
JR
8915+ struct au_dinfo *dinfo = _dinfo;
8916+ static struct lock_class_key aufs_di;
4a4d8108 8917+
e49829fe
JR
8918+ au_rw_init(&dinfo->di_rwsem);
8919+ au_rw_class(&dinfo->di_rwsem, &aufs_di);
4a4d8108
AM
8920+}
8921+
027c5e7a 8922+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc)
1facf9fc 8923+{
8924+ struct au_dinfo *dinfo;
027c5e7a 8925+ int nbr, i;
1facf9fc 8926+
8927+ dinfo = au_cache_alloc_dinfo();
8928+ if (unlikely(!dinfo))
8929+ goto out;
8930+
1facf9fc 8931+ nbr = au_sbend(sb) + 1;
8932+ if (nbr <= 0)
8933+ nbr = 1;
8934+ dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS);
027c5e7a
AM
8935+ if (dinfo->di_hdentry) {
8936+ au_rw_write_lock_nested(&dinfo->di_rwsem, lsc);
8937+ dinfo->di_bstart = -1;
8938+ dinfo->di_bend = -1;
8939+ dinfo->di_bwh = -1;
8940+ dinfo->di_bdiropq = -1;
38d290e6 8941+ dinfo->di_tmpfile = 0;
027c5e7a
AM
8942+ for (i = 0; i < nbr; i++)
8943+ dinfo->di_hdentry[i].hd_id = -1;
8944+ goto out;
8945+ }
1facf9fc 8946+
1facf9fc 8947+ au_cache_free_dinfo(dinfo);
027c5e7a
AM
8948+ dinfo = NULL;
8949+
4f0767ce 8950+out:
027c5e7a 8951+ return dinfo;
1facf9fc 8952+}
8953+
027c5e7a 8954+void au_di_free(struct au_dinfo *dinfo)
4a4d8108 8955+{
4a4d8108
AM
8956+ struct au_hdentry *p;
8957+ aufs_bindex_t bend, bindex;
8958+
8959+ /* dentry may not be revalidated */
027c5e7a 8960+ bindex = dinfo->di_bstart;
4a4d8108 8961+ if (bindex >= 0) {
027c5e7a
AM
8962+ bend = dinfo->di_bend;
8963+ p = dinfo->di_hdentry + bindex;
4a4d8108
AM
8964+ while (bindex++ <= bend)
8965+ au_hdput(p++);
8966+ }
027c5e7a
AM
8967+ kfree(dinfo->di_hdentry);
8968+ au_cache_free_dinfo(dinfo);
8969+}
8970+
8971+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b)
8972+{
8973+ struct au_hdentry *p;
8974+ aufs_bindex_t bi;
8975+
8976+ AuRwMustWriteLock(&a->di_rwsem);
8977+ AuRwMustWriteLock(&b->di_rwsem);
8978+
8979+#define DiSwap(v, name) \
8980+ do { \
8981+ v = a->di_##name; \
8982+ a->di_##name = b->di_##name; \
8983+ b->di_##name = v; \
8984+ } while (0)
8985+
8986+ DiSwap(p, hdentry);
8987+ DiSwap(bi, bstart);
8988+ DiSwap(bi, bend);
8989+ DiSwap(bi, bwh);
8990+ DiSwap(bi, bdiropq);
8991+ /* smp_mb(); */
8992+
8993+#undef DiSwap
8994+}
8995+
8996+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src)
8997+{
8998+ AuRwMustWriteLock(&dst->di_rwsem);
8999+ AuRwMustWriteLock(&src->di_rwsem);
9000+
9001+ dst->di_bstart = src->di_bstart;
9002+ dst->di_bend = src->di_bend;
9003+ dst->di_bwh = src->di_bwh;
9004+ dst->di_bdiropq = src->di_bdiropq;
9005+ /* smp_mb(); */
9006+}
9007+
9008+int au_di_init(struct dentry *dentry)
9009+{
9010+ int err;
9011+ struct super_block *sb;
9012+ struct au_dinfo *dinfo;
9013+
9014+ err = 0;
9015+ sb = dentry->d_sb;
9016+ dinfo = au_di_alloc(sb, AuLsc_DI_CHILD);
9017+ if (dinfo) {
9018+ atomic_set(&dinfo->di_generation, au_sigen(sb));
9019+ /* smp_mb(); */ /* atomic_set */
9020+ dentry->d_fsdata = dinfo;
9021+ } else
9022+ err = -ENOMEM;
9023+
9024+ return err;
9025+}
9026+
9027+void au_di_fin(struct dentry *dentry)
9028+{
9029+ struct au_dinfo *dinfo;
9030+
9031+ dinfo = au_di(dentry);
9032+ AuRwDestroy(&dinfo->di_rwsem);
9033+ au_di_free(dinfo);
4a4d8108
AM
9034+}
9035+
1facf9fc 9036+int au_di_realloc(struct au_dinfo *dinfo, int nbr)
9037+{
9038+ int err, sz;
9039+ struct au_hdentry *hdp;
9040+
1308ab2a 9041+ AuRwMustWriteLock(&dinfo->di_rwsem);
9042+
1facf9fc 9043+ err = -ENOMEM;
9044+ sz = sizeof(*hdp) * (dinfo->di_bend + 1);
9045+ if (!sz)
9046+ sz = sizeof(*hdp);
9047+ hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS);
9048+ if (hdp) {
9049+ dinfo->di_hdentry = hdp;
9050+ err = 0;
9051+ }
9052+
9053+ return err;
9054+}
9055+
9056+/* ---------------------------------------------------------------------- */
9057+
9058+static void do_ii_write_lock(struct inode *inode, unsigned int lsc)
9059+{
9060+ switch (lsc) {
9061+ case AuLsc_DI_CHILD:
9062+ ii_write_lock_child(inode);
9063+ break;
9064+ case AuLsc_DI_CHILD2:
9065+ ii_write_lock_child2(inode);
9066+ break;
9067+ case AuLsc_DI_CHILD3:
9068+ ii_write_lock_child3(inode);
9069+ break;
9070+ case AuLsc_DI_PARENT:
9071+ ii_write_lock_parent(inode);
9072+ break;
9073+ case AuLsc_DI_PARENT2:
9074+ ii_write_lock_parent2(inode);
9075+ break;
9076+ case AuLsc_DI_PARENT3:
9077+ ii_write_lock_parent3(inode);
9078+ break;
9079+ default:
9080+ BUG();
9081+ }
9082+}
9083+
9084+static void do_ii_read_lock(struct inode *inode, unsigned int lsc)
9085+{
9086+ switch (lsc) {
9087+ case AuLsc_DI_CHILD:
9088+ ii_read_lock_child(inode);
9089+ break;
9090+ case AuLsc_DI_CHILD2:
9091+ ii_read_lock_child2(inode);
9092+ break;
9093+ case AuLsc_DI_CHILD3:
9094+ ii_read_lock_child3(inode);
9095+ break;
9096+ case AuLsc_DI_PARENT:
9097+ ii_read_lock_parent(inode);
9098+ break;
9099+ case AuLsc_DI_PARENT2:
9100+ ii_read_lock_parent2(inode);
9101+ break;
9102+ case AuLsc_DI_PARENT3:
9103+ ii_read_lock_parent3(inode);
9104+ break;
9105+ default:
9106+ BUG();
9107+ }
9108+}
9109+
9110+void di_read_lock(struct dentry *d, int flags, unsigned int lsc)
9111+{
5527c038
JR
9112+ struct inode *inode;
9113+
dece6358 9114+ au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc);
5527c038
JR
9115+ if (d_really_is_positive(d)) {
9116+ inode = d_inode(d);
1facf9fc 9117+ if (au_ftest_lock(flags, IW))
5527c038 9118+ do_ii_write_lock(inode, lsc);
1facf9fc 9119+ else if (au_ftest_lock(flags, IR))
5527c038 9120+ do_ii_read_lock(inode, lsc);
1facf9fc 9121+ }
9122+}
9123+
9124+void di_read_unlock(struct dentry *d, int flags)
9125+{
5527c038
JR
9126+ struct inode *inode;
9127+
9128+ if (d_really_is_positive(d)) {
9129+ inode = d_inode(d);
027c5e7a
AM
9130+ if (au_ftest_lock(flags, IW)) {
9131+ au_dbg_verify_dinode(d);
5527c038 9132+ ii_write_unlock(inode);
027c5e7a
AM
9133+ } else if (au_ftest_lock(flags, IR)) {
9134+ au_dbg_verify_dinode(d);
5527c038 9135+ ii_read_unlock(inode);
027c5e7a 9136+ }
1facf9fc 9137+ }
dece6358 9138+ au_rw_read_unlock(&au_di(d)->di_rwsem);
1facf9fc 9139+}
9140+
9141+void di_downgrade_lock(struct dentry *d, int flags)
9142+{
5527c038
JR
9143+ if (d_really_is_positive(d) && au_ftest_lock(flags, IR))
9144+ ii_downgrade_lock(d_inode(d));
dece6358 9145+ au_rw_dgrade_lock(&au_di(d)->di_rwsem);
1facf9fc 9146+}
9147+
9148+void di_write_lock(struct dentry *d, unsigned int lsc)
9149+{
dece6358 9150+ au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc);
5527c038
JR
9151+ if (d_really_is_positive(d))
9152+ do_ii_write_lock(d_inode(d), lsc);
1facf9fc 9153+}
9154+
9155+void di_write_unlock(struct dentry *d)
9156+{
027c5e7a 9157+ au_dbg_verify_dinode(d);
5527c038
JR
9158+ if (d_really_is_positive(d))
9159+ ii_write_unlock(d_inode(d));
dece6358 9160+ au_rw_write_unlock(&au_di(d)->di_rwsem);
1facf9fc 9161+}
9162+
9163+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir)
9164+{
9165+ AuDebugOn(d1 == d2
5527c038 9166+ || d_inode(d1) == d_inode(d2)
1facf9fc 9167+ || d1->d_sb != d2->d_sb);
9168+
9169+ if (isdir && au_test_subdir(d1, d2)) {
9170+ di_write_lock_child(d1);
9171+ di_write_lock_child2(d2);
9172+ } else {
9173+ /* there should be no races */
9174+ di_write_lock_child(d2);
9175+ di_write_lock_child2(d1);
9176+ }
9177+}
9178+
9179+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir)
9180+{
9181+ AuDebugOn(d1 == d2
5527c038 9182+ || d_inode(d1) == d_inode(d2)
1facf9fc 9183+ || d1->d_sb != d2->d_sb);
9184+
9185+ if (isdir && au_test_subdir(d1, d2)) {
9186+ di_write_lock_parent(d1);
9187+ di_write_lock_parent2(d2);
9188+ } else {
9189+ /* there should be no races */
9190+ di_write_lock_parent(d2);
9191+ di_write_lock_parent2(d1);
9192+ }
9193+}
9194+
9195+void di_write_unlock2(struct dentry *d1, struct dentry *d2)
9196+{
9197+ di_write_unlock(d1);
5527c038 9198+ if (d_inode(d1) == d_inode(d2))
dece6358 9199+ au_rw_write_unlock(&au_di(d2)->di_rwsem);
1facf9fc 9200+ else
9201+ di_write_unlock(d2);
9202+}
9203+
9204+/* ---------------------------------------------------------------------- */
9205+
9206+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex)
9207+{
9208+ struct dentry *d;
9209+
1308ab2a 9210+ DiMustAnyLock(dentry);
9211+
1facf9fc 9212+ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
9213+ return NULL;
9214+ AuDebugOn(bindex < 0);
9215+ d = au_di(dentry)->di_hdentry[0 + bindex].hd_dentry;
c1595e42 9216+ AuDebugOn(d && au_dcount(d) <= 0);
1facf9fc 9217+ return d;
9218+}
9219+
2cbb1c4b
JR
9220+/*
9221+ * extended version of au_h_dptr().
38d290e6
JR
9222+ * returns a hashed and positive (or linkable) h_dentry in bindex, NULL, or
9223+ * error.
2cbb1c4b
JR
9224+ */
9225+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex)
9226+{
9227+ struct dentry *h_dentry;
9228+ struct inode *inode, *h_inode;
9229+
5527c038 9230+ AuDebugOn(d_really_is_negative(dentry));
2cbb1c4b
JR
9231+
9232+ h_dentry = NULL;
9233+ if (au_dbstart(dentry) <= bindex
9234+ && bindex <= au_dbend(dentry))
9235+ h_dentry = au_h_dptr(dentry, bindex);
38d290e6 9236+ if (h_dentry && !au_d_linkable(h_dentry)) {
2cbb1c4b
JR
9237+ dget(h_dentry);
9238+ goto out; /* success */
9239+ }
9240+
5527c038 9241+ inode = d_inode(dentry);
2cbb1c4b
JR
9242+ AuDebugOn(bindex < au_ibstart(inode));
9243+ AuDebugOn(au_ibend(inode) < bindex);
9244+ h_inode = au_h_iptr(inode, bindex);
9245+ h_dentry = d_find_alias(h_inode);
9246+ if (h_dentry) {
9247+ if (!IS_ERR(h_dentry)) {
38d290e6 9248+ if (!au_d_linkable(h_dentry))
2cbb1c4b
JR
9249+ goto out; /* success */
9250+ dput(h_dentry);
9251+ } else
9252+ goto out;
9253+ }
9254+
9255+ if (au_opt_test(au_mntflags(dentry->d_sb), PLINK)) {
9256+ h_dentry = au_plink_lkup(inode, bindex);
9257+ AuDebugOn(!h_dentry);
9258+ if (!IS_ERR(h_dentry)) {
9259+ if (!au_d_hashed_positive(h_dentry))
9260+ goto out; /* success */
9261+ dput(h_dentry);
9262+ h_dentry = NULL;
9263+ }
9264+ }
9265+
9266+out:
9267+ AuDbgDentry(h_dentry);
9268+ return h_dentry;
9269+}
9270+
1facf9fc 9271+aufs_bindex_t au_dbtail(struct dentry *dentry)
9272+{
9273+ aufs_bindex_t bend, bwh;
9274+
9275+ bend = au_dbend(dentry);
9276+ if (0 <= bend) {
9277+ bwh = au_dbwh(dentry);
9278+ if (!bwh)
9279+ return bwh;
9280+ if (0 < bwh && bwh < bend)
9281+ return bwh - 1;
9282+ }
9283+ return bend;
9284+}
9285+
9286+aufs_bindex_t au_dbtaildir(struct dentry *dentry)
9287+{
9288+ aufs_bindex_t bend, bopq;
9289+
9290+ bend = au_dbtail(dentry);
9291+ if (0 <= bend) {
9292+ bopq = au_dbdiropq(dentry);
9293+ if (0 <= bopq && bopq < bend)
9294+ bend = bopq;
9295+ }
9296+ return bend;
9297+}
9298+
9299+/* ---------------------------------------------------------------------- */
9300+
9301+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
9302+ struct dentry *h_dentry)
9303+{
9304+ struct au_hdentry *hd = au_di(dentry)->di_hdentry + bindex;
027c5e7a 9305+ struct au_branch *br;
1facf9fc 9306+
1308ab2a 9307+ DiMustWriteLock(dentry);
9308+
4a4d8108 9309+ au_hdput(hd);
1facf9fc 9310+ hd->hd_dentry = h_dentry;
027c5e7a
AM
9311+ if (h_dentry) {
9312+ br = au_sbr(dentry->d_sb, bindex);
9313+ hd->hd_id = br->br_id;
9314+ }
9315+}
9316+
9317+int au_dbrange_test(struct dentry *dentry)
9318+{
9319+ int err;
9320+ aufs_bindex_t bstart, bend;
9321+
9322+ err = 0;
9323+ bstart = au_dbstart(dentry);
9324+ bend = au_dbend(dentry);
9325+ if (bstart >= 0)
9326+ AuDebugOn(bend < 0 && bstart > bend);
9327+ else {
9328+ err = -EIO;
9329+ AuDebugOn(bend >= 0);
9330+ }
9331+
9332+ return err;
9333+}
9334+
9335+int au_digen_test(struct dentry *dentry, unsigned int sigen)
9336+{
9337+ int err;
9338+
9339+ err = 0;
9340+ if (unlikely(au_digen(dentry) != sigen
5527c038 9341+ || au_iigen_test(d_inode(dentry), sigen)))
027c5e7a
AM
9342+ err = -EIO;
9343+
9344+ return err;
1facf9fc 9345+}
9346+
9347+void au_update_digen(struct dentry *dentry)
9348+{
9349+ atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb));
9350+ /* smp_mb(); */ /* atomic_set */
9351+}
9352+
9353+void au_update_dbrange(struct dentry *dentry, int do_put_zero)
9354+{
9355+ struct au_dinfo *dinfo;
9356+ struct dentry *h_d;
4a4d8108 9357+ struct au_hdentry *hdp;
1facf9fc 9358+
1308ab2a 9359+ DiMustWriteLock(dentry);
9360+
1facf9fc 9361+ dinfo = au_di(dentry);
9362+ if (!dinfo || dinfo->di_bstart < 0)
9363+ return;
9364+
4a4d8108 9365+ hdp = dinfo->di_hdentry;
1facf9fc 9366+ if (do_put_zero) {
9367+ aufs_bindex_t bindex, bend;
9368+
9369+ bend = dinfo->di_bend;
9370+ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++) {
4a4d8108 9371+ h_d = hdp[0 + bindex].hd_dentry;
5527c038 9372+ if (h_d && d_is_negative(h_d))
1facf9fc 9373+ au_set_h_dptr(dentry, bindex, NULL);
9374+ }
9375+ }
9376+
9377+ dinfo->di_bstart = -1;
9378+ while (++dinfo->di_bstart <= dinfo->di_bend)
4a4d8108 9379+ if (hdp[0 + dinfo->di_bstart].hd_dentry)
1facf9fc 9380+ break;
9381+ if (dinfo->di_bstart > dinfo->di_bend) {
9382+ dinfo->di_bstart = -1;
9383+ dinfo->di_bend = -1;
9384+ return;
9385+ }
9386+
9387+ dinfo->di_bend++;
9388+ while (0 <= --dinfo->di_bend)
4a4d8108 9389+ if (hdp[0 + dinfo->di_bend].hd_dentry)
1facf9fc 9390+ break;
9391+ AuDebugOn(dinfo->di_bstart > dinfo->di_bend || dinfo->di_bend < 0);
9392+}
9393+
9394+void au_update_dbstart(struct dentry *dentry)
9395+{
9396+ aufs_bindex_t bindex, bend;
9397+ struct dentry *h_dentry;
9398+
9399+ bend = au_dbend(dentry);
9400+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
9401+ h_dentry = au_h_dptr(dentry, bindex);
9402+ if (!h_dentry)
9403+ continue;
5527c038 9404+ if (d_is_positive(h_dentry)) {
1facf9fc 9405+ au_set_dbstart(dentry, bindex);
9406+ return;
9407+ }
9408+ au_set_h_dptr(dentry, bindex, NULL);
9409+ }
9410+}
9411+
9412+void au_update_dbend(struct dentry *dentry)
9413+{
9414+ aufs_bindex_t bindex, bstart;
9415+ struct dentry *h_dentry;
9416+
9417+ bstart = au_dbstart(dentry);
7f207e10 9418+ for (bindex = au_dbend(dentry); bindex >= bstart; bindex--) {
1facf9fc 9419+ h_dentry = au_h_dptr(dentry, bindex);
9420+ if (!h_dentry)
9421+ continue;
5527c038 9422+ if (d_is_positive(h_dentry)) {
1facf9fc 9423+ au_set_dbend(dentry, bindex);
9424+ return;
9425+ }
9426+ au_set_h_dptr(dentry, bindex, NULL);
9427+ }
9428+}
9429+
9430+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry)
9431+{
9432+ aufs_bindex_t bindex, bend;
9433+
9434+ bend = au_dbend(dentry);
9435+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++)
9436+ if (au_h_dptr(dentry, bindex) == h_dentry)
9437+ return bindex;
9438+ return -1;
9439+}
7f207e10
AM
9440diff -urN /usr/share/empty/fs/aufs/dir.c linux/fs/aufs/dir.c
9441--- /usr/share/empty/fs/aufs/dir.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066
JR
9442+++ linux/fs/aufs/dir.c 2016-02-28 11:26:32.569971135 +0100
9443@@ -0,0 +1,758 @@
1facf9fc 9444+/*
8cdd5066 9445+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 9446+ *
9447+ * This program, aufs is free software; you can redistribute it and/or modify
9448+ * it under the terms of the GNU General Public License as published by
9449+ * the Free Software Foundation; either version 2 of the License, or
9450+ * (at your option) any later version.
dece6358
AM
9451+ *
9452+ * This program is distributed in the hope that it will be useful,
9453+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9454+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9455+ * GNU General Public License for more details.
9456+ *
9457+ * You should have received a copy of the GNU General Public License
523b37e3 9458+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 9459+ */
9460+
9461+/*
9462+ * directory operations
9463+ */
9464+
9465+#include <linux/fs_stack.h>
9466+#include "aufs.h"
9467+
9468+void au_add_nlink(struct inode *dir, struct inode *h_dir)
9469+{
9dbd164d
AM
9470+ unsigned int nlink;
9471+
1facf9fc 9472+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
9473+
9dbd164d
AM
9474+ nlink = dir->i_nlink;
9475+ nlink += h_dir->i_nlink - 2;
1facf9fc 9476+ if (h_dir->i_nlink < 2)
9dbd164d 9477+ nlink += 2;
f6b6e03d 9478+ smp_mb(); /* for i_nlink */
7eafdf33 9479+ /* 0 can happen in revaliding */
92d182d2 9480+ set_nlink(dir, nlink);
1facf9fc 9481+}
9482+
9483+void au_sub_nlink(struct inode *dir, struct inode *h_dir)
9484+{
9dbd164d
AM
9485+ unsigned int nlink;
9486+
1facf9fc 9487+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
9488+
9dbd164d
AM
9489+ nlink = dir->i_nlink;
9490+ nlink -= h_dir->i_nlink - 2;
1facf9fc 9491+ if (h_dir->i_nlink < 2)
9dbd164d 9492+ nlink -= 2;
f6b6e03d 9493+ smp_mb(); /* for i_nlink */
92d182d2 9494+ /* nlink == 0 means the branch-fs is broken */
9dbd164d 9495+ set_nlink(dir, nlink);
1facf9fc 9496+}
9497+
1308ab2a 9498+loff_t au_dir_size(struct file *file, struct dentry *dentry)
9499+{
9500+ loff_t sz;
9501+ aufs_bindex_t bindex, bend;
9502+ struct file *h_file;
9503+ struct dentry *h_dentry;
9504+
9505+ sz = 0;
9506+ if (file) {
2000de60 9507+ AuDebugOn(!d_is_dir(file->f_path.dentry));
1308ab2a 9508+
4a4d8108 9509+ bend = au_fbend_dir(file);
1308ab2a 9510+ for (bindex = au_fbstart(file);
9511+ bindex <= bend && sz < KMALLOC_MAX_SIZE;
9512+ bindex++) {
4a4d8108 9513+ h_file = au_hf_dir(file, bindex);
c06a8ce3
AM
9514+ if (h_file && file_inode(h_file))
9515+ sz += vfsub_f_size_read(h_file);
1308ab2a 9516+ }
9517+ } else {
9518+ AuDebugOn(!dentry);
2000de60 9519+ AuDebugOn(!d_is_dir(dentry));
1308ab2a 9520+
9521+ bend = au_dbtaildir(dentry);
9522+ for (bindex = au_dbstart(dentry);
9523+ bindex <= bend && sz < KMALLOC_MAX_SIZE;
9524+ bindex++) {
9525+ h_dentry = au_h_dptr(dentry, bindex);
5527c038
JR
9526+ if (h_dentry && d_is_positive(h_dentry))
9527+ sz += i_size_read(d_inode(h_dentry));
1308ab2a 9528+ }
9529+ }
9530+ if (sz < KMALLOC_MAX_SIZE)
9531+ sz = roundup_pow_of_two(sz);
9532+ if (sz > KMALLOC_MAX_SIZE)
9533+ sz = KMALLOC_MAX_SIZE;
9534+ else if (sz < NAME_MAX) {
9535+ BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX);
9536+ sz = AUFS_RDBLK_DEF;
9537+ }
9538+ return sz;
9539+}
9540+
b912730e
AM
9541+struct au_dir_ts_arg {
9542+ struct dentry *dentry;
9543+ aufs_bindex_t brid;
9544+};
9545+
9546+static void au_do_dir_ts(void *arg)
9547+{
9548+ struct au_dir_ts_arg *a = arg;
9549+ struct au_dtime dt;
9550+ struct path h_path;
9551+ struct inode *dir, *h_dir;
9552+ struct super_block *sb;
9553+ struct au_branch *br;
9554+ struct au_hinode *hdir;
9555+ int err;
9556+ aufs_bindex_t bstart, bindex;
9557+
9558+ sb = a->dentry->d_sb;
5527c038 9559+ if (d_really_is_negative(a->dentry))
b912730e 9560+ goto out;
5527c038 9561+ /* no dir->i_mutex lock */
b95c5147
AM
9562+ aufs_read_lock(a->dentry, AuLock_DW); /* noflush */
9563+
5527c038 9564+ dir = d_inode(a->dentry);
b912730e
AM
9565+ bstart = au_ibstart(dir);
9566+ bindex = au_br_index(sb, a->brid);
9567+ if (bindex < bstart)
9568+ goto out_unlock;
9569+
9570+ br = au_sbr(sb, bindex);
9571+ h_path.dentry = au_h_dptr(a->dentry, bindex);
9572+ if (!h_path.dentry)
9573+ goto out_unlock;
9574+ h_path.mnt = au_br_mnt(br);
9575+ au_dtime_store(&dt, a->dentry, &h_path);
9576+
9577+ br = au_sbr(sb, bstart);
9578+ if (!au_br_writable(br->br_perm))
9579+ goto out_unlock;
9580+ h_path.dentry = au_h_dptr(a->dentry, bstart);
9581+ h_path.mnt = au_br_mnt(br);
9582+ err = vfsub_mnt_want_write(h_path.mnt);
9583+ if (err)
9584+ goto out_unlock;
9585+ hdir = au_hi(dir, bstart);
9586+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
9587+ h_dir = au_h_iptr(dir, bstart);
9588+ if (h_dir->i_nlink
9589+ && timespec_compare(&h_dir->i_mtime, &dt.dt_mtime) < 0) {
9590+ dt.dt_h_path = h_path;
9591+ au_dtime_revert(&dt);
9592+ }
9593+ au_hn_imtx_unlock(hdir);
9594+ vfsub_mnt_drop_write(h_path.mnt);
9595+ au_cpup_attr_timesizes(dir);
9596+
9597+out_unlock:
9598+ aufs_read_unlock(a->dentry, AuLock_DW);
9599+out:
9600+ dput(a->dentry);
9601+ au_nwt_done(&au_sbi(sb)->si_nowait);
9602+ kfree(arg);
9603+}
9604+
9605+void au_dir_ts(struct inode *dir, aufs_bindex_t bindex)
9606+{
9607+ int perm, wkq_err;
9608+ aufs_bindex_t bstart;
9609+ struct au_dir_ts_arg *arg;
9610+ struct dentry *dentry;
9611+ struct super_block *sb;
9612+
9613+ IMustLock(dir);
9614+
9615+ dentry = d_find_any_alias(dir);
9616+ AuDebugOn(!dentry);
9617+ sb = dentry->d_sb;
9618+ bstart = au_ibstart(dir);
9619+ if (bstart == bindex) {
9620+ au_cpup_attr_timesizes(dir);
9621+ goto out;
9622+ }
9623+
9624+ perm = au_sbr_perm(sb, bstart);
9625+ if (!au_br_writable(perm))
9626+ goto out;
9627+
9628+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
9629+ if (!arg)
9630+ goto out;
9631+
9632+ arg->dentry = dget(dentry); /* will be dput-ted by au_do_dir_ts() */
9633+ arg->brid = au_sbr_id(sb, bindex);
9634+ wkq_err = au_wkq_nowait(au_do_dir_ts, arg, sb, /*flags*/0);
9635+ if (unlikely(wkq_err)) {
9636+ pr_err("wkq %d\n", wkq_err);
9637+ dput(dentry);
9638+ kfree(arg);
9639+ }
9640+
9641+out:
9642+ dput(dentry);
9643+}
9644+
1facf9fc 9645+/* ---------------------------------------------------------------------- */
9646+
9647+static int reopen_dir(struct file *file)
9648+{
9649+ int err;
9650+ unsigned int flags;
9651+ aufs_bindex_t bindex, btail, bstart;
9652+ struct dentry *dentry, *h_dentry;
9653+ struct file *h_file;
9654+
9655+ /* open all lower dirs */
2000de60 9656+ dentry = file->f_path.dentry;
1facf9fc 9657+ bstart = au_dbstart(dentry);
9658+ for (bindex = au_fbstart(file); bindex < bstart; bindex++)
9659+ au_set_h_fptr(file, bindex, NULL);
9660+ au_set_fbstart(file, bstart);
9661+
9662+ btail = au_dbtaildir(dentry);
4a4d8108 9663+ for (bindex = au_fbend_dir(file); btail < bindex; bindex--)
1facf9fc 9664+ au_set_h_fptr(file, bindex, NULL);
4a4d8108 9665+ au_set_fbend_dir(file, btail);
1facf9fc 9666+
4a4d8108 9667+ flags = vfsub_file_flags(file);
1facf9fc 9668+ for (bindex = bstart; bindex <= btail; bindex++) {
9669+ h_dentry = au_h_dptr(dentry, bindex);
9670+ if (!h_dentry)
9671+ continue;
4a4d8108 9672+ h_file = au_hf_dir(file, bindex);
1facf9fc 9673+ if (h_file)
9674+ continue;
9675+
392086de 9676+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
1facf9fc 9677+ err = PTR_ERR(h_file);
9678+ if (IS_ERR(h_file))
9679+ goto out; /* close all? */
9680+ au_set_h_fptr(file, bindex, h_file);
9681+ }
9682+ au_update_figen(file);
9683+ /* todo: necessary? */
9684+ /* file->f_ra = h_file->f_ra; */
9685+ err = 0;
9686+
4f0767ce 9687+out:
1facf9fc 9688+ return err;
9689+}
9690+
b912730e 9691+static int do_open_dir(struct file *file, int flags, struct file *h_file)
1facf9fc 9692+{
9693+ int err;
9694+ aufs_bindex_t bindex, btail;
9695+ struct dentry *dentry, *h_dentry;
8cdd5066 9696+ struct vfsmount *mnt;
1facf9fc 9697+
1308ab2a 9698+ FiMustWriteLock(file);
b912730e 9699+ AuDebugOn(h_file);
1308ab2a 9700+
523b37e3 9701+ err = 0;
8cdd5066 9702+ mnt = file->f_path.mnt;
2000de60 9703+ dentry = file->f_path.dentry;
5527c038 9704+ file->f_version = d_inode(dentry)->i_version;
1facf9fc 9705+ bindex = au_dbstart(dentry);
9706+ au_set_fbstart(file, bindex);
9707+ btail = au_dbtaildir(dentry);
4a4d8108 9708+ au_set_fbend_dir(file, btail);
1facf9fc 9709+ for (; !err && bindex <= btail; bindex++) {
9710+ h_dentry = au_h_dptr(dentry, bindex);
9711+ if (!h_dentry)
9712+ continue;
9713+
8cdd5066
JR
9714+ err = vfsub_test_mntns(mnt, h_dentry->d_sb);
9715+ if (unlikely(err))
9716+ break;
392086de 9717+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
1facf9fc 9718+ if (IS_ERR(h_file)) {
9719+ err = PTR_ERR(h_file);
9720+ break;
9721+ }
9722+ au_set_h_fptr(file, bindex, h_file);
9723+ }
9724+ au_update_figen(file);
9725+ /* todo: necessary? */
9726+ /* file->f_ra = h_file->f_ra; */
9727+ if (!err)
9728+ return 0; /* success */
9729+
9730+ /* close all */
9731+ for (bindex = au_fbstart(file); bindex <= btail; bindex++)
9732+ au_set_h_fptr(file, bindex, NULL);
9733+ au_set_fbstart(file, -1);
4a4d8108
AM
9734+ au_set_fbend_dir(file, -1);
9735+
1facf9fc 9736+ return err;
9737+}
9738+
9739+static int aufs_open_dir(struct inode *inode __maybe_unused,
9740+ struct file *file)
9741+{
4a4d8108
AM
9742+ int err;
9743+ struct super_block *sb;
9744+ struct au_fidir *fidir;
9745+
9746+ err = -ENOMEM;
2000de60 9747+ sb = file->f_path.dentry->d_sb;
4a4d8108 9748+ si_read_lock(sb, AuLock_FLUSH);
e49829fe 9749+ fidir = au_fidir_alloc(sb);
4a4d8108 9750+ if (fidir) {
b912730e
AM
9751+ struct au_do_open_args args = {
9752+ .open = do_open_dir,
9753+ .fidir = fidir
9754+ };
9755+ err = au_do_open(file, &args);
4a4d8108
AM
9756+ if (unlikely(err))
9757+ kfree(fidir);
9758+ }
9759+ si_read_unlock(sb);
9760+ return err;
1facf9fc 9761+}
9762+
9763+static int aufs_release_dir(struct inode *inode __maybe_unused,
9764+ struct file *file)
9765+{
9766+ struct au_vdir *vdir_cache;
4a4d8108
AM
9767+ struct au_finfo *finfo;
9768+ struct au_fidir *fidir;
9769+ aufs_bindex_t bindex, bend;
1facf9fc 9770+
4a4d8108
AM
9771+ finfo = au_fi(file);
9772+ fidir = finfo->fi_hdir;
9773+ if (fidir) {
076b876e 9774+ au_sphl_del(&finfo->fi_hlist,
2000de60 9775+ &au_sbi(file->f_path.dentry->d_sb)->si_files);
4a4d8108
AM
9776+ vdir_cache = fidir->fd_vdir_cache; /* lock-free */
9777+ if (vdir_cache)
9778+ au_vdir_free(vdir_cache);
9779+
9780+ bindex = finfo->fi_btop;
9781+ if (bindex >= 0) {
9782+ /*
9783+ * calls fput() instead of filp_close(),
9784+ * since no dnotify or lock for the lower file.
9785+ */
9786+ bend = fidir->fd_bbot;
9787+ for (; bindex <= bend; bindex++)
9788+ au_set_h_fptr(file, bindex, NULL);
9789+ }
9790+ kfree(fidir);
9791+ finfo->fi_hdir = NULL;
1facf9fc 9792+ }
1facf9fc 9793+ au_finfo_fin(file);
1facf9fc 9794+ return 0;
9795+}
9796+
9797+/* ---------------------------------------------------------------------- */
9798+
4a4d8108
AM
9799+static int au_do_flush_dir(struct file *file, fl_owner_t id)
9800+{
9801+ int err;
9802+ aufs_bindex_t bindex, bend;
9803+ struct file *h_file;
9804+
9805+ err = 0;
9806+ bend = au_fbend_dir(file);
9807+ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
9808+ h_file = au_hf_dir(file, bindex);
9809+ if (h_file)
9810+ err = vfsub_flush(h_file, id);
9811+ }
9812+ return err;
9813+}
9814+
9815+static int aufs_flush_dir(struct file *file, fl_owner_t id)
9816+{
9817+ return au_do_flush(file, id, au_do_flush_dir);
9818+}
9819+
9820+/* ---------------------------------------------------------------------- */
9821+
1facf9fc 9822+static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync)
9823+{
9824+ int err;
9825+ aufs_bindex_t bend, bindex;
9826+ struct inode *inode;
9827+ struct super_block *sb;
9828+
9829+ err = 0;
9830+ sb = dentry->d_sb;
5527c038 9831+ inode = d_inode(dentry);
1facf9fc 9832+ IMustLock(inode);
9833+ bend = au_dbend(dentry);
9834+ for (bindex = au_dbstart(dentry); !err && bindex <= bend; bindex++) {
9835+ struct path h_path;
1facf9fc 9836+
9837+ if (au_test_ro(sb, bindex, inode))
9838+ continue;
9839+ h_path.dentry = au_h_dptr(dentry, bindex);
9840+ if (!h_path.dentry)
9841+ continue;
1facf9fc 9842+
1facf9fc 9843+ h_path.mnt = au_sbr_mnt(sb, bindex);
53392da6 9844+ err = vfsub_fsync(NULL, &h_path, datasync);
1facf9fc 9845+ }
9846+
9847+ return err;
9848+}
9849+
9850+static int au_do_fsync_dir(struct file *file, int datasync)
9851+{
9852+ int err;
9853+ aufs_bindex_t bend, bindex;
9854+ struct file *h_file;
9855+ struct super_block *sb;
9856+ struct inode *inode;
1facf9fc 9857+
9858+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
9859+ if (unlikely(err))
9860+ goto out;
9861+
c06a8ce3 9862+ inode = file_inode(file);
b912730e 9863+ sb = inode->i_sb;
4a4d8108 9864+ bend = au_fbend_dir(file);
1facf9fc 9865+ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
4a4d8108 9866+ h_file = au_hf_dir(file, bindex);
1facf9fc 9867+ if (!h_file || au_test_ro(sb, bindex, inode))
9868+ continue;
9869+
53392da6 9870+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
1facf9fc 9871+ }
9872+
4f0767ce 9873+out:
1facf9fc 9874+ return err;
9875+}
9876+
9877+/*
9878+ * @file may be NULL
9879+ */
1e00d052
AM
9880+static int aufs_fsync_dir(struct file *file, loff_t start, loff_t end,
9881+ int datasync)
1facf9fc 9882+{
9883+ int err;
b752ccd1 9884+ struct dentry *dentry;
5527c038 9885+ struct inode *inode;
1facf9fc 9886+ struct super_block *sb;
1e00d052 9887+ struct mutex *mtx;
1facf9fc 9888+
9889+ err = 0;
2000de60 9890+ dentry = file->f_path.dentry;
5527c038
JR
9891+ inode = d_inode(dentry);
9892+ mtx = &inode->i_mutex;
1e00d052 9893+ mutex_lock(mtx);
1facf9fc 9894+ sb = dentry->d_sb;
9895+ si_noflush_read_lock(sb);
9896+ if (file)
9897+ err = au_do_fsync_dir(file, datasync);
9898+ else {
9899+ di_write_lock_child(dentry);
9900+ err = au_do_fsync_dir_no_file(dentry, datasync);
9901+ }
5527c038 9902+ au_cpup_attr_timesizes(inode);
1facf9fc 9903+ di_write_unlock(dentry);
9904+ if (file)
9905+ fi_write_unlock(file);
9906+
9907+ si_read_unlock(sb);
1e00d052 9908+ mutex_unlock(mtx);
1facf9fc 9909+ return err;
9910+}
9911+
9912+/* ---------------------------------------------------------------------- */
9913+
392086de 9914+static int aufs_iterate(struct file *file, struct dir_context *ctx)
1facf9fc 9915+{
9916+ int err;
9917+ struct dentry *dentry;
9dbd164d 9918+ struct inode *inode, *h_inode;
1facf9fc 9919+ struct super_block *sb;
9920+
523b37e3 9921+ AuDbg("%pD, ctx{%pf, %llu}\n", file, ctx->actor, ctx->pos);
392086de 9922+
2000de60 9923+ dentry = file->f_path.dentry;
5527c038 9924+ inode = d_inode(dentry);
1facf9fc 9925+ IMustLock(inode);
9926+
9927+ sb = dentry->d_sb;
9928+ si_read_lock(sb, AuLock_FLUSH);
9929+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
9930+ if (unlikely(err))
9931+ goto out;
027c5e7a
AM
9932+ err = au_alive_dir(dentry);
9933+ if (!err)
9934+ err = au_vdir_init(file);
1facf9fc 9935+ di_downgrade_lock(dentry, AuLock_IR);
9936+ if (unlikely(err))
9937+ goto out_unlock;
9938+
9dbd164d 9939+ h_inode = au_h_iptr(inode, au_ibstart(inode));
b752ccd1 9940+ if (!au_test_nfsd()) {
392086de 9941+ err = au_vdir_fill_de(file, ctx);
9dbd164d 9942+ fsstack_copy_attr_atime(inode, h_inode);
1facf9fc 9943+ } else {
9944+ /*
9945+ * nfsd filldir may call lookup_one_len(), vfs_getattr(),
9946+ * encode_fh() and others.
9947+ */
9dbd164d 9948+ atomic_inc(&h_inode->i_count);
1facf9fc 9949+ di_read_unlock(dentry, AuLock_IR);
9950+ si_read_unlock(sb);
392086de 9951+ err = au_vdir_fill_de(file, ctx);
1facf9fc 9952+ fsstack_copy_attr_atime(inode, h_inode);
9953+ fi_write_unlock(file);
9dbd164d 9954+ iput(h_inode);
1facf9fc 9955+
9956+ AuTraceErr(err);
9957+ return err;
9958+ }
9959+
4f0767ce 9960+out_unlock:
1facf9fc 9961+ di_read_unlock(dentry, AuLock_IR);
9962+ fi_write_unlock(file);
4f0767ce 9963+out:
1facf9fc 9964+ si_read_unlock(sb);
9965+ return err;
9966+}
9967+
9968+/* ---------------------------------------------------------------------- */
9969+
/* flag bits kept in test_empty_arg.flags */
#define AuTestEmpty_WHONLY	(1 << 0)
#define AuTestEmpty_CALLED	(1 << 1)
#define AuTestEmpty_SHWH	(1 << 2)

/* test / set / clear one of the bits above by its short name */
#define au_ftest_testempty(flags, name)	((flags) & AuTestEmpty_##name)
#define au_fset_testempty(flags, name) \
	do { (flags) |= AuTestEmpty_##name; } while (0)
#define au_fclr_testempty(flags, name) \
	do { (flags) &= ~AuTestEmpty_##name; } while (0)

/* without CONFIG_AUFS_SHWH the SHWH bit is defined away to 0 */
#ifndef CONFIG_AUFS_SHWH
#undef AuTestEmpty_SHWH
#define AuTestEmpty_SHWH	0
#endif
9983+
1facf9fc 9984+struct test_empty_arg {
392086de 9985+ struct dir_context ctx;
1308ab2a 9986+ struct au_nhash *whlist;
1facf9fc 9987+ unsigned int flags;
9988+ int err;
9989+ aufs_bindex_t bindex;
9990+};
9991+
392086de
AM
9992+static int test_empty_cb(struct dir_context *ctx, const char *__name,
9993+ int namelen, loff_t offset __maybe_unused, u64 ino,
dece6358 9994+ unsigned int d_type)
1facf9fc 9995+{
392086de
AM
9996+ struct test_empty_arg *arg = container_of(ctx, struct test_empty_arg,
9997+ ctx);
1facf9fc 9998+ char *name = (void *)__name;
9999+
10000+ arg->err = 0;
10001+ au_fset_testempty(arg->flags, CALLED);
10002+ /* smp_mb(); */
10003+ if (name[0] == '.'
10004+ && (namelen == 1 || (name[1] == '.' && namelen == 2)))
10005+ goto out; /* success */
10006+
10007+ if (namelen <= AUFS_WH_PFX_LEN
10008+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
10009+ if (au_ftest_testempty(arg->flags, WHONLY)
1308ab2a 10010+ && !au_nhash_test_known_wh(arg->whlist, name, namelen))
1facf9fc 10011+ arg->err = -ENOTEMPTY;
10012+ goto out;
10013+ }
10014+
10015+ name += AUFS_WH_PFX_LEN;
10016+ namelen -= AUFS_WH_PFX_LEN;
1308ab2a 10017+ if (!au_nhash_test_known_wh(arg->whlist, name, namelen))
1facf9fc 10018+ arg->err = au_nhash_append_wh
1308ab2a 10019+ (arg->whlist, name, namelen, ino, d_type, arg->bindex,
dece6358 10020+ au_ftest_testempty(arg->flags, SHWH));
1facf9fc 10021+
4f0767ce 10022+out:
1facf9fc 10023+ /* smp_mb(); */
10024+ AuTraceErr(arg->err);
10025+ return arg->err;
10026+}
10027+
10028+static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
10029+{
10030+ int err;
10031+ struct file *h_file;
10032+
10033+ h_file = au_h_open(dentry, arg->bindex,
10034+ O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE,
392086de 10035+ /*file*/NULL, /*force_wr*/0);
1facf9fc 10036+ err = PTR_ERR(h_file);
10037+ if (IS_ERR(h_file))
10038+ goto out;
10039+
10040+ err = 0;
10041+ if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
c06a8ce3 10042+ && !file_inode(h_file)->i_nlink)
1facf9fc 10043+ goto out_put;
10044+
10045+ do {
10046+ arg->err = 0;
10047+ au_fclr_testempty(arg->flags, CALLED);
10048+ /* smp_mb(); */
392086de 10049+ err = vfsub_iterate_dir(h_file, &arg->ctx);
1facf9fc 10050+ if (err >= 0)
10051+ err = arg->err;
10052+ } while (!err && au_ftest_testempty(arg->flags, CALLED));
10053+
4f0767ce 10054+out_put:
1facf9fc 10055+ fput(h_file);
10056+ au_sbr_put(dentry->d_sb, arg->bindex);
4f0767ce 10057+out:
1facf9fc 10058+ return err;
10059+}
10060+
/* argument bundle handed to call_do_test_empty() through au_wkq_wait() */
struct do_test_empty_args {
	int			*errp;	/* receives do_test_empty()'s result */
	struct dentry		*dentry;
	struct test_empty_arg	*arg;
};
10066+
10067+static void call_do_test_empty(void *args)
10068+{
10069+ struct do_test_empty_args *a = args;
10070+ *a->errp = do_test_empty(a->dentry, a->arg);
10071+}
10072+
10073+static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
10074+{
10075+ int err, wkq_err;
10076+ struct dentry *h_dentry;
10077+ struct inode *h_inode;
10078+
10079+ h_dentry = au_h_dptr(dentry, arg->bindex);
5527c038 10080+ h_inode = d_inode(h_dentry);
53392da6 10081+ /* todo: i_mode changes anytime? */
1facf9fc 10082+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
10083+ err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ);
10084+ mutex_unlock(&h_inode->i_mutex);
10085+ if (!err)
10086+ err = do_test_empty(dentry, arg);
10087+ else {
10088+ struct do_test_empty_args args = {
10089+ .errp = &err,
10090+ .dentry = dentry,
10091+ .arg = arg
10092+ };
10093+ unsigned int flags = arg->flags;
10094+
10095+ wkq_err = au_wkq_wait(call_do_test_empty, &args);
10096+ if (unlikely(wkq_err))
10097+ err = wkq_err;
10098+ arg->flags = flags;
10099+ }
10100+
10101+ return err;
10102+}
10103+
10104+int au_test_empty_lower(struct dentry *dentry)
10105+{
10106+ int err;
1308ab2a 10107+ unsigned int rdhash;
1facf9fc 10108+ aufs_bindex_t bindex, bstart, btail;
1308ab2a 10109+ struct au_nhash whlist;
392086de
AM
10110+ struct test_empty_arg arg = {
10111+ .ctx = {
2000de60 10112+ .actor = test_empty_cb
392086de
AM
10113+ }
10114+ };
076b876e 10115+ int (*test_empty)(struct dentry *dentry, struct test_empty_arg *arg);
1facf9fc 10116+
dece6358
AM
10117+ SiMustAnyLock(dentry->d_sb);
10118+
1308ab2a 10119+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
10120+ if (!rdhash)
10121+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry));
10122+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
dece6358 10123+ if (unlikely(err))
1facf9fc 10124+ goto out;
10125+
1facf9fc 10126+ arg.flags = 0;
1308ab2a 10127+ arg.whlist = &whlist;
10128+ bstart = au_dbstart(dentry);
dece6358
AM
10129+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
10130+ au_fset_testempty(arg.flags, SHWH);
076b876e
AM
10131+ test_empty = do_test_empty;
10132+ if (au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1))
10133+ test_empty = sio_test_empty;
1facf9fc 10134+ arg.bindex = bstart;
076b876e 10135+ err = test_empty(dentry, &arg);
1facf9fc 10136+ if (unlikely(err))
10137+ goto out_whlist;
10138+
10139+ au_fset_testempty(arg.flags, WHONLY);
10140+ btail = au_dbtaildir(dentry);
10141+ for (bindex = bstart + 1; !err && bindex <= btail; bindex++) {
10142+ struct dentry *h_dentry;
10143+
10144+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 10145+ if (h_dentry && d_is_positive(h_dentry)) {
1facf9fc 10146+ arg.bindex = bindex;
076b876e 10147+ err = test_empty(dentry, &arg);
1facf9fc 10148+ }
10149+ }
10150+
4f0767ce 10151+out_whlist:
1308ab2a 10152+ au_nhash_wh_free(&whlist);
4f0767ce 10153+out:
1facf9fc 10154+ return err;
10155+}
10156+
10157+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist)
10158+{
10159+ int err;
392086de
AM
10160+ struct test_empty_arg arg = {
10161+ .ctx = {
2000de60 10162+ .actor = test_empty_cb
392086de
AM
10163+ }
10164+ };
1facf9fc 10165+ aufs_bindex_t bindex, btail;
10166+
10167+ err = 0;
1308ab2a 10168+ arg.whlist = whlist;
1facf9fc 10169+ arg.flags = AuTestEmpty_WHONLY;
dece6358
AM
10170+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
10171+ au_fset_testempty(arg.flags, SHWH);
1facf9fc 10172+ btail = au_dbtaildir(dentry);
10173+ for (bindex = au_dbstart(dentry); !err && bindex <= btail; bindex++) {
10174+ struct dentry *h_dentry;
10175+
10176+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 10177+ if (h_dentry && d_is_positive(h_dentry)) {
1facf9fc 10178+ arg.bindex = bindex;
10179+ err = sio_test_empty(dentry, &arg);
10180+ }
10181+ }
10182+
10183+ return err;
10184+}
10185+
10186+/* ---------------------------------------------------------------------- */
10187+
10188+const struct file_operations aufs_dir_fop = {
4a4d8108 10189+ .owner = THIS_MODULE,
027c5e7a 10190+ .llseek = default_llseek,
1facf9fc 10191+ .read = generic_read_dir,
392086de 10192+ .iterate = aufs_iterate,
1facf9fc 10193+ .unlocked_ioctl = aufs_ioctl_dir,
b752ccd1
AM
10194+#ifdef CONFIG_COMPAT
10195+ .compat_ioctl = aufs_compat_ioctl_dir,
10196+#endif
1facf9fc 10197+ .open = aufs_open_dir,
10198+ .release = aufs_release_dir,
4a4d8108 10199+ .flush = aufs_flush_dir,
1facf9fc 10200+ .fsync = aufs_fsync_dir
10201+};
7f207e10
AM
10202diff -urN /usr/share/empty/fs/aufs/dir.h linux/fs/aufs/dir.h
10203--- /usr/share/empty/fs/aufs/dir.h 1970-01-01 01:00:00.000000000 +0100
8cdd5066 10204+++ linux/fs/aufs/dir.h 2016-02-28 11:26:32.569971135 +0100
b912730e 10205@@ -0,0 +1,131 @@
1facf9fc 10206+/*
8cdd5066 10207+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 10208+ *
10209+ * This program, aufs is free software; you can redistribute it and/or modify
10210+ * it under the terms of the GNU General Public License as published by
10211+ * the Free Software Foundation; either version 2 of the License, or
10212+ * (at your option) any later version.
dece6358
AM
10213+ *
10214+ * This program is distributed in the hope that it will be useful,
10215+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10216+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10217+ * GNU General Public License for more details.
10218+ *
10219+ * You should have received a copy of the GNU General Public License
523b37e3 10220+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 10221+ */
10222+
10223+/*
10224+ * directory operations
10225+ */
10226+
10227+#ifndef __AUFS_DIR_H__
10228+#define __AUFS_DIR_H__
10229+
10230+#ifdef __KERNEL__
10231+
10232+#include <linux/fs.h>
1facf9fc 10233+
10234+/* ---------------------------------------------------------------------- */
10235+
10236+/* need to be faster and smaller */
10237+
/* name hash table; sized by the num_hash argument of au_nhash_alloc() */
struct au_nhash {
	unsigned int		nh_num;		/* presumably the bucket count */
	struct hlist_head	*nh_head;	/* presumably the bucket array */
};
10242+
/* length-prefixed name bytes; packed, with the bytes trailing the header */
struct au_vdir_destr {
	unsigned char	len;
	unsigned char	name[0];
} __packed;
10247+
10248+struct au_vdir_dehstr {
10249+ struct hlist_node hash;
10250+ struct au_vdir_destr *str;
4a4d8108 10251+} ____cacheline_aligned_in_smp;
1facf9fc 10252+
10253+struct au_vdir_de {
10254+ ino_t de_ino;
10255+ unsigned char de_type;
10256+ /* caution: packed */
10257+ struct au_vdir_destr de_str;
10258+} __packed;
10259+
10260+struct au_vdir_wh {
10261+ struct hlist_node wh_hash;
dece6358
AM
10262+#ifdef CONFIG_AUFS_SHWH
10263+ ino_t wh_ino;
1facf9fc 10264+ aufs_bindex_t wh_bindex;
dece6358
AM
10265+ unsigned char wh_type;
10266+#else
10267+ aufs_bindex_t wh_bindex;
10268+#endif
10269+ /* caution: packed */
1facf9fc 10270+ struct au_vdir_destr wh_str;
10271+} __packed;
10272+
/* a position inside an entry block, viewed as raw bytes or as an entry */
union au_vdir_deblk_p {
	unsigned char		*deblk;
	struct au_vdir_de	*de;
};
10277+
10278+struct au_vdir {
10279+ unsigned char **vd_deblk;
10280+ unsigned long vd_nblk;
1facf9fc 10281+ struct {
10282+ unsigned long ul;
10283+ union au_vdir_deblk_p p;
10284+ } vd_last;
10285+
10286+ unsigned long vd_version;
dece6358 10287+ unsigned int vd_deblk_sz;
1facf9fc 10288+ unsigned long vd_jiffy;
4a4d8108 10289+} ____cacheline_aligned_in_smp;
1facf9fc 10290+
10291+/* ---------------------------------------------------------------------- */
10292+
10293+/* dir.c */
10294+extern const struct file_operations aufs_dir_fop;
10295+void au_add_nlink(struct inode *dir, struct inode *h_dir);
10296+void au_sub_nlink(struct inode *dir, struct inode *h_dir);
1308ab2a 10297+loff_t au_dir_size(struct file *file, struct dentry *dentry);
b912730e 10298+void au_dir_ts(struct inode *dir, aufs_bindex_t bsrc);
1facf9fc 10299+int au_test_empty_lower(struct dentry *dentry);
10300+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist);
10301+
10302+/* vdir.c */
1308ab2a 10303+unsigned int au_rdhash_est(loff_t sz);
dece6358
AM
10304+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp);
10305+void au_nhash_wh_free(struct au_nhash *whlist);
1facf9fc 10306+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
10307+ int limit);
dece6358
AM
10308+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen);
10309+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
10310+ unsigned int d_type, aufs_bindex_t bindex,
10311+ unsigned char shwh);
1facf9fc 10312+void au_vdir_free(struct au_vdir *vdir);
10313+int au_vdir_init(struct file *file);
392086de 10314+int au_vdir_fill_de(struct file *file, struct dir_context *ctx);
1facf9fc 10315+
10316+/* ioctl.c */
10317+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg);
10318+
1308ab2a 10319+#ifdef CONFIG_AUFS_RDU
10320+/* rdu.c */
10321+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
b752ccd1
AM
10322+#ifdef CONFIG_COMPAT
10323+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd,
10324+ unsigned long arg);
10325+#endif
1308ab2a 10326+#else
c1595e42
JR
10327+AuStub(long, au_rdu_ioctl, return -EINVAL, struct file *file,
10328+ unsigned int cmd, unsigned long arg)
b752ccd1 10329+#ifdef CONFIG_COMPAT
c1595e42
JR
10330+AuStub(long, au_rdu_compat_ioctl, return -EINVAL, struct file *file,
10331+ unsigned int cmd, unsigned long arg)
b752ccd1 10332+#endif
1308ab2a 10333+#endif
10334+
1facf9fc 10335+#endif /* __KERNEL__ */
10336+#endif /* __AUFS_DIR_H__ */
7f207e10
AM
10337diff -urN /usr/share/empty/fs/aufs/dynop.c linux/fs/aufs/dynop.c
10338--- /usr/share/empty/fs/aufs/dynop.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 10339+++ linux/fs/aufs/dynop.c 2016-02-28 11:26:32.569971135 +0100
7e9cd9fe 10340@@ -0,0 +1,369 @@
1facf9fc 10341+/*
8cdd5066 10342+ * Copyright (C) 2010-2016 Junjiro R. Okajima
1facf9fc 10343+ *
10344+ * This program, aufs is free software; you can redistribute it and/or modify
10345+ * it under the terms of the GNU General Public License as published by
10346+ * the Free Software Foundation; either version 2 of the License, or
10347+ * (at your option) any later version.
dece6358
AM
10348+ *
10349+ * This program is distributed in the hope that it will be useful,
10350+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10351+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10352+ * GNU General Public License for more details.
10353+ *
10354+ * You should have received a copy of the GNU General Public License
523b37e3 10355+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 10356+ */
10357+
10358+/*
4a4d8108 10359+ * dynamically customizable operations for regular files
1facf9fc 10360+ */
10361+
1facf9fc 10362+#include "aufs.h"
10363+
4a4d8108 10364+#define DyPrSym(key) AuDbgSym(key->dk_op.dy_hop)
1facf9fc 10365+
4a4d8108
AM
10366+/*
10367+ * How large will these lists be?
10368+ * Usually just a few elements, 20-30 at most for each, I guess.
10369+ */
10370+static struct au_splhead dynop[AuDyLast];
10371+
10372+static struct au_dykey *dy_gfind_get(struct au_splhead *spl, const void *h_op)
1facf9fc 10373+{
4a4d8108
AM
10374+ struct au_dykey *key, *tmp;
10375+ struct list_head *head;
1facf9fc 10376+
4a4d8108
AM
10377+ key = NULL;
10378+ head = &spl->head;
10379+ rcu_read_lock();
10380+ list_for_each_entry_rcu(tmp, head, dk_list)
10381+ if (tmp->dk_op.dy_hop == h_op) {
10382+ key = tmp;
10383+ kref_get(&key->dk_kref);
10384+ break;
10385+ }
10386+ rcu_read_unlock();
10387+
10388+ return key;
1facf9fc 10389+}
10390+
4a4d8108 10391+static struct au_dykey *dy_bradd(struct au_branch *br, struct au_dykey *key)
1facf9fc 10392+{
4a4d8108
AM
10393+ struct au_dykey **k, *found;
10394+ const void *h_op = key->dk_op.dy_hop;
10395+ int i;
1facf9fc 10396+
4a4d8108
AM
10397+ found = NULL;
10398+ k = br->br_dykey;
10399+ for (i = 0; i < AuBrDynOp; i++)
10400+ if (k[i]) {
10401+ if (k[i]->dk_op.dy_hop == h_op) {
10402+ found = k[i];
10403+ break;
10404+ }
10405+ } else
10406+ break;
10407+ if (!found) {
10408+ spin_lock(&br->br_dykey_lock);
10409+ for (; i < AuBrDynOp; i++)
10410+ if (k[i]) {
10411+ if (k[i]->dk_op.dy_hop == h_op) {
10412+ found = k[i];
10413+ break;
10414+ }
10415+ } else {
10416+ k[i] = key;
10417+ break;
10418+ }
10419+ spin_unlock(&br->br_dykey_lock);
10420+ BUG_ON(i == AuBrDynOp); /* expand the array */
10421+ }
10422+
10423+ return found;
1facf9fc 10424+}
10425+
4a4d8108
AM
10426+/* kref_get() if @key is already added */
10427+static struct au_dykey *dy_gadd(struct au_splhead *spl, struct au_dykey *key)
10428+{
10429+ struct au_dykey *tmp, *found;
10430+ struct list_head *head;
10431+ const void *h_op = key->dk_op.dy_hop;
1facf9fc 10432+
4a4d8108
AM
10433+ found = NULL;
10434+ head = &spl->head;
10435+ spin_lock(&spl->spin);
10436+ list_for_each_entry(tmp, head, dk_list)
10437+ if (tmp->dk_op.dy_hop == h_op) {
10438+ kref_get(&tmp->dk_kref);
10439+ found = tmp;
10440+ break;
10441+ }
10442+ if (!found)
10443+ list_add_rcu(&key->dk_list, head);
10444+ spin_unlock(&spl->spin);
1facf9fc 10445+
4a4d8108
AM
10446+ if (!found)
10447+ DyPrSym(key);
10448+ return found;
10449+}
10450+
10451+static void dy_free_rcu(struct rcu_head *rcu)
1facf9fc 10452+{
4a4d8108
AM
10453+ struct au_dykey *key;
10454+
10455+ key = container_of(rcu, struct au_dykey, dk_rcu);
10456+ DyPrSym(key);
10457+ kfree(key);
1facf9fc 10458+}
10459+
4a4d8108
AM
10460+static void dy_free(struct kref *kref)
10461+{
10462+ struct au_dykey *key;
10463+ struct au_splhead *spl;
1facf9fc 10464+
4a4d8108
AM
10465+ key = container_of(kref, struct au_dykey, dk_kref);
10466+ spl = dynop + key->dk_op.dy_type;
10467+ au_spl_del_rcu(&key->dk_list, spl);
10468+ call_rcu(&key->dk_rcu, dy_free_rcu);
10469+}
10470+
10471+void au_dy_put(struct au_dykey *key)
1facf9fc 10472+{
4a4d8108
AM
10473+ kref_put(&key->dk_kref, dy_free);
10474+}
1facf9fc 10475+
4a4d8108
AM
10476+/* ---------------------------------------------------------------------- */
10477+
/* assert that @cnt equals the number of pointer-sized members of @op */
#define DyDbgSize(cnt, op)	AuDebugOn(cnt != sizeof(op)/sizeof(void *))

/* the member counter exists only in debug builds */
#ifndef CONFIG_AUFS_DEBUG
#define DyDbgDeclare(cnt)	do {} while (0)
#define DyDbgInc(cnt)		do {} while (0)
#else
#define DyDbgDeclare(cnt)	unsigned int cnt = 0
#define DyDbgInc(cnt)		do { cnt++; } while (0)
#endif

/*
 * Copy src.func into dst.func when the lower operations (h_op) provide
 * func and src has an implementation; when src lacks one, only a debug
 * message is printed.  Uses a variable named 'cnt' from the caller's scope.
 */
#define DySet(func, dst, src, h_op, h_sb) do {				\
	DyDbgInc(cnt);							\
	if (h_op->func) {						\
		if (src.func)						\
			dst.func = src.func;				\
		else							\
			AuDbg("%s %s\n", au_sbtype(h_sb), #func);	\
	}								\
} while (0)

/* copy src.func into dst.func unconditionally; src.func must be set */
#define DySetForce(func, dst, src) do {		\
	AuDebugOn(!src.func);			\
	DyDbgInc(cnt);				\
	dst.func = src.func;			\
} while (0)

/* address_space_operations flavours; expect 'dyaop', 'h_aop', 'h_sb' */
#define DySetAop(func) \
	DySet(func, dyaop->da_op, aufs_aop, h_aop, h_sb)
#define DySetAopForce(func) \
	DySetForce(func, dyaop->da_op, aufs_aop)
10508+
10509+static void dy_aop(struct au_dykey *key, const void *h_op,
10510+ struct super_block *h_sb __maybe_unused)
10511+{
10512+ struct au_dyaop *dyaop = (void *)key;
10513+ const struct address_space_operations *h_aop = h_op;
10514+ DyDbgDeclare(cnt);
10515+
10516+ AuDbg("%s\n", au_sbtype(h_sb));
10517+
10518+ DySetAop(writepage);
10519+ DySetAopForce(readpage); /* force */
4a4d8108
AM
10520+ DySetAop(writepages);
10521+ DySetAop(set_page_dirty);
10522+ DySetAop(readpages);
10523+ DySetAop(write_begin);
10524+ DySetAop(write_end);
10525+ DySetAop(bmap);
10526+ DySetAop(invalidatepage);
10527+ DySetAop(releasepage);
027c5e7a 10528+ DySetAop(freepage);
7e9cd9fe 10529+ /* this one will be changed according to an aufs mount option */
4a4d8108 10530+ DySetAop(direct_IO);
4a4d8108
AM
10531+ DySetAop(migratepage);
10532+ DySetAop(launder_page);
10533+ DySetAop(is_partially_uptodate);
392086de 10534+ DySetAop(is_dirty_writeback);
4a4d8108 10535+ DySetAop(error_remove_page);
b4510431
AM
10536+ DySetAop(swap_activate);
10537+ DySetAop(swap_deactivate);
4a4d8108
AM
10538+
10539+ DyDbgSize(cnt, *h_aop);
4a4d8108
AM
10540+}
10541+
4a4d8108
AM
10542+/* ---------------------------------------------------------------------- */
10543+
/* kref release function for a put that must never drop the last reference */
static void dy_bug(struct kref *kref)
{
	BUG();
}
10548+
10549+static struct au_dykey *dy_get(struct au_dynop *op, struct au_branch *br)
10550+{
10551+ struct au_dykey *key, *old;
10552+ struct au_splhead *spl;
b752ccd1 10553+ struct op {
4a4d8108 10554+ unsigned int sz;
b752ccd1
AM
10555+ void (*set)(struct au_dykey *key, const void *h_op,
10556+ struct super_block *h_sb __maybe_unused);
10557+ };
10558+ static const struct op a[] = {
4a4d8108
AM
10559+ [AuDy_AOP] = {
10560+ .sz = sizeof(struct au_dyaop),
b752ccd1 10561+ .set = dy_aop
4a4d8108 10562+ }
b752ccd1
AM
10563+ };
10564+ const struct op *p;
4a4d8108
AM
10565+
10566+ spl = dynop + op->dy_type;
10567+ key = dy_gfind_get(spl, op->dy_hop);
10568+ if (key)
10569+ goto out_add; /* success */
10570+
10571+ p = a + op->dy_type;
10572+ key = kzalloc(p->sz, GFP_NOFS);
10573+ if (unlikely(!key)) {
10574+ key = ERR_PTR(-ENOMEM);
10575+ goto out;
10576+ }
10577+
10578+ key->dk_op.dy_hop = op->dy_hop;
10579+ kref_init(&key->dk_kref);
86dc4139 10580+ p->set(key, op->dy_hop, au_br_sb(br));
4a4d8108
AM
10581+ old = dy_gadd(spl, key);
10582+ if (old) {
10583+ kfree(key);
10584+ key = old;
10585+ }
10586+
10587+out_add:
10588+ old = dy_bradd(br, key);
10589+ if (old)
10590+ /* its ref-count should never be zero here */
10591+ kref_put(&key->dk_kref, dy_bug);
10592+out:
10593+ return key;
10594+}
10595+
10596+/* ---------------------------------------------------------------------- */
10597+/*
10598+ * Aufs prohibits O_DIRECT by defaut even if the branch supports it.
c1595e42 10599+ * This behaviour is necessary to return an error from open(O_DIRECT) instead
4a4d8108
AM
10600+ * of the succeeding I/O. The dio mount option enables O_DIRECT and makes
10601+ * open(O_DIRECT) always succeed, but the succeeding I/O may return an error.
10602+ * See the aufs manual in detail.
4a4d8108
AM
10603+ */
10604+static void dy_adx(struct au_dyaop *dyaop, int do_dx)
10605+{
7e9cd9fe 10606+ if (!do_dx)
4a4d8108 10607+ dyaop->da_op.direct_IO = NULL;
7e9cd9fe 10608+ else
4a4d8108 10609+ dyaop->da_op.direct_IO = aufs_aop.direct_IO;
4a4d8108
AM
10610+}
10611+
10612+static struct au_dyaop *dy_aget(struct au_branch *br,
10613+ const struct address_space_operations *h_aop,
10614+ int do_dx)
10615+{
10616+ struct au_dyaop *dyaop;
10617+ struct au_dynop op;
10618+
10619+ op.dy_type = AuDy_AOP;
10620+ op.dy_haop = h_aop;
10621+ dyaop = (void *)dy_get(&op, br);
10622+ if (IS_ERR(dyaop))
10623+ goto out;
10624+ dy_adx(dyaop, do_dx);
10625+
10626+out:
10627+ return dyaop;
10628+}
10629+
10630+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
10631+ struct inode *h_inode)
10632+{
10633+ int err, do_dx;
10634+ struct super_block *sb;
10635+ struct au_branch *br;
10636+ struct au_dyaop *dyaop;
10637+
10638+ AuDebugOn(!S_ISREG(h_inode->i_mode));
10639+ IiMustWriteLock(inode);
10640+
10641+ sb = inode->i_sb;
10642+ br = au_sbr(sb, bindex);
10643+ do_dx = !!au_opt_test(au_mntflags(sb), DIO);
10644+ dyaop = dy_aget(br, h_inode->i_mapping->a_ops, do_dx);
10645+ err = PTR_ERR(dyaop);
10646+ if (IS_ERR(dyaop))
10647+ /* unnecessary to call dy_fput() */
10648+ goto out;
10649+
10650+ err = 0;
10651+ inode->i_mapping->a_ops = &dyaop->da_op;
10652+
10653+out:
10654+ return err;
10655+}
10656+
b752ccd1
AM
10657+/*
10658+ * Is it safe to replace a_ops during the inode/file is in operation?
10659+ * Yes, I hope so.
10660+ */
10661+int au_dy_irefresh(struct inode *inode)
10662+{
10663+ int err;
10664+ aufs_bindex_t bstart;
10665+ struct inode *h_inode;
10666+
10667+ err = 0;
10668+ if (S_ISREG(inode->i_mode)) {
10669+ bstart = au_ibstart(inode);
10670+ h_inode = au_h_iptr(inode, bstart);
10671+ err = au_dy_iaop(inode, bstart, h_inode);
10672+ }
10673+ return err;
10674+}
10675+
4a4d8108
AM
10676+void au_dy_arefresh(int do_dx)
10677+{
10678+ struct au_splhead *spl;
10679+ struct list_head *head;
10680+ struct au_dykey *key;
10681+
10682+ spl = dynop + AuDy_AOP;
10683+ head = &spl->head;
10684+ spin_lock(&spl->spin);
10685+ list_for_each_entry(key, head, dk_list)
10686+ dy_adx((void *)key, do_dx);
10687+ spin_unlock(&spl->spin);
10688+}
10689+
4a4d8108
AM
10690+/* ---------------------------------------------------------------------- */
10691+
10692+void __init au_dy_init(void)
10693+{
10694+ int i;
10695+
10696+ /* make sure that 'struct au_dykey *' can be any type */
10697+ BUILD_BUG_ON(offsetof(struct au_dyaop, da_key));
4a4d8108
AM
10698+
10699+ for (i = 0; i < AuDyLast; i++)
10700+ au_spl_init(dynop + i);
10701+}
10702+
10703+void au_dy_fin(void)
10704+{
10705+ int i;
10706+
10707+ for (i = 0; i < AuDyLast; i++)
10708+ WARN_ON(!list_empty(&dynop[i].head));
10709+}
7f207e10
AM
10710diff -urN /usr/share/empty/fs/aufs/dynop.h linux/fs/aufs/dynop.h
10711--- /usr/share/empty/fs/aufs/dynop.h 1970-01-01 01:00:00.000000000 +0100
8cdd5066 10712+++ linux/fs/aufs/dynop.h 2016-02-28 11:26:32.569971135 +0100
7e9cd9fe 10713@@ -0,0 +1,74 @@
4a4d8108 10714+/*
8cdd5066 10715+ * Copyright (C) 2010-2016 Junjiro R. Okajima
4a4d8108
AM
10716+ *
10717+ * This program, aufs is free software; you can redistribute it and/or modify
10718+ * it under the terms of the GNU General Public License as published by
10719+ * the Free Software Foundation; either version 2 of the License, or
10720+ * (at your option) any later version.
10721+ *
10722+ * This program is distributed in the hope that it will be useful,
10723+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10724+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10725+ * GNU General Public License for more details.
10726+ *
10727+ * You should have received a copy of the GNU General Public License
523b37e3 10728+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
10729+ */
10730+
10731+/*
10732+ * dynamically customizable operations (for regular files only)
10733+ */
10734+
10735+#ifndef __AUFS_DYNOP_H__
10736+#define __AUFS_DYNOP_H__
10737+
10738+#ifdef __KERNEL__
10739+
7e9cd9fe
AM
10740+#include <linux/fs.h>
10741+#include <linux/kref.h>
4a4d8108 10742+
/* kinds of customisable operations; AuDyLast is the number of kinds */
enum {
	AuDy_AOP,
	AuDyLast
};
4a4d8108
AM
10744+
/* identifies a set of lower operations: its kind plus its address */
struct au_dynop {
	int	dy_type;	/* AuDy_* */
	union {
		/* the same pointer, untyped and typed */
		const void				*dy_hop;
		const struct address_space_operations	*dy_haop;
	};
};
10752+
10753+struct au_dykey {
10754+ union {
10755+ struct list_head dk_list;
10756+ struct rcu_head dk_rcu;
10757+ };
10758+ struct au_dynop dk_op;
10759+
10760+ /*
10761+ * during I am in the branch local array, kref is gotten. when the
10762+ * branch is removed, kref is put.
10763+ */
10764+ struct kref dk_kref;
10765+};
10766+
10767+/* stop unioning since their sizes are very different from each other */
10768+struct au_dyaop {
10769+ struct au_dykey da_key;
10770+ struct address_space_operations da_op; /* not const */
4a4d8108
AM
10771+};
10772+
4a4d8108
AM
10773+/* ---------------------------------------------------------------------- */
10774+
10775+/* dynop.c */
10776+struct au_branch;
10777+void au_dy_put(struct au_dykey *key);
10778+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
10779+ struct inode *h_inode);
b752ccd1 10780+int au_dy_irefresh(struct inode *inode);
4a4d8108 10781+void au_dy_arefresh(int do_dio);
4a4d8108
AM
10782+
10783+void __init au_dy_init(void);
10784+void au_dy_fin(void);
10785+
4a4d8108
AM
10786+#endif /* __KERNEL__ */
10787+#endif /* __AUFS_DYNOP_H__ */
7f207e10
AM
10788diff -urN /usr/share/empty/fs/aufs/export.c linux/fs/aufs/export.c
10789--- /usr/share/empty/fs/aufs/export.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 10790+++ linux/fs/aufs/export.c 2016-02-28 11:26:32.569971135 +0100
5527c038 10791@@ -0,0 +1,832 @@
4a4d8108 10792+/*
8cdd5066 10793+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
10794+ *
10795+ * This program, aufs is free software; you can redistribute it and/or modify
10796+ * it under the terms of the GNU General Public License as published by
10797+ * the Free Software Foundation; either version 2 of the License, or
10798+ * (at your option) any later version.
10799+ *
10800+ * This program is distributed in the hope that it will be useful,
10801+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10802+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10803+ * GNU General Public License for more details.
10804+ *
10805+ * You should have received a copy of the GNU General Public License
523b37e3 10806+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
10807+ */
10808+
10809+/*
10810+ * export via nfs
10811+ */
10812+
10813+#include <linux/exportfs.h>
7eafdf33 10814+#include <linux/fs_struct.h>
4a4d8108
AM
10815+#include <linux/namei.h>
10816+#include <linux/nsproxy.h>
10817+#include <linux/random.h>
10818+#include <linux/writeback.h>
7eafdf33 10819+#include "../fs/mount.h"
4a4d8108
AM
10820+#include "aufs.h"
10821+
10822+union conv {
10823+#ifdef CONFIG_AUFS_INO_T_64
10824+ __u32 a[2];
10825+#else
10826+ __u32 a[1];
10827+#endif
10828+ ino_t ino;
10829+};
10830+
10831+static ino_t decode_ino(__u32 *a)
10832+{
10833+ union conv u;
10834+
10835+ BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a));
10836+ u.a[0] = a[0];
10837+#ifdef CONFIG_AUFS_INO_T_64
10838+ u.a[1] = a[1];
10839+#endif
10840+ return u.ino;
10841+}
10842+
10843+static void encode_ino(__u32 *a, ino_t ino)
10844+{
10845+ union conv u;
10846+
10847+ u.ino = ino;
10848+ a[0] = u.a[0];
10849+#ifdef CONFIG_AUFS_INO_T_64
10850+ a[1] = u.a[1];
10851+#endif
10852+}
10853+
10854+/* NFS file handle */
10855+enum {
10856+ Fh_br_id,
10857+ Fh_sigen,
10858+#ifdef CONFIG_AUFS_INO_T_64
10859+ /* support 64bit inode number */
10860+ Fh_ino1,
10861+ Fh_ino2,
10862+ Fh_dir_ino1,
10863+ Fh_dir_ino2,
10864+#else
10865+ Fh_ino1,
10866+ Fh_dir_ino1,
10867+#endif
10868+ Fh_igen,
10869+ Fh_h_type,
10870+ Fh_tail,
10871+
10872+ Fh_ino = Fh_ino1,
10873+ Fh_dir_ino = Fh_dir_ino1
10874+};
10875+
10876+static int au_test_anon(struct dentry *dentry)
10877+{
027c5e7a 10878+ /* note: read d_flags without d_lock */
4a4d8108
AM
10879+ return !!(dentry->d_flags & DCACHE_DISCONNECTED);
10880+}
10881+
a2a7ad62
AM
10882+int au_test_nfsd(void)
10883+{
10884+ int ret;
10885+ struct task_struct *tsk = current;
10886+ char comm[sizeof(tsk->comm)];
10887+
10888+ ret = 0;
10889+ if (tsk->flags & PF_KTHREAD) {
10890+ get_task_comm(comm, tsk);
10891+ ret = !strcmp(comm, "nfsd");
10892+ }
10893+
10894+ return ret;
10895+}
10896+
4a4d8108
AM
10897+/* ---------------------------------------------------------------------- */
10898+/* inode generation external table */
10899+
b752ccd1 10900+void au_xigen_inc(struct inode *inode)
4a4d8108 10901+{
4a4d8108
AM
10902+ loff_t pos;
10903+ ssize_t sz;
10904+ __u32 igen;
10905+ struct super_block *sb;
10906+ struct au_sbinfo *sbinfo;
10907+
4a4d8108 10908+ sb = inode->i_sb;
b752ccd1 10909+ AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
1facf9fc 10910+
b752ccd1 10911+ sbinfo = au_sbi(sb);
1facf9fc 10912+ pos = inode->i_ino;
10913+ pos *= sizeof(igen);
10914+ igen = inode->i_generation + 1;
1facf9fc 10915+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen,
10916+ sizeof(igen), &pos);
10917+ if (sz == sizeof(igen))
b752ccd1 10918+ return; /* success */
1facf9fc 10919+
b752ccd1 10920+ if (unlikely(sz >= 0))
1facf9fc 10921+ AuIOErr("xigen error (%zd)\n", sz);
1facf9fc 10922+}
10923+
10924+int au_xigen_new(struct inode *inode)
10925+{
10926+ int err;
10927+ loff_t pos;
10928+ ssize_t sz;
10929+ struct super_block *sb;
10930+ struct au_sbinfo *sbinfo;
10931+ struct file *file;
10932+
10933+ err = 0;
10934+ /* todo: dirty, at mount time */
10935+ if (inode->i_ino == AUFS_ROOT_INO)
10936+ goto out;
10937+ sb = inode->i_sb;
dece6358 10938+ SiMustAnyLock(sb);
1facf9fc 10939+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
10940+ goto out;
10941+
10942+ err = -EFBIG;
10943+ pos = inode->i_ino;
10944+ if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) {
10945+ AuIOErr1("too large i%lld\n", pos);
10946+ goto out;
10947+ }
10948+ pos *= sizeof(inode->i_generation);
10949+
10950+ err = 0;
10951+ sbinfo = au_sbi(sb);
10952+ file = sbinfo->si_xigen;
10953+ BUG_ON(!file);
10954+
c06a8ce3 10955+ if (vfsub_f_size_read(file)
1facf9fc 10956+ < pos + sizeof(inode->i_generation)) {
10957+ inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next);
10958+ sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation,
10959+ sizeof(inode->i_generation), &pos);
10960+ } else
10961+ sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation,
10962+ sizeof(inode->i_generation), &pos);
10963+ if (sz == sizeof(inode->i_generation))
10964+ goto out; /* success */
10965+
10966+ err = sz;
10967+ if (unlikely(sz >= 0)) {
10968+ err = -EIO;
10969+ AuIOErr("xigen error (%zd)\n", sz);
10970+ }
10971+
4f0767ce 10972+out:
1facf9fc 10973+ return err;
10974+}
10975+
10976+int au_xigen_set(struct super_block *sb, struct file *base)
10977+{
10978+ int err;
10979+ struct au_sbinfo *sbinfo;
10980+ struct file *file;
10981+
dece6358
AM
10982+ SiMustWriteLock(sb);
10983+
1facf9fc 10984+ sbinfo = au_sbi(sb);
10985+ file = au_xino_create2(base, sbinfo->si_xigen);
10986+ err = PTR_ERR(file);
10987+ if (IS_ERR(file))
10988+ goto out;
10989+ err = 0;
10990+ if (sbinfo->si_xigen)
10991+ fput(sbinfo->si_xigen);
10992+ sbinfo->si_xigen = file;
10993+
4f0767ce 10994+out:
1facf9fc 10995+ return err;
10996+}
10997+
10998+void au_xigen_clr(struct super_block *sb)
10999+{
11000+ struct au_sbinfo *sbinfo;
11001+
dece6358
AM
11002+ SiMustWriteLock(sb);
11003+
1facf9fc 11004+ sbinfo = au_sbi(sb);
11005+ if (sbinfo->si_xigen) {
11006+ fput(sbinfo->si_xigen);
11007+ sbinfo->si_xigen = NULL;
11008+ }
11009+}
11010+
11011+/* ---------------------------------------------------------------------- */
11012+
11013+static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino,
11014+ ino_t dir_ino)
11015+{
11016+ struct dentry *dentry, *d;
11017+ struct inode *inode;
11018+ unsigned int sigen;
11019+
11020+ dentry = NULL;
11021+ inode = ilookup(sb, ino);
11022+ if (!inode)
11023+ goto out;
11024+
11025+ dentry = ERR_PTR(-ESTALE);
11026+ sigen = au_sigen(sb);
11027+ if (unlikely(is_bad_inode(inode)
11028+ || IS_DEADDIR(inode)
537831f9 11029+ || sigen != au_iigen(inode, NULL)))
1facf9fc 11030+ goto out_iput;
11031+
11032+ dentry = NULL;
11033+ if (!dir_ino || S_ISDIR(inode->i_mode))
11034+ dentry = d_find_alias(inode);
11035+ else {
027c5e7a 11036+ spin_lock(&inode->i_lock);
c1595e42 11037+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
027c5e7a 11038+ spin_lock(&d->d_lock);
1facf9fc 11039+ if (!au_test_anon(d)
5527c038 11040+ && d_inode(d->d_parent)->i_ino == dir_ino) {
027c5e7a
AM
11041+ dentry = dget_dlock(d);
11042+ spin_unlock(&d->d_lock);
1facf9fc 11043+ break;
11044+ }
027c5e7a
AM
11045+ spin_unlock(&d->d_lock);
11046+ }
11047+ spin_unlock(&inode->i_lock);
1facf9fc 11048+ }
027c5e7a 11049+ if (unlikely(dentry && au_digen_test(dentry, sigen))) {
2cbb1c4b 11050+ /* need to refresh */
1facf9fc 11051+ dput(dentry);
2cbb1c4b 11052+ dentry = NULL;
1facf9fc 11053+ }
11054+
4f0767ce 11055+out_iput:
1facf9fc 11056+ iput(inode);
4f0767ce 11057+out:
2cbb1c4b 11058+ AuTraceErrPtr(dentry);
1facf9fc 11059+ return dentry;
11060+}
11061+
11062+/* ---------------------------------------------------------------------- */
11063+
11064+/* todo: dirty? */
11065+/* if exportfs_decode_fh() passed vfsmount*, we could be happy */
4a4d8108
AM
11066+
11067+struct au_compare_mnt_args {
11068+ /* input */
11069+ struct super_block *sb;
11070+
11071+ /* output */
11072+ struct vfsmount *mnt;
11073+};
11074+
11075+static int au_compare_mnt(struct vfsmount *mnt, void *arg)
11076+{
11077+ struct au_compare_mnt_args *a = arg;
11078+
11079+ if (mnt->mnt_sb != a->sb)
11080+ return 0;
11081+ a->mnt = mntget(mnt);
11082+ return 1;
11083+}
11084+
1facf9fc 11085+static struct vfsmount *au_mnt_get(struct super_block *sb)
11086+{
4a4d8108 11087+ int err;
7eafdf33 11088+ struct path root;
4a4d8108
AM
11089+ struct au_compare_mnt_args args = {
11090+ .sb = sb
11091+ };
1facf9fc 11092+
7eafdf33 11093+ get_fs_root(current->fs, &root);
523b37e3 11094+ rcu_read_lock();
7eafdf33 11095+ err = iterate_mounts(au_compare_mnt, &args, root.mnt);
523b37e3 11096+ rcu_read_unlock();
7eafdf33 11097+ path_put(&root);
4a4d8108
AM
11098+ AuDebugOn(!err);
11099+ AuDebugOn(!args.mnt);
11100+ return args.mnt;
1facf9fc 11101+}
11102+
11103+struct au_nfsd_si_lock {
4a4d8108 11104+ unsigned int sigen;
027c5e7a 11105+ aufs_bindex_t bindex, br_id;
1facf9fc 11106+ unsigned char force_lock;
11107+};
11108+
027c5e7a
AM
11109+static int si_nfsd_read_lock(struct super_block *sb,
11110+ struct au_nfsd_si_lock *nsi_lock)
1facf9fc 11111+{
027c5e7a 11112+ int err;
1facf9fc 11113+ aufs_bindex_t bindex;
11114+
11115+ si_read_lock(sb, AuLock_FLUSH);
11116+
11117+ /* branch id may be wrapped around */
027c5e7a 11118+ err = 0;
1facf9fc 11119+ bindex = au_br_index(sb, nsi_lock->br_id);
11120+ if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb))
11121+ goto out; /* success */
11122+
027c5e7a
AM
11123+ err = -ESTALE;
11124+ bindex = -1;
1facf9fc 11125+ if (!nsi_lock->force_lock)
11126+ si_read_unlock(sb);
1facf9fc 11127+
4f0767ce 11128+out:
027c5e7a
AM
11129+ nsi_lock->bindex = bindex;
11130+ return err;
1facf9fc 11131+}
11132+
11133+struct find_name_by_ino {
392086de 11134+ struct dir_context ctx;
1facf9fc 11135+ int called, found;
11136+ ino_t ino;
11137+ char *name;
11138+ int namelen;
11139+};
11140+
11141+static int
392086de
AM
11142+find_name_by_ino(struct dir_context *ctx, const char *name, int namelen,
11143+ loff_t offset, u64 ino, unsigned int d_type)
1facf9fc 11144+{
392086de
AM
11145+ struct find_name_by_ino *a = container_of(ctx, struct find_name_by_ino,
11146+ ctx);
1facf9fc 11147+
11148+ a->called++;
11149+ if (a->ino != ino)
11150+ return 0;
11151+
11152+ memcpy(a->name, name, namelen);
11153+ a->namelen = namelen;
11154+ a->found = 1;
11155+ return 1;
11156+}
11157+
11158+static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino,
11159+ struct au_nfsd_si_lock *nsi_lock)
11160+{
11161+ struct dentry *dentry, *parent;
11162+ struct file *file;
11163+ struct inode *dir;
392086de
AM
11164+ struct find_name_by_ino arg = {
11165+ .ctx = {
2000de60 11166+ .actor = find_name_by_ino
392086de
AM
11167+ }
11168+ };
1facf9fc 11169+ int err;
11170+
11171+ parent = path->dentry;
11172+ if (nsi_lock)
11173+ si_read_unlock(parent->d_sb);
4a4d8108 11174+ file = vfsub_dentry_open(path, au_dir_roflags);
1facf9fc 11175+ dentry = (void *)file;
11176+ if (IS_ERR(file))
11177+ goto out;
11178+
11179+ dentry = ERR_PTR(-ENOMEM);
537831f9 11180+ arg.name = (void *)__get_free_page(GFP_NOFS);
1facf9fc 11181+ if (unlikely(!arg.name))
11182+ goto out_file;
11183+ arg.ino = ino;
11184+ arg.found = 0;
11185+ do {
11186+ arg.called = 0;
11187+ /* smp_mb(); */
392086de 11188+ err = vfsub_iterate_dir(file, &arg.ctx);
1facf9fc 11189+ } while (!err && !arg.found && arg.called);
11190+ dentry = ERR_PTR(err);
11191+ if (unlikely(err))
11192+ goto out_name;
1716fcea
AM
11193+ /* instead of ENOENT */
11194+ dentry = ERR_PTR(-ESTALE);
1facf9fc 11195+ if (!arg.found)
11196+ goto out_name;
11197+
b4510431 11198+ /* do not call vfsub_lkup_one() */
5527c038 11199+ dir = d_inode(parent);
1facf9fc 11200+ mutex_lock(&dir->i_mutex);
11201+ dentry = vfsub_lookup_one_len(arg.name, parent, arg.namelen);
11202+ mutex_unlock(&dir->i_mutex);
11203+ AuTraceErrPtr(dentry);
11204+ if (IS_ERR(dentry))
11205+ goto out_name;
11206+ AuDebugOn(au_test_anon(dentry));
5527c038 11207+ if (unlikely(d_really_is_negative(dentry))) {
1facf9fc 11208+ dput(dentry);
11209+ dentry = ERR_PTR(-ENOENT);
11210+ }
11211+
4f0767ce 11212+out_name:
537831f9 11213+ free_page((unsigned long)arg.name);
4f0767ce 11214+out_file:
1facf9fc 11215+ fput(file);
4f0767ce 11216+out:
1facf9fc 11217+ if (unlikely(nsi_lock
11218+ && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0))
11219+ if (!IS_ERR(dentry)) {
11220+ dput(dentry);
11221+ dentry = ERR_PTR(-ESTALE);
11222+ }
11223+ AuTraceErrPtr(dentry);
11224+ return dentry;
11225+}
11226+
11227+static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino,
11228+ ino_t dir_ino,
11229+ struct au_nfsd_si_lock *nsi_lock)
11230+{
11231+ struct dentry *dentry;
11232+ struct path path;
11233+
11234+ if (dir_ino != AUFS_ROOT_INO) {
11235+ path.dentry = decode_by_ino(sb, dir_ino, 0);
11236+ dentry = path.dentry;
11237+ if (!path.dentry || IS_ERR(path.dentry))
11238+ goto out;
11239+ AuDebugOn(au_test_anon(path.dentry));
11240+ } else
11241+ path.dentry = dget(sb->s_root);
11242+
11243+ path.mnt = au_mnt_get(sb);
11244+ dentry = au_lkup_by_ino(&path, ino, nsi_lock);
11245+ path_put(&path);
11246+
4f0767ce 11247+out:
1facf9fc 11248+ AuTraceErrPtr(dentry);
11249+ return dentry;
11250+}
11251+
11252+/* ---------------------------------------------------------------------- */
11253+
11254+static int h_acceptable(void *expv, struct dentry *dentry)
11255+{
11256+ return 1;
11257+}
11258+
11259+static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath,
11260+ char *buf, int len, struct super_block *sb)
11261+{
11262+ char *p;
11263+ int n;
11264+ struct path path;
11265+
11266+ p = d_path(h_rootpath, buf, len);
11267+ if (IS_ERR(p))
11268+ goto out;
11269+ n = strlen(p);
11270+
11271+ path.mnt = h_rootpath->mnt;
11272+ path.dentry = h_parent;
11273+ p = d_path(&path, buf, len);
11274+ if (IS_ERR(p))
11275+ goto out;
11276+ if (n != 1)
11277+ p += n;
11278+
11279+ path.mnt = au_mnt_get(sb);
11280+ path.dentry = sb->s_root;
11281+ p = d_path(&path, buf, len - strlen(p));
11282+ mntput(path.mnt);
11283+ if (IS_ERR(p))
11284+ goto out;
11285+ if (n != 1)
11286+ p[strlen(p)] = '/';
11287+
4f0767ce 11288+out:
1facf9fc 11289+ AuTraceErrPtr(p);
11290+ return p;
11291+}
11292+
11293+static
027c5e7a
AM
11294+struct dentry *decode_by_path(struct super_block *sb, ino_t ino, __u32 *fh,
11295+ int fh_len, struct au_nfsd_si_lock *nsi_lock)
1facf9fc 11296+{
11297+ struct dentry *dentry, *h_parent, *root;
11298+ struct super_block *h_sb;
11299+ char *pathname, *p;
11300+ struct vfsmount *h_mnt;
11301+ struct au_branch *br;
11302+ int err;
11303+ struct path path;
11304+
027c5e7a 11305+ br = au_sbr(sb, nsi_lock->bindex);
86dc4139 11306+ h_mnt = au_br_mnt(br);
1facf9fc 11307+ h_sb = h_mnt->mnt_sb;
11308+ /* todo: call lower fh_to_dentry()? fh_to_parent()? */
11309+ h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail),
11310+ fh_len - Fh_tail, fh[Fh_h_type],
11311+ h_acceptable, /*context*/NULL);
11312+ dentry = h_parent;
11313+ if (unlikely(!h_parent || IS_ERR(h_parent))) {
11314+ AuWarn1("%s decode_fh failed, %ld\n",
11315+ au_sbtype(h_sb), PTR_ERR(h_parent));
11316+ goto out;
11317+ }
11318+ dentry = NULL;
11319+ if (unlikely(au_test_anon(h_parent))) {
11320+ AuWarn1("%s decode_fh returned a disconnected dentry\n",
11321+ au_sbtype(h_sb));
11322+ goto out_h_parent;
11323+ }
11324+
11325+ dentry = ERR_PTR(-ENOMEM);
11326+ pathname = (void *)__get_free_page(GFP_NOFS);
11327+ if (unlikely(!pathname))
11328+ goto out_h_parent;
11329+
11330+ root = sb->s_root;
11331+ path.mnt = h_mnt;
11332+ di_read_lock_parent(root, !AuLock_IR);
027c5e7a 11333+ path.dentry = au_h_dptr(root, nsi_lock->bindex);
1facf9fc 11334+ di_read_unlock(root, !AuLock_IR);
11335+ p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb);
11336+ dentry = (void *)p;
11337+ if (IS_ERR(p))
11338+ goto out_pathname;
11339+
11340+ si_read_unlock(sb);
11341+ err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
11342+ dentry = ERR_PTR(err);
11343+ if (unlikely(err))
11344+ goto out_relock;
11345+
11346+ dentry = ERR_PTR(-ENOENT);
11347+ AuDebugOn(au_test_anon(path.dentry));
5527c038 11348+ if (unlikely(d_really_is_negative(path.dentry)))
1facf9fc 11349+ goto out_path;
11350+
5527c038 11351+ if (ino != d_inode(path.dentry)->i_ino)
1facf9fc 11352+ dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL);
11353+ else
11354+ dentry = dget(path.dentry);
11355+
4f0767ce 11356+out_path:
1facf9fc 11357+ path_put(&path);
4f0767ce 11358+out_relock:
1facf9fc 11359+ if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0))
11360+ if (!IS_ERR(dentry)) {
11361+ dput(dentry);
11362+ dentry = ERR_PTR(-ESTALE);
11363+ }
4f0767ce 11364+out_pathname:
1facf9fc 11365+ free_page((unsigned long)pathname);
4f0767ce 11366+out_h_parent:
1facf9fc 11367+ dput(h_parent);
4f0767ce 11368+out:
1facf9fc 11369+ AuTraceErrPtr(dentry);
11370+ return dentry;
11371+}
11372+
11373+/* ---------------------------------------------------------------------- */
11374+
11375+static struct dentry *
11376+aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
11377+ int fh_type)
11378+{
11379+ struct dentry *dentry;
11380+ __u32 *fh = fid->raw;
027c5e7a 11381+ struct au_branch *br;
1facf9fc 11382+ ino_t ino, dir_ino;
1facf9fc 11383+ struct au_nfsd_si_lock nsi_lock = {
1facf9fc 11384+ .force_lock = 0
11385+ };
11386+
1facf9fc 11387+ dentry = ERR_PTR(-ESTALE);
4a4d8108
AM
11388+ /* it should never happen, but the file handle is unreliable */
11389+ if (unlikely(fh_len < Fh_tail))
11390+ goto out;
11391+ nsi_lock.sigen = fh[Fh_sigen];
11392+ nsi_lock.br_id = fh[Fh_br_id];
11393+
1facf9fc 11394+ /* branch id may be wrapped around */
027c5e7a
AM
11395+ br = NULL;
11396+ if (unlikely(si_nfsd_read_lock(sb, &nsi_lock)))
1facf9fc 11397+ goto out;
11398+ nsi_lock.force_lock = 1;
11399+
11400+ /* is this inode still cached? */
11401+ ino = decode_ino(fh + Fh_ino);
4a4d8108
AM
11402+ /* it should never happen */
11403+ if (unlikely(ino == AUFS_ROOT_INO))
8cdd5066 11404+ goto out_unlock;
4a4d8108 11405+
1facf9fc 11406+ dir_ino = decode_ino(fh + Fh_dir_ino);
11407+ dentry = decode_by_ino(sb, ino, dir_ino);
11408+ if (IS_ERR(dentry))
11409+ goto out_unlock;
11410+ if (dentry)
11411+ goto accept;
11412+
11413+ /* is the parent dir cached? */
027c5e7a
AM
11414+ br = au_sbr(sb, nsi_lock.bindex);
11415+ atomic_inc(&br->br_count);
1facf9fc 11416+ dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock);
11417+ if (IS_ERR(dentry))
11418+ goto out_unlock;
11419+ if (dentry)
11420+ goto accept;
11421+
11422+ /* lookup path */
027c5e7a 11423+ dentry = decode_by_path(sb, ino, fh, fh_len, &nsi_lock);
1facf9fc 11424+ if (IS_ERR(dentry))
11425+ goto out_unlock;
11426+ if (unlikely(!dentry))
11427+ /* todo?: make it ESTALE */
11428+ goto out_unlock;
11429+
4f0767ce 11430+accept:
027c5e7a 11431+ if (!au_digen_test(dentry, au_sigen(sb))
5527c038 11432+ && d_inode(dentry)->i_generation == fh[Fh_igen])
1facf9fc 11433+ goto out_unlock; /* success */
11434+
11435+ dput(dentry);
11436+ dentry = ERR_PTR(-ESTALE);
4f0767ce 11437+out_unlock:
027c5e7a
AM
11438+ if (br)
11439+ atomic_dec(&br->br_count);
1facf9fc 11440+ si_read_unlock(sb);
4f0767ce 11441+out:
1facf9fc 11442+ AuTraceErrPtr(dentry);
11443+ return dentry;
11444+}
11445+
11446+#if 0 /* reserved for future use */
11447+/* support subtreecheck option */
11448+static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid,
11449+ int fh_len, int fh_type)
11450+{
11451+ struct dentry *parent;
11452+ __u32 *fh = fid->raw;
11453+ ino_t dir_ino;
11454+
11455+ dir_ino = decode_ino(fh + Fh_dir_ino);
11456+ parent = decode_by_ino(sb, dir_ino, 0);
11457+ if (IS_ERR(parent))
11458+ goto out;
11459+ if (!parent)
11460+ parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]),
11461+ dir_ino, fh, fh_len);
11462+
4f0767ce 11463+out:
1facf9fc 11464+ AuTraceErrPtr(parent);
11465+ return parent;
11466+}
11467+#endif
11468+
11469+/* ---------------------------------------------------------------------- */
11470+
0c3ec466
AM
11471+static int aufs_encode_fh(struct inode *inode, __u32 *fh, int *max_len,
11472+ struct inode *dir)
1facf9fc 11473+{
11474+ int err;
0c3ec466 11475+ aufs_bindex_t bindex;
1facf9fc 11476+ struct super_block *sb, *h_sb;
0c3ec466
AM
11477+ struct dentry *dentry, *parent, *h_parent;
11478+ struct inode *h_dir;
1facf9fc 11479+ struct au_branch *br;
11480+
1facf9fc 11481+ err = -ENOSPC;
11482+ if (unlikely(*max_len <= Fh_tail)) {
11483+ AuWarn1("NFSv2 client (max_len %d)?\n", *max_len);
11484+ goto out;
11485+ }
11486+
11487+ err = FILEID_ROOT;
0c3ec466
AM
11488+ if (inode->i_ino == AUFS_ROOT_INO) {
11489+ AuDebugOn(inode->i_ino != AUFS_ROOT_INO);
1facf9fc 11490+ goto out;
11491+ }
11492+
1facf9fc 11493+ h_parent = NULL;
0c3ec466
AM
11494+ sb = inode->i_sb;
11495+ err = si_read_lock(sb, AuLock_FLUSH);
027c5e7a
AM
11496+ if (unlikely(err))
11497+ goto out;
11498+
1facf9fc 11499+#ifdef CONFIG_AUFS_DEBUG
11500+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
11501+ AuWarn1("NFS-exporting requires xino\n");
11502+#endif
027c5e7a 11503+ err = -EIO;
0c3ec466
AM
11504+ parent = NULL;
11505+ ii_read_lock_child(inode);
11506+ bindex = au_ibstart(inode);
11507+ if (!dir) {
c1595e42 11508+ dentry = d_find_any_alias(inode);
0c3ec466
AM
11509+ if (unlikely(!dentry))
11510+ goto out_unlock;
11511+ AuDebugOn(au_test_anon(dentry));
11512+ parent = dget_parent(dentry);
11513+ dput(dentry);
11514+ if (unlikely(!parent))
11515+ goto out_unlock;
5527c038
JR
11516+ if (d_really_is_positive(parent))
11517+ dir = d_inode(parent);
1facf9fc 11518+ }
0c3ec466
AM
11519+
11520+ ii_read_lock_parent(dir);
11521+ h_dir = au_h_iptr(dir, bindex);
11522+ ii_read_unlock(dir);
11523+ if (unlikely(!h_dir))
11524+ goto out_parent;
c1595e42 11525+ h_parent = d_find_any_alias(h_dir);
1facf9fc 11526+ if (unlikely(!h_parent))
0c3ec466 11527+ goto out_hparent;
1facf9fc 11528+
11529+ err = -EPERM;
11530+ br = au_sbr(sb, bindex);
86dc4139 11531+ h_sb = au_br_sb(br);
1facf9fc 11532+ if (unlikely(!h_sb->s_export_op)) {
11533+ AuErr1("%s branch is not exportable\n", au_sbtype(h_sb));
0c3ec466 11534+ goto out_hparent;
1facf9fc 11535+ }
11536+
11537+ fh[Fh_br_id] = br->br_id;
11538+ fh[Fh_sigen] = au_sigen(sb);
11539+ encode_ino(fh + Fh_ino, inode->i_ino);
0c3ec466 11540+ encode_ino(fh + Fh_dir_ino, dir->i_ino);
1facf9fc 11541+ fh[Fh_igen] = inode->i_generation;
11542+
11543+ *max_len -= Fh_tail;
11544+ fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail),
11545+ max_len,
11546+ /*connectable or subtreecheck*/0);
11547+ err = fh[Fh_h_type];
11548+ *max_len += Fh_tail;
11549+ /* todo: macros? */
1716fcea 11550+ if (err != FILEID_INVALID)
1facf9fc 11551+ err = 99;
11552+ else
11553+ AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb));
11554+
0c3ec466 11555+out_hparent:
1facf9fc 11556+ dput(h_parent);
0c3ec466 11557+out_parent:
1facf9fc 11558+ dput(parent);
0c3ec466
AM
11559+out_unlock:
11560+ ii_read_unlock(inode);
11561+ si_read_unlock(sb);
4f0767ce 11562+out:
1facf9fc 11563+ if (unlikely(err < 0))
1716fcea 11564+ err = FILEID_INVALID;
1facf9fc 11565+ return err;
11566+}
11567+
11568+/* ---------------------------------------------------------------------- */
11569+
4a4d8108
AM
11570+static int aufs_commit_metadata(struct inode *inode)
11571+{
11572+ int err;
11573+ aufs_bindex_t bindex;
11574+ struct super_block *sb;
11575+ struct inode *h_inode;
11576+ int (*f)(struct inode *inode);
11577+
11578+ sb = inode->i_sb;
e49829fe 11579+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108
AM
11580+ ii_write_lock_child(inode);
11581+ bindex = au_ibstart(inode);
11582+ AuDebugOn(bindex < 0);
11583+ h_inode = au_h_iptr(inode, bindex);
11584+
11585+ f = h_inode->i_sb->s_export_op->commit_metadata;
11586+ if (f)
11587+ err = f(h_inode);
11588+ else {
11589+ struct writeback_control wbc = {
11590+ .sync_mode = WB_SYNC_ALL,
11591+ .nr_to_write = 0 /* metadata only */
11592+ };
11593+
11594+ err = sync_inode(h_inode, &wbc);
11595+ }
11596+
11597+ au_cpup_attr_timesizes(inode);
11598+ ii_write_unlock(inode);
11599+ si_read_unlock(sb);
11600+ return err;
11601+}
11602+
11603+/* ---------------------------------------------------------------------- */
11604+
1facf9fc 11605+static struct export_operations aufs_export_op = {
4a4d8108 11606+ .fh_to_dentry = aufs_fh_to_dentry,
1facf9fc 11607+ /* .fh_to_parent = aufs_fh_to_parent, */
4a4d8108
AM
11608+ .encode_fh = aufs_encode_fh,
11609+ .commit_metadata = aufs_commit_metadata
1facf9fc 11610+};
11611+
11612+void au_export_init(struct super_block *sb)
11613+{
11614+ struct au_sbinfo *sbinfo;
11615+ __u32 u;
11616+
11617+ sb->s_export_op = &aufs_export_op;
11618+ sbinfo = au_sbi(sb);
11619+ sbinfo->si_xigen = NULL;
11620+ get_random_bytes(&u, sizeof(u));
11621+ BUILD_BUG_ON(sizeof(u) != sizeof(int));
11622+ atomic_set(&sbinfo->si_xigen_next, u);
11623+}
076b876e
AM
11624diff -urN /usr/share/empty/fs/aufs/fhsm.c linux/fs/aufs/fhsm.c
11625--- /usr/share/empty/fs/aufs/fhsm.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 11626+++ linux/fs/aufs/fhsm.c 2016-02-28 11:26:32.573304539 +0100
c1595e42 11627@@ -0,0 +1,426 @@
076b876e 11628+/*
8cdd5066 11629+ * Copyright (C) 2011-2016 Junjiro R. Okajima
076b876e
AM
11630+ *
11631+ * This program, aufs is free software; you can redistribute it and/or modify
11632+ * it under the terms of the GNU General Public License as published by
11633+ * the Free Software Foundation; either version 2 of the License, or
11634+ * (at your option) any later version.
11635+ *
11636+ * This program is distributed in the hope that it will be useful,
11637+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11638+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11639+ * GNU General Public License for more details.
11640+ *
11641+ * You should have received a copy of the GNU General Public License
11642+ * along with this program; if not, write to the Free Software
11643+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
11644+ */
11645+
11646+/*
11647+ * File-based Hierarchy Storage Management
11648+ */
11649+
11650+#include <linux/anon_inodes.h>
11651+#include <linux/poll.h>
11652+#include <linux/seq_file.h>
11653+#include <linux/statfs.h>
11654+#include "aufs.h"
11655+
c1595e42
JR
11656+static aufs_bindex_t au_fhsm_bottom(struct super_block *sb)
11657+{
11658+ struct au_sbinfo *sbinfo;
11659+ struct au_fhsm *fhsm;
11660+
11661+ SiMustAnyLock(sb);
11662+
11663+ sbinfo = au_sbi(sb);
11664+ fhsm = &sbinfo->si_fhsm;
11665+ AuDebugOn(!fhsm);
11666+ return fhsm->fhsm_bottom;
11667+}
11668+
11669+void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex)
11670+{
11671+ struct au_sbinfo *sbinfo;
11672+ struct au_fhsm *fhsm;
11673+
11674+ SiMustWriteLock(sb);
11675+
11676+ sbinfo = au_sbi(sb);
11677+ fhsm = &sbinfo->si_fhsm;
11678+ AuDebugOn(!fhsm);
11679+ fhsm->fhsm_bottom = bindex;
11680+}
11681+
11682+/* ---------------------------------------------------------------------- */
11683+
076b876e
AM
11684+static int au_fhsm_test_jiffy(struct au_sbinfo *sbinfo, struct au_branch *br)
11685+{
11686+ struct au_br_fhsm *bf;
11687+
11688+ bf = br->br_fhsm;
11689+ MtxMustLock(&bf->bf_lock);
11690+
11691+ return !bf->bf_readable
11692+ || time_after(jiffies,
11693+ bf->bf_jiffy + sbinfo->si_fhsm.fhsm_expire);
11694+}
11695+
11696+/* ---------------------------------------------------------------------- */
11697+
11698+static void au_fhsm_notify(struct super_block *sb, int val)
11699+{
11700+ struct au_sbinfo *sbinfo;
11701+ struct au_fhsm *fhsm;
11702+
11703+ SiMustAnyLock(sb);
11704+
11705+ sbinfo = au_sbi(sb);
11706+ fhsm = &sbinfo->si_fhsm;
11707+ if (au_fhsm_pid(fhsm)
11708+ && atomic_read(&fhsm->fhsm_readable) != -1) {
11709+ atomic_set(&fhsm->fhsm_readable, val);
11710+ if (val)
11711+ wake_up(&fhsm->fhsm_wqh);
11712+ }
11713+}
11714+
11715+static int au_fhsm_stfs(struct super_block *sb, aufs_bindex_t bindex,
11716+ struct aufs_stfs *rstfs, int do_lock, int do_notify)
11717+{
11718+ int err;
11719+ struct au_branch *br;
11720+ struct au_br_fhsm *bf;
11721+
11722+ br = au_sbr(sb, bindex);
11723+ AuDebugOn(au_br_rdonly(br));
11724+ bf = br->br_fhsm;
11725+ AuDebugOn(!bf);
11726+
11727+ if (do_lock)
11728+ mutex_lock(&bf->bf_lock);
11729+ else
11730+ MtxMustLock(&bf->bf_lock);
11731+
11732+ /* sb->s_root for NFS is unreliable */
11733+ err = au_br_stfs(br, &bf->bf_stfs);
11734+ if (unlikely(err)) {
11735+ AuErr1("FHSM failed (%d), b%d, ignored.\n", bindex, err);
11736+ goto out;
11737+ }
11738+
11739+ bf->bf_jiffy = jiffies;
11740+ bf->bf_readable = 1;
11741+ if (do_notify)
11742+ au_fhsm_notify(sb, /*val*/1);
11743+ if (rstfs)
11744+ *rstfs = bf->bf_stfs;
11745+
11746+out:
11747+ if (do_lock)
11748+ mutex_unlock(&bf->bf_lock);
11749+ au_fhsm_notify(sb, /*val*/1);
11750+
11751+ return err;
11752+}
11753+
11754+void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force)
11755+{
11756+ int err;
076b876e
AM
11757+ struct au_sbinfo *sbinfo;
11758+ struct au_fhsm *fhsm;
11759+ struct au_branch *br;
11760+ struct au_br_fhsm *bf;
11761+
11762+ AuDbg("b%d, force %d\n", bindex, force);
11763+ SiMustAnyLock(sb);
11764+
11765+ sbinfo = au_sbi(sb);
11766+ fhsm = &sbinfo->si_fhsm;
c1595e42
JR
11767+ if (!au_ftest_si(sbinfo, FHSM)
11768+ || fhsm->fhsm_bottom == bindex)
076b876e
AM
11769+ return;
11770+
11771+ br = au_sbr(sb, bindex);
11772+ bf = br->br_fhsm;
11773+ AuDebugOn(!bf);
11774+ mutex_lock(&bf->bf_lock);
11775+ if (force
11776+ || au_fhsm_pid(fhsm)
11777+ || au_fhsm_test_jiffy(sbinfo, br))
11778+ err = au_fhsm_stfs(sb, bindex, /*rstfs*/NULL, /*do_lock*/0,
11779+ /*do_notify*/1);
11780+ mutex_unlock(&bf->bf_lock);
11781+}
11782+
11783+void au_fhsm_wrote_all(struct super_block *sb, int force)
11784+{
11785+ aufs_bindex_t bindex, bend;
11786+ struct au_branch *br;
11787+
11788+ /* exclude the bottom */
c1595e42 11789+ bend = au_fhsm_bottom(sb);
076b876e
AM
11790+ for (bindex = 0; bindex < bend; bindex++) {
11791+ br = au_sbr(sb, bindex);
11792+ if (au_br_fhsm(br->br_perm))
11793+ au_fhsm_wrote(sb, bindex, force);
11794+ }
11795+}
11796+
11797+/* ---------------------------------------------------------------------- */
11798+
11799+static unsigned int au_fhsm_poll(struct file *file,
11800+ struct poll_table_struct *wait)
11801+{
11802+ unsigned int mask;
11803+ struct au_sbinfo *sbinfo;
11804+ struct au_fhsm *fhsm;
11805+
11806+ mask = 0;
11807+ sbinfo = file->private_data;
11808+ fhsm = &sbinfo->si_fhsm;
11809+ poll_wait(file, &fhsm->fhsm_wqh, wait);
11810+ if (atomic_read(&fhsm->fhsm_readable))
11811+ mask = POLLIN /* | POLLRDNORM */;
11812+
11813+ AuTraceErr((int)mask);
11814+ return mask;
11815+}
11816+
11817+static int au_fhsm_do_read_one(struct aufs_stbr __user *stbr,
11818+ struct aufs_stfs *stfs, __s16 brid)
11819+{
11820+ int err;
11821+
11822+ err = copy_to_user(&stbr->stfs, stfs, sizeof(*stfs));
11823+ if (!err)
11824+ err = __put_user(brid, &stbr->brid);
11825+ if (unlikely(err))
11826+ err = -EFAULT;
11827+
11828+ return err;
11829+}
11830+
11831+static ssize_t au_fhsm_do_read(struct super_block *sb,
11832+ struct aufs_stbr __user *stbr, size_t count)
11833+{
11834+ ssize_t err;
11835+ int nstbr;
11836+ aufs_bindex_t bindex, bend;
11837+ struct au_branch *br;
11838+ struct au_br_fhsm *bf;
11839+
11840+ /* except the bottom branch */
11841+ err = 0;
11842+ nstbr = 0;
c1595e42 11843+ bend = au_fhsm_bottom(sb);
076b876e
AM
11844+ for (bindex = 0; !err && bindex < bend; bindex++) {
11845+ br = au_sbr(sb, bindex);
11846+ if (!au_br_fhsm(br->br_perm))
11847+ continue;
11848+
11849+ bf = br->br_fhsm;
11850+ mutex_lock(&bf->bf_lock);
11851+ if (bf->bf_readable) {
11852+ err = -EFAULT;
11853+ if (count >= sizeof(*stbr))
11854+ err = au_fhsm_do_read_one(stbr++, &bf->bf_stfs,
11855+ br->br_id);
11856+ if (!err) {
11857+ bf->bf_readable = 0;
11858+ count -= sizeof(*stbr);
11859+ nstbr++;
11860+ }
11861+ }
11862+ mutex_unlock(&bf->bf_lock);
11863+ }
11864+ if (!err)
11865+ err = sizeof(*stbr) * nstbr;
11866+
11867+ return err;
11868+}
11869+
11870+static ssize_t au_fhsm_read(struct file *file, char __user *buf, size_t count,
11871+ loff_t *pos)
11872+{ /* ->read of the fhsm fd: waits for fhsm_readable (unless O_NONBLOCK), then copies branch stats via au_fhsm_do_read() */
11873+ ssize_t err;
11874+ int readable;
11875+ aufs_bindex_t nfhsm, bindex, bend;
11876+ struct au_sbinfo *sbinfo;
11877+ struct au_fhsm *fhsm;
11878+ struct au_branch *br;
11879+ struct super_block *sb;
11880+
11881+ err = 0;
11882+ sbinfo = file->private_data;
11883+ fhsm = &sbinfo->si_fhsm;
11884+need_data:
11885+ spin_lock_irq(&fhsm->fhsm_wqh.lock);
11886+ if (!atomic_read(&fhsm->fhsm_readable)) {
11887+ if (vfsub_file_flags(file) & O_NONBLOCK)
11888+ err = -EAGAIN; /* non-blocking fd and nothing to read */
11889+ else
11890+ err = wait_event_interruptible_locked_irq
11891+ (fhsm->fhsm_wqh,
11892+ atomic_read(&fhsm->fhsm_readable));
11893+ }
11894+ spin_unlock_irq(&fhsm->fhsm_wqh.lock);
11895+ if (unlikely(err))
11896+ goto out;
11897+
11898+ /* sb may already be dead */
11899+ au_rw_read_lock(&sbinfo->si_rwsem);
11900+ readable = atomic_read(&fhsm->fhsm_readable); /* re-read under si_rwsem */
11901+ if (readable > 0) {
11902+ sb = sbinfo->si_sb;
11903+ AuDebugOn(!sb);
11904+ /* exclude the bottom branch */
11905+ nfhsm = 0;
c1595e42 11906+ bend = au_fhsm_bottom(sb);
076b876e
AM
11907+ for (bindex = 0; bindex < bend; bindex++) {
11908+ br = au_sbr(sb, bindex);
11909+ if (au_br_fhsm(br->br_perm))
11910+ nfhsm++;
11911+ }
11912+ err = -EMSGSIZE; /* unless @count can hold one aufs_stbr per fhsm branch */
11913+ if (nfhsm * sizeof(struct aufs_stbr) <= count) {
11914+ atomic_set(&fhsm->fhsm_readable, 0); /* cleared before the copy */
11915+ err = au_fhsm_do_read(sbinfo->si_sb, (void __user *)buf,
11916+ count);
11917+ }
11918+ }
11919+ au_rw_read_unlock(&sbinfo->si_rwsem);
11920+ if (!readable) /* flag went back to 0 after the wait: wait again */
11921+ goto need_data;
11922+
11923+out: /* note: readable < 0 reaches here without copying, with err still 0 */
11924+ return err;
11925+}
11926+
11927+static int au_fhsm_release(struct inode *inode, struct file *file)
11928+{ /* ->release of the fhsm fd: clears fhsm_pid and puts si_kobj; always returns 0 */
11929+ struct au_sbinfo *sbinfo;
11930+ struct au_fhsm *fhsm;
11931+
11932+ /* sb may already be dead */
11933+ sbinfo = file->private_data;
11934+ fhsm = &sbinfo->si_fhsm;
11935+ spin_lock(&fhsm->fhsm_spin);
11936+ fhsm->fhsm_pid = 0; /* au_fhsm_fd() returns -EBUSY while this is non-zero */
11937+ spin_unlock(&fhsm->fhsm_spin);
11938+ kobject_put(&sbinfo->si_kobj); /* pairs with kobject_get() in au_fhsm_fd() */
11939+
11940+ return 0;
11941+}
11942+
11943+static const struct file_operations au_fhsm_fops = { /* operations of the anonymous fd created by au_fhsm_fd() */
11944+ .owner = THIS_MODULE,
11945+ .llseek = noop_llseek,
11946+ .read = au_fhsm_read,
11947+ .poll = au_fhsm_poll,
11948+ .release = au_fhsm_release
11949+};
11950+
11951+int au_fhsm_fd(struct super_block *sb, int oflags)
11952+{ /* creates the "[aufs_fhsm]" anonymous fd for @sb; returns the fd, or -EPERM / -EINVAL / -EBUSY / the anon_inode_getfd() error */
11953+ int err, fd;
11954+ struct au_sbinfo *sbinfo;
11955+ struct au_fhsm *fhsm;
11956+
11957+ err = -EPERM;
11958+ if (unlikely(!capable(CAP_SYS_ADMIN)))
11959+ goto out;
11960+
11961+ err = -EINVAL;
11962+ if (unlikely(oflags & ~(O_CLOEXEC | O_NONBLOCK))) /* only these two flags are accepted */
11963+ goto out;
11964+
11965+ err = 0;
11966+ sbinfo = au_sbi(sb);
11967+ fhsm = &sbinfo->si_fhsm;
11968+ spin_lock(&fhsm->fhsm_spin);
11969+ if (!fhsm->fhsm_pid)
11970+ fhsm->fhsm_pid = current->pid; /* claim the single slot for the calling task */
11971+ else
11972+ err = -EBUSY; /* a pid is already registered */
11973+ spin_unlock(&fhsm->fhsm_spin);
11974+ if (unlikely(err))
11975+ goto out;
11976+
11977+ oflags |= O_RDONLY;
11978+ /* oflags |= FMODE_NONOTIFY; */
11979+ fd = anon_inode_getfd("[aufs_fhsm]", &au_fhsm_fops, sbinfo, oflags);
11980+ err = fd;
11981+ if (unlikely(fd < 0))
11982+ goto out_pid;
11983+
11984+ /* succeed regardless of the 'fhsm' status */
11985+ kobject_get(&sbinfo->si_kobj); /* put in au_fhsm_release() */
11986+ si_noflush_read_lock(sb);
11987+ if (au_ftest_si(sbinfo, FHSM))
11988+ au_fhsm_wrote_all(sb, /*force*/0);
11989+ si_read_unlock(sb);
11990+ goto out; /* success */
11991+
11992+out_pid: /* undo the pid registration done above */
11993+ spin_lock(&fhsm->fhsm_spin);
11994+ fhsm->fhsm_pid = 0;
11995+ spin_unlock(&fhsm->fhsm_spin);
11996+out:
11997+ AuTraceErr(err);
11998+ return err;
11999+}
12000+
12001+/* ---------------------------------------------------------------------- */
12002+
12003+int au_fhsm_br_alloc(struct au_branch *br)
12004+{ /* allocates and initializes br->br_fhsm; returns 0 or -ENOMEM */
12005+ int err;
12006+
12007+ err = 0;
12008+ br->br_fhsm = kmalloc(sizeof(*br->br_fhsm), GFP_NOFS);
12009+ if (br->br_fhsm)
12010+ au_br_fhsm_init(br->br_fhsm);
12011+ else
12012+ err = -ENOMEM; /* br->br_fhsm is left NULL in this case */
12013+
12014+ return err;
12015+}
12016+
12017+/* ---------------------------------------------------------------------- */
12018+
12019+void au_fhsm_fin(struct super_block *sb)
12020+{ /* thin wrapper: passes val -1 to au_fhsm_notify() for @sb */
12021+ au_fhsm_notify(sb, /*val*/-1);
12022+}
12023+
12024+void au_fhsm_init(struct au_sbinfo *sbinfo)
12025+{ /* sets up sbinfo->si_fhsm: lock, wait queue, readable = 0, default expire, bottom = -1 */
12026+ struct au_fhsm *fhsm;
12027+
12028+ fhsm = &sbinfo->si_fhsm;
12029+ spin_lock_init(&fhsm->fhsm_spin);
12030+ init_waitqueue_head(&fhsm->fhsm_wqh);
12031+ atomic_set(&fhsm->fhsm_readable, 0);
12032+ fhsm->fhsm_expire
12033+ = msecs_to_jiffies(AUFS_FHSM_CACHE_DEF_SEC * MSEC_PER_SEC); /* default, kept in jiffies */
c1595e42 12034+ fhsm->fhsm_bottom = -1;
076b876e
AM
12035+}
12036+
12037+void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec)
12038+{ /* stores @sec (seconds) into fhsm_expire, converted to jiffies */
12039+ sbinfo->si_fhsm.fhsm_expire
12040+ = msecs_to_jiffies(sec * MSEC_PER_SEC);
12041+}
12042+
12043+void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo)
12044+{ /* prints ",fhsm_sec=N" to @seq when FHSM is set and N differs from the default */
12045+ unsigned int u;
12046+
12047+ if (!au_ftest_si(sbinfo, FHSM))
12048+ return;
12049+
12050+ u = jiffies_to_msecs(sbinfo->si_fhsm.fhsm_expire) / MSEC_PER_SEC; /* jiffies back to whole seconds */
12051+ if (u != AUFS_FHSM_CACHE_DEF_SEC)
12052+ seq_printf(seq, ",fhsm_sec=%u", u);
12053+}
7f207e10
AM
12054diff -urN /usr/share/empty/fs/aufs/file.c linux/fs/aufs/file.c
12055--- /usr/share/empty/fs/aufs/file.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 12056+++ linux/fs/aufs/file.c 2016-02-28 11:26:32.573304539 +0100
79b8bda9 12057@@ -0,0 +1,844 @@
1facf9fc 12058+/*
8cdd5066 12059+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 12060+ *
12061+ * This program, aufs is free software; you can redistribute it and/or modify
12062+ * it under the terms of the GNU General Public License as published by
12063+ * the Free Software Foundation; either version 2 of the License, or
12064+ * (at your option) any later version.
dece6358
AM
12065+ *
12066+ * This program is distributed in the hope that it will be useful,
12067+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12068+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12069+ * GNU General Public License for more details.
12070+ *
12071+ * You should have received a copy of the GNU General Public License
523b37e3 12072+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 12073+ */
12074+
12075+/*
4a4d8108 12076+ * handling file/dir, and address_space operation
1facf9fc 12077+ */
12078+
7eafdf33
AM
12079+#ifdef CONFIG_AUFS_DEBUG
12080+#include <linux/migrate.h>
12081+#endif
4a4d8108 12082+#include <linux/pagemap.h>
1facf9fc 12083+#include "aufs.h"
12084+
4a4d8108
AM
12085+/* drop flags for writing */
12086+unsigned int au_file_roflags(unsigned int flags)
12087+{ /* returns @flags with the write-related open flags cleared and O_RDONLY | O_NOATIME set */
12088+ flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC);
12089+ flags |= O_RDONLY | O_NOATIME;
12090+ return flags;
12091+}
12092+
12093+/* common functions to regular file and dir */
12094+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
392086de 12095+ struct file *file, int force_wr)
1facf9fc 12096+{ /* opens the lower dentry of @dentry on branch @bindex; returns the opened file or an ERR_PTR */
1308ab2a 12097+ struct file *h_file;
4a4d8108
AM
12098+ struct dentry *h_dentry;
12099+ struct inode *h_inode;
12100+ struct super_block *sb;
12101+ struct au_branch *br;
12102+ struct path h_path;
b912730e 12103+ int err;
1facf9fc 12104+
4a4d8108
AM
12105+ /* a race condition can happen between open and unlink/rmdir */
12106+ h_file = ERR_PTR(-ENOENT);
12107+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 12108+ if (au_test_nfsd() && (!h_dentry || d_is_negative(h_dentry)))
4a4d8108 12109+ goto out;
5527c038 12110+ h_inode = d_inode(h_dentry);
027c5e7a
AM
12111+ spin_lock(&h_dentry->d_lock);
12112+ err = (!d_unhashed(dentry) && d_unlinked(h_dentry)) /* used as a boolean here, not an errno */
5527c038 12113+ /* || !d_inode(dentry)->i_nlink */
027c5e7a
AM
12114+ ;
12115+ spin_unlock(&h_dentry->d_lock);
12116+ if (unlikely(err))
4a4d8108 12117+ goto out; /* returns ERR_PTR(-ENOENT) set above */
1facf9fc 12118+
4a4d8108
AM
12119+ sb = dentry->d_sb;
12120+ br = au_sbr(sb, bindex);
b912730e
AM
12121+ err = au_br_test_oflag(flags, br);
12122+ h_file = ERR_PTR(err);
12123+ if (unlikely(err))
027c5e7a 12124+ goto out;
1facf9fc 12125+
4a4d8108 12126+ /* drop flags for writing */
5527c038 12127+ if (au_test_ro(sb, bindex, d_inode(dentry))) {
392086de
AM
12128+ if (force_wr && !(flags & O_WRONLY)) /* force_wr is honoured only when O_WRONLY was requested */
12129+ force_wr = 0;
4a4d8108 12130+ flags = au_file_roflags(flags);
392086de
AM
12131+ if (force_wr) {
12132+ h_file = ERR_PTR(-EROFS);
12133+ flags = au_file_roflags(flags);
12134+ if (unlikely(vfsub_native_ro(h_inode)
12135+ || IS_APPEND(h_inode)))
12136+ goto out;
12137+ flags &= ~O_ACCMODE; /* replace the access mode with O_WRONLY */
12138+ flags |= O_WRONLY;
12139+ }
12140+ }
4a4d8108
AM
12141+ flags &= ~O_CREAT;
12142+ atomic_inc(&br->br_count); /* dropped again at out_br if the open fails */
12143+ h_path.dentry = h_dentry;
86dc4139 12144+ h_path.mnt = au_br_mnt(br);
38d290e6 12145+ h_file = vfsub_dentry_open(&h_path, flags);
4a4d8108
AM
12146+ if (IS_ERR(h_file))
12147+ goto out_br;
dece6358 12148+
b912730e 12149+ if (flags & __FMODE_EXEC) {
4a4d8108
AM
12150+ err = deny_write_access(h_file);
12151+ if (unlikely(err)) {
12152+ fput(h_file);
12153+ h_file = ERR_PTR(err);
12154+ goto out_br;
12155+ }
12156+ }
953406b4 12157+ fsnotify_open(h_file);
4a4d8108 12158+ goto out; /* success */
1facf9fc 12159+
4f0767ce 12160+out_br:
4a4d8108 12161+ atomic_dec(&br->br_count);
4f0767ce 12162+out:
4a4d8108
AM
12163+ return h_file;
12164+}
1308ab2a 12165+
076b876e
AM
12166+static int au_cmoo(struct dentry *dentry)
12167+{ /* applies the start branch's cmoo attribute: copies @dentry up to an upper writable branch and, with AuBrWAttr_MOO, unlinks the source; 0 when nothing is to be done */
12168+ int err, cmoo;
12169+ unsigned int udba;
12170+ struct path h_path;
12171+ struct au_pin pin;
12172+ struct au_cp_generic cpg = {
12173+ .dentry = dentry,
12174+ .bdst = -1,
12175+ .bsrc = -1,
12176+ .len = -1,
12177+ .pin = &pin,
12178+ .flags = AuCpup_DTIME | AuCpup_HOPEN
12179+ };
7e9cd9fe 12180+ struct inode *delegated;
076b876e
AM
12181+ struct super_block *sb;
12182+ struct au_sbinfo *sbinfo;
12183+ struct au_fhsm *fhsm;
12184+ pid_t pid;
12185+ struct au_branch *br;
12186+ struct dentry *parent;
12187+ struct au_hinode *hdir;
12188+
12189+ DiMustWriteLock(dentry);
5527c038 12190+ IiMustWriteLock(d_inode(dentry));
076b876e
AM
12191+
12192+ err = 0;
12193+ if (IS_ROOT(dentry))
12194+ goto out;
12195+ cpg.bsrc = au_dbstart(dentry);
12196+ if (!cpg.bsrc) /* start branch is already index 0 */
12197+ goto out;
12198+
12199+ sb = dentry->d_sb;
12200+ sbinfo = au_sbi(sb);
12201+ fhsm = &sbinfo->si_fhsm;
12202+ pid = au_fhsm_pid(fhsm);
12203+ if (pid
12204+ && (current->pid == pid
12205+ || current->real_parent->pid == pid)) /* caller is the registered fhsm pid or its child: skip */
12206+ goto out;
12207+
12208+ br = au_sbr(sb, cpg.bsrc);
12209+ cmoo = au_br_cmoo(br->br_perm);
12210+ if (!cmoo)
12211+ goto out;
7e9cd9fe 12212+ if (!d_is_reg(dentry))
076b876e
AM
12213+ cmoo &= AuBrAttr_COO_ALL; /* non-regular files keep only the COO_ALL bits */
12214+ if (!cmoo)
12215+ goto out;
12216+
12217+ parent = dget_parent(dentry);
12218+ di_write_lock_parent(parent);
12219+ err = au_wbr_do_copyup_bu(dentry, cpg.bsrc - 1);
12220+ cpg.bdst = err; /* non-negative result is used as the destination branch index */
12221+ if (unlikely(err < 0)) {
12222+ err = 0; /* there is no upper writable branch */
12223+ goto out_dgrade;
12224+ }
12225+ AuDbg("bsrc %d, bdst %d\n", cpg.bsrc, cpg.bdst);
12226+
12227+ /* do not respect the coo attrib for the target branch */
12228+ err = au_cpup_dirs(dentry, cpg.bdst);
12229+ if (unlikely(err))
12230+ goto out_dgrade;
12231+
12232+ di_downgrade_lock(parent, AuLock_IR);
12233+ udba = au_opt_udba(sb);
12234+ err = au_pin(&pin, dentry, cpg.bdst, udba,
12235+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12236+ if (unlikely(err))
12237+ goto out_parent;
12238+
12239+ err = au_sio_cpup_simple(&cpg);
12240+ au_unpin(&pin);
12241+ if (unlikely(err))
12242+ goto out_parent;
12243+ if (!(cmoo & AuBrWAttr_MOO))
12244+ goto out_parent; /* success */
12245+
12246+ err = au_pin(&pin, dentry, cpg.bsrc, udba,
12247+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12248+ if (unlikely(err))
12249+ goto out_parent;
12250+
12251+ h_path.mnt = au_br_mnt(br);
12252+ h_path.dentry = au_h_dptr(dentry, cpg.bsrc);
5527c038 12253+ hdir = au_hi(d_inode(parent), cpg.bsrc);
076b876e
AM
12254+ delegated = NULL;
12255+ err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated, /*force*/1);
12256+ au_unpin(&pin);
12257+ /* todo: keep h_dentry or not? */
12258+ if (unlikely(err == -EWOULDBLOCK)) {
12259+ pr_warn("cannot retry for NFSv4 delegation"
12260+ " for an internal unlink\n");
12261+ iput(delegated);
12262+ }
12263+ if (unlikely(err)) {
12264+ pr_err("unlink %pd after coo failed (%d), ignored\n",
12265+ dentry, err);
12266+ err = 0; /* the copy-up already succeeded; the unlink failure is only logged */
12267+ }
12268+ goto out_parent; /* success */
12269+
12270+out_dgrade: /* parent is still write-locked here: downgrade so the read unlock below matches */
12271+ di_downgrade_lock(parent, AuLock_IR);
12272+out_parent:
12273+ di_read_unlock(parent, AuLock_IR);
12274+ dput(parent);
12275+out:
12276+ AuTraceErr(err);
12277+ return err;
12278+}
12279+
b912730e 12280+int au_do_open(struct file *file, struct au_do_open_args *args)
1facf9fc 12281+{ /* common open path: au_finfo_init(), au_cmoo(), args->open(); on success links finfo into si_files, on error tears finfo down */
b912730e 12282+ int err, no_lock = args->no_lock;
1facf9fc 12283+ struct dentry *dentry;
076b876e 12284+ struct au_finfo *finfo;
1308ab2a 12285+
b912730e
AM
12286+ if (!no_lock)
12287+ err = au_finfo_init(file, args->fidir);
12288+ else {
12289+ lockdep_off(); /* lockdep is disabled around the call in the no_lock case */
12290+ err = au_finfo_init(file, args->fidir);
12291+ lockdep_on();
12292+ }
4a4d8108
AM
12293+ if (unlikely(err))
12294+ goto out;
1facf9fc 12295+
2000de60 12296+ dentry = file->f_path.dentry;
b912730e
AM
12297+ AuDebugOn(IS_ERR_OR_NULL(dentry));
12298+ if (!no_lock) {
12299+ di_write_lock_child(dentry);
12300+ err = au_cmoo(dentry);
12301+ di_downgrade_lock(dentry, AuLock_IR);
12302+ if (!err)
12303+ err = args->open(file, vfsub_file_flags(file), NULL);
12304+ di_read_unlock(dentry, AuLock_IR);
12305+ } else { /* no di lock is taken here; args->h_file is passed through to ->open */
12306+ err = au_cmoo(dentry);
12307+ if (!err)
12308+ err = args->open(file, vfsub_file_flags(file),
12309+ args->h_file);
12310+ if (!err && au_fbstart(file) != au_dbstart(dentry))
12311+ /*
12312+ * cmoo happens after h_file was opened.
12313+ * need to refresh file later.
12314+ */
12315+ atomic_dec(&au_fi(file)->fi_generation);
12316+ }
1facf9fc 12317+
076b876e
AM
12318+ finfo = au_fi(file);
12319+ if (!err) {
12320+ finfo->fi_file = file;
12321+ au_sphl_add(&finfo->fi_hlist,
2000de60 12322+ &au_sbi(file->f_path.dentry->d_sb)->si_files);
076b876e 12323+ }
b912730e
AM
12324+ if (!no_lock)
12325+ fi_write_unlock(file); /* NOTE(review): presumably the lock was taken by au_finfo_init() - confirm */
12326+ else {
12327+ lockdep_off();
12328+ fi_write_unlock(file);
12329+ lockdep_on();
12330+ }
4a4d8108 12331+ if (unlikely(err)) {
076b876e 12332+ finfo->fi_hdir = NULL;
4a4d8108 12333+ au_finfo_fin(file);
1308ab2a 12334+ }
4a4d8108 12335+
4f0767ce 12336+out:
1308ab2a 12337+ return err;
12338+}
dece6358 12339+
4a4d8108 12340+int au_reopen_nondir(struct file *file)
1308ab2a 12341+{ /* re-opens the lower file of @file on branch au_dbstart(dentry) without O_TRUNC; returns 0 or a negative errno */
4a4d8108
AM
12342+ int err;
12343+ aufs_bindex_t bstart;
12344+ struct dentry *dentry;
12345+ struct file *h_file, *h_file_tmp;
1308ab2a 12346+
2000de60 12347+ dentry = file->f_path.dentry;
4a4d8108
AM
12348+ bstart = au_dbstart(dentry);
12349+ h_file_tmp = NULL;
12350+ if (au_fbstart(file) == bstart) {
12351+ h_file = au_hf_top(file);
12352+ if (file->f_mode == h_file->f_mode)
12353+ return 0; /* success */
12354+ h_file_tmp = h_file;
12355+ get_file(h_file_tmp); /* keep the old lower file so it can be put back if the open below fails */
12356+ au_set_h_fptr(file, bstart, NULL);
12357+ }
12358+ AuDebugOn(au_fi(file)->fi_hdir);
86dc4139
AM
12359+ /*
12360+ * it can happen
12361+ * file exists on both of rw and ro
12362+ * open --> dbstart and fbstart are both 0
12363+ * prepend a branch as rw, "rw" become ro
12364+ * remove rw/file
12365+ * delete the top branch, "rw" becomes rw again
12366+ * --> dbstart is 1, fbstart is still 0
12367+ * write --> fbstart is 0 but dbstart is 1
12368+ */
12369+ /* AuDebugOn(au_fbstart(file) < bstart); */
1308ab2a 12370+
4a4d8108 12371+ h_file = au_h_open(dentry, bstart, vfsub_file_flags(file) & ~O_TRUNC,
392086de 12372+ file, /*force_wr*/0);
4a4d8108 12373+ err = PTR_ERR(h_file);
86dc4139
AM
12374+ if (IS_ERR(h_file)) {
12375+ if (h_file_tmp) {
12376+ atomic_inc(&au_sbr(dentry->d_sb, bstart)->br_count); /* NOTE(review): presumably balances a count dropped by au_set_h_fptr(..., NULL) above - confirm */
12377+ au_set_h_fptr(file, bstart, h_file_tmp);
12378+ h_file_tmp = NULL; /* ownership handed back to @file: skip the fput() at out */
12379+ }
4a4d8108 12380+ goto out; /* todo: close all? */
86dc4139 12381+ }
4a4d8108
AM
12382+
12383+ err = 0;
12384+ au_set_fbstart(file, bstart);
12385+ au_set_h_fptr(file, bstart, h_file);
12386+ au_update_figen(file);
12387+ /* todo: necessary? */
12388+ /* file->f_ra = h_file->f_ra; */
12389+
4f0767ce 12390+out:
4a4d8108
AM
12391+ if (h_file_tmp)
12392+ fput(h_file_tmp); /* drops the reference taken by get_file() above */
12393+ return err;
1facf9fc 12394+}
12395+
1308ab2a 12396+/* ---------------------------------------------------------------------- */
12397+
4a4d8108
AM
12398+static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
12399+ struct dentry *hi_wh)
1facf9fc 12400+{ /* runs au_reopen_nondir() with di_bstart and the lower dentry at @btgt temporarily replaced by @btgt / @hi_wh */
4a4d8108
AM
12401+ int err;
12402+ aufs_bindex_t bstart;
12403+ struct au_dinfo *dinfo;
12404+ struct dentry *h_dentry;
12405+ struct au_hdentry *hdp;
1facf9fc 12406+
2000de60 12407+ dinfo = au_di(file->f_path.dentry);
4a4d8108 12408+ AuRwMustWriteLock(&dinfo->di_rwsem);
dece6358 12409+
4a4d8108
AM
12410+ bstart = dinfo->di_bstart; /* saved for the restore below */
12411+ dinfo->di_bstart = btgt;
12412+ hdp = dinfo->di_hdentry;
12413+ h_dentry = hdp[0 + btgt].hd_dentry; /* saved for the restore below */
12414+ hdp[0 + btgt].hd_dentry = hi_wh;
12415+ err = au_reopen_nondir(file);
12416+ hdp[0 + btgt].hd_dentry = h_dentry; /* restored whether or not the reopen succeeded */
12417+ dinfo->di_bstart = bstart;
1facf9fc 12418+
1facf9fc 12419+ return err;
12420+}
12421+
4a4d8108 12422+static int au_ready_to_write_wh(struct file *file, loff_t len,
86dc4139 12423+ aufs_bindex_t bcpup, struct au_pin *pin)
1facf9fc 12424+{ /* copy-up via au_sio_cpup_wh() when branch @bcpup has neither hi_wh nor a positive lower inode, otherwise reopen via au_reopen_wh() */
4a4d8108 12425+ int err;
027c5e7a 12426+ struct inode *inode, *h_inode;
c2b27bf2
AM
12427+ struct dentry *h_dentry, *hi_wh;
12428+ struct au_cp_generic cpg = {
2000de60 12429+ .dentry = file->f_path.dentry,
c2b27bf2
AM
12430+ .bdst = bcpup,
12431+ .bsrc = -1,
12432+ .len = len,
12433+ .pin = pin
12434+ };
1facf9fc 12435+
c2b27bf2 12436+ au_update_dbstart(cpg.dentry);
5527c038 12437+ inode = d_inode(cpg.dentry);
027c5e7a 12438+ h_inode = NULL;
c2b27bf2
AM
12439+ if (au_dbstart(cpg.dentry) <= bcpup
12440+ && au_dbend(cpg.dentry) >= bcpup) { /* @bcpup lies within the dentry's branch range */
12441+ h_dentry = au_h_dptr(cpg.dentry, bcpup);
5527c038
JR
12442+ if (h_dentry && d_is_positive(h_dentry))
12443+ h_inode = d_inode(h_dentry);
027c5e7a 12444+ }
4a4d8108 12445+ hi_wh = au_hi_wh(inode, bcpup);
027c5e7a 12446+ if (!hi_wh && !h_inode)
c2b27bf2 12447+ err = au_sio_cpup_wh(&cpg, file);
4a4d8108
AM
12448+ else
12449+ /* already copied-up after unlink */
12450+ err = au_reopen_wh(file, bcpup, hi_wh);
1facf9fc 12451+
4a4d8108 12452+ if (!err
38d290e6
JR
12453+ && (inode->i_nlink > 1
12454+ || (inode->i_state & I_LINKABLE))
c2b27bf2
AM
12455+ && au_opt_test(au_mntflags(cpg.dentry->d_sb), PLINK))
12456+ au_plink_append(inode, bcpup, au_h_dptr(cpg.dentry, bcpup)); /* only with the PLINK option and a multiply-linked or linkable inode */
1308ab2a 12457+
dece6358 12458+ return err;
1facf9fc 12459+}
12460+
4a4d8108
AM
12461+/*
12462+ * prepare the @file for writing.
12463+ */
12464+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
1facf9fc 12465+{ /* returns 0 with @pin held, after copying up and/or reopening the lower file when needed; negative errno otherwise */
4a4d8108 12466+ int err;
c2b27bf2 12467+ aufs_bindex_t dbstart;
c1595e42 12468+ struct dentry *parent;
86dc4139 12469+ struct inode *inode;
1facf9fc 12470+ struct super_block *sb;
4a4d8108 12471+ struct file *h_file;
c2b27bf2 12472+ struct au_cp_generic cpg = {
2000de60 12473+ .dentry = file->f_path.dentry,
c2b27bf2
AM
12474+ .bdst = -1,
12475+ .bsrc = -1,
12476+ .len = len,
12477+ .pin = pin,
12478+ .flags = AuCpup_DTIME
12479+ };
1facf9fc 12480+
c2b27bf2 12481+ sb = cpg.dentry->d_sb;
5527c038 12482+ inode = d_inode(cpg.dentry);
c2b27bf2
AM
12483+ cpg.bsrc = au_fbstart(file);
12484+ err = au_test_ro(sb, cpg.bsrc, inode);
4a4d8108 12485+ if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) { /* fast path: au_test_ro() is 0 and the lower file is open for write */
c2b27bf2
AM
12486+ err = au_pin(pin, cpg.dentry, cpg.bsrc, AuOpt_UDBA_NONE,
12487+ /*flags*/0);
1facf9fc 12488+ goto out;
4a4d8108 12489+ }
1facf9fc 12490+
027c5e7a 12491+ /* need to cpup or reopen */
c2b27bf2 12492+ parent = dget_parent(cpg.dentry);
4a4d8108 12493+ di_write_lock_parent(parent);
c2b27bf2
AM
12494+ err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
12495+ cpg.bdst = err; /* non-negative result is used as the destination branch index */
4a4d8108
AM
12496+ if (unlikely(err < 0))
12497+ goto out_dgrade;
12498+ err = 0;
12499+
c2b27bf2
AM
12500+ if (!d_unhashed(cpg.dentry) && !au_h_dptr(parent, cpg.bdst)) { /* parent has no lower dentry on bdst yet */
12501+ err = au_cpup_dirs(cpg.dentry, cpg.bdst);
1facf9fc 12502+ if (unlikely(err))
4a4d8108
AM
12503+ goto out_dgrade;
12504+ }
12505+
c2b27bf2 12506+ err = au_pin(pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
4a4d8108
AM
12507+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12508+ if (unlikely(err))
12509+ goto out_dgrade;
12510+
c2b27bf2 12511+ dbstart = au_dbstart(cpg.dentry);
c1595e42 12512+ if (dbstart <= cpg.bdst)
c2b27bf2 12513+ cpg.bsrc = cpg.bdst;
027c5e7a 12514+
c2b27bf2
AM
12515+ if (dbstart <= cpg.bdst /* just reopen */
12516+ || !d_unhashed(cpg.dentry) /* copyup and reopen */
027c5e7a 12517+ ) {
392086de 12518+ h_file = au_h_open_pre(cpg.dentry, cpg.bsrc, /*force_wr*/0);
86dc4139 12519+ if (IS_ERR(h_file))
027c5e7a 12520+ err = PTR_ERR(h_file);
86dc4139 12521+ else {
027c5e7a 12522+ di_downgrade_lock(parent, AuLock_IR);
c2b27bf2
AM
12523+ if (dbstart > cpg.bdst)
12524+ err = au_sio_cpup_simple(&cpg);
027c5e7a
AM
12525+ if (!err)
12526+ err = au_reopen_nondir(file);
c2b27bf2 12527+ au_h_open_post(cpg.dentry, cpg.bsrc, h_file);
027c5e7a 12528+ }
027c5e7a
AM
12529+ } else { /* copyup as wh and reopen */
12530+ /*
12531+ * since writable hfsplus branch is not supported,
12532+ * h_open_pre/post() are unnecessary.
12533+ */
c2b27bf2 12534+ err = au_ready_to_write_wh(file, len, cpg.bdst, pin);
4a4d8108 12535+ di_downgrade_lock(parent, AuLock_IR);
4a4d8108 12536+ }
4a4d8108
AM
12537+
12538+ if (!err) {
12539+ au_pin_set_parent_lflag(pin, /*lflag*/0);
12540+ goto out_dput; /* success */
12541+ }
12542+ au_unpin(pin); /* failure after au_pin() succeeded: release it before unlocking */
12543+ goto out_unlock;
1facf9fc 12544+
4f0767ce 12545+out_dgrade:
4a4d8108 12546+ di_downgrade_lock(parent, AuLock_IR);
4f0767ce 12547+out_unlock:
4a4d8108 12548+ di_read_unlock(parent, AuLock_IR);
4f0767ce 12549+out_dput: /* the success path enters here, i.e. without di_read_unlock(parent) */
4a4d8108 12550+ dput(parent);
4f0767ce 12551+out:
1facf9fc 12552+ return err;
12553+}
12554+
4a4d8108
AM
12555+/* ---------------------------------------------------------------------- */
12556+
12557+int au_do_flush(struct file *file, fl_owner_t id,
12558+ int (*flush)(struct file *file, fl_owner_t id))
1facf9fc 12559+{ /* calls @flush under the si, fi and ii read locks, then au_cpup_attr_timesizes(); returns @flush's result */
4a4d8108 12560+ int err;
1facf9fc 12561+ struct super_block *sb;
4a4d8108 12562+ struct inode *inode;
1facf9fc 12563+
c06a8ce3
AM
12564+ inode = file_inode(file);
12565+ sb = inode->i_sb;
4a4d8108
AM
12566+ si_noflush_read_lock(sb);
12567+ fi_read_lock(file);
b752ccd1 12568+ ii_read_lock_child(inode);
1facf9fc 12569+
4a4d8108
AM
12570+ err = flush(file, id);
12571+ au_cpup_attr_timesizes(inode); /* runs regardless of @flush's result */
1facf9fc 12572+
b752ccd1 12573+ ii_read_unlock(inode); /* locks are released in reverse order of acquisition */
4a4d8108 12574+ fi_read_unlock(file);
1308ab2a 12575+ si_read_unlock(sb);
dece6358 12576+ return err;
1facf9fc 12577+}
12578+
4a4d8108
AM
12579+/* ---------------------------------------------------------------------- */
12580+
12581+static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
1facf9fc 12582+{ /* when the inode's start branch differs from fi_btop: copy-up (plink case) or reopen on hi_wh; clears *need_reopen in the latter case */
4a4d8108 12583+ int err;
4a4d8108
AM
12584+ struct au_pin pin;
12585+ struct au_finfo *finfo;
c2b27bf2 12586+ struct dentry *parent, *hi_wh;
4a4d8108 12587+ struct inode *inode;
1facf9fc 12588+ struct super_block *sb;
c2b27bf2 12589+ struct au_cp_generic cpg = {
2000de60 12590+ .dentry = file->f_path.dentry,
c2b27bf2
AM
12591+ .bdst = -1,
12592+ .bsrc = -1,
12593+ .len = -1,
12594+ .pin = &pin,
12595+ .flags = AuCpup_DTIME
12596+ };
1facf9fc 12597+
4a4d8108
AM
12598+ FiMustWriteLock(file);
12599+
12600+ err = 0;
12601+ finfo = au_fi(file);
c2b27bf2 12602+ sb = cpg.dentry->d_sb;
5527c038 12603+ inode = d_inode(cpg.dentry);
c2b27bf2
AM
12604+ cpg.bdst = au_ibstart(inode);
12605+ if (cpg.bdst == finfo->fi_btop || IS_ROOT(cpg.dentry)) /* nothing to refresh */
1308ab2a 12606+ goto out;
dece6358 12607+
c2b27bf2
AM
12608+ parent = dget_parent(cpg.dentry);
12609+ if (au_test_ro(sb, cpg.bdst, inode)) {
4a4d8108 12610+ di_read_lock_parent(parent, !AuLock_IR);
c2b27bf2
AM
12611+ err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
12612+ cpg.bdst = err; /* non-negative result replaces the destination branch index */
4a4d8108
AM
12613+ di_read_unlock(parent, !AuLock_IR);
12614+ if (unlikely(err < 0))
12615+ goto out_parent;
12616+ err = 0;
1facf9fc 12617+ }
1facf9fc 12618+
4a4d8108 12619+ di_read_lock_parent(parent, AuLock_IR);
c2b27bf2 12620+ hi_wh = au_hi_wh(inode, cpg.bdst);
7f207e10
AM
12621+ if (!S_ISDIR(inode->i_mode)
12622+ && au_opt_test(au_mntflags(sb), PLINK)
4a4d8108 12623+ && au_plink_test(inode)
c2b27bf2
AM
12624+ && !d_unhashed(cpg.dentry)
12625+ && cpg.bdst < au_dbstart(cpg.dentry)) {
12626+ err = au_test_and_cpup_dirs(cpg.dentry, cpg.bdst);
4a4d8108
AM
12627+ if (unlikely(err))
12628+ goto out_unlock;
12629+
12630+ /* always superio. */
c2b27bf2 12631+ err = au_pin(&pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
4a4d8108 12632+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
367653fa 12633+ if (!err) {
c2b27bf2 12634+ err = au_sio_cpup_simple(&cpg);
367653fa
AM
12635+ au_unpin(&pin);
12636+ }
4a4d8108
AM
12637+ } else if (hi_wh) {
12638+ /* already copied-up after unlink */
c2b27bf2 12639+ err = au_reopen_wh(file, cpg.bdst, hi_wh);
4a4d8108
AM
12640+ *need_reopen = 0; /* cleared even when au_reopen_wh() failed; the caller checks err first */
12641+ }
1facf9fc 12642+
4f0767ce 12643+out_unlock:
4a4d8108 12644+ di_read_unlock(parent, AuLock_IR);
4f0767ce 12645+out_parent:
4a4d8108 12646+ dput(parent);
4f0767ce 12647+out:
1308ab2a 12648+ return err;
dece6358 12649+}
1facf9fc 12650+
4a4d8108 12651+static void au_do_refresh_dir(struct file *file)
dece6358 12652+{ /* re-sorts fidir->fd_hfile[] by each branch's current index, then recomputes fi_btop and fd_bbot */
4a4d8108
AM
12653+ aufs_bindex_t bindex, bend, new_bindex, brid;
12654+ struct au_hfile *p, tmp, *q;
12655+ struct au_finfo *finfo;
1308ab2a 12656+ struct super_block *sb;
4a4d8108 12657+ struct au_fidir *fidir;
1facf9fc 12658+
4a4d8108 12659+ FiMustWriteLock(file);
1facf9fc 12660+
2000de60 12661+ sb = file->f_path.dentry->d_sb;
4a4d8108
AM
12662+ finfo = au_fi(file);
12663+ fidir = finfo->fi_hdir;
12664+ AuDebugOn(!fidir);
12665+ p = fidir->fd_hfile + finfo->fi_btop;
12666+ brid = p->hf_br->br_id; /* id of the branch at the old fi_btop; used in the else branch below */
12667+ bend = fidir->fd_bbot;
12668+ for (bindex = finfo->fi_btop; bindex <= bend; bindex++, p++) {
12669+ if (!p->hf_file)
12670+ continue;
1308ab2a 12671+
4a4d8108
AM
12672+ new_bindex = au_br_index(sb, p->hf_br->br_id);
12673+ if (new_bindex == bindex)
12674+ continue;
12675+ if (new_bindex < 0) { /* au_br_index() found no such branch id: drop the lower file */
12676+ au_set_h_fptr(file, bindex, NULL);
12677+ continue;
12678+ }
1308ab2a 12679+
4a4d8108
AM
12680+ /* swap two lower inode, and loop again */
12681+ q = fidir->fd_hfile + new_bindex;
12682+ tmp = *q;
12683+ *q = *p;
12684+ *p = tmp;
12685+ if (tmp.hf_file) { /* the swapped-in entry is in use: revisit this same slot */
12686+ bindex--;
12687+ p--;
12688+ }
12689+ }
1308ab2a 12690+
4a4d8108 12691+ p = fidir->fd_hfile;
2000de60 12692+ if (!au_test_mmapped(file) && !d_unlinked(file->f_path.dentry)) {
4a4d8108
AM
12693+ bend = au_sbend(sb);
12694+ for (finfo->fi_btop = 0; finfo->fi_btop <= bend;
12695+ finfo->fi_btop++, p++) /* fi_btop itself is the loop variable */
12696+ if (p->hf_file) {
c06a8ce3 12697+ if (file_inode(p->hf_file))
4a4d8108 12698+ break;
c1595e42 12699+ au_hfput(p, file);
4a4d8108
AM
12700+ }
12701+ } else {
12702+ bend = au_br_index(sb, brid);
12703+ for (finfo->fi_btop = 0; finfo->fi_btop < bend;
12704+ finfo->fi_btop++, p++) /* puts every lower file above the branch that was at the old fi_btop */
12705+ if (p->hf_file)
12706+ au_hfput(p, file);
12707+ bend = au_sbend(sb);
12708+ }
1308ab2a 12709+
4a4d8108
AM
12710+ p = fidir->fd_hfile + bend;
12711+ for (fidir->fd_bbot = bend; fidir->fd_bbot >= finfo->fi_btop;
12712+ fidir->fd_bbot--, p--) /* scan down from the last branch for the new fd_bbot */
12713+ if (p->hf_file) {
c06a8ce3 12714+ if (file_inode(p->hf_file))
4a4d8108 12715+ break;
c1595e42 12716+ au_hfput(p, file);
4a4d8108
AM
12717+ }
12718+ AuDebugOn(fidir->fd_bbot < finfo->fi_btop);
1308ab2a 12719+}
12720+
4a4d8108
AM
12721+/*
12722+ * after branch manipulating, refresh the file.
12723+ */
12724+static int refresh_file(struct file *file, int (*reopen)(struct file *file))
1facf9fc 12725+{ /* updates finfo's branch indices, then refreshes by inode and calls @reopen when needed; on error a dir's lower files are all closed */
4a4d8108
AM
12726+ int err, need_reopen;
12727+ aufs_bindex_t bend, bindex;
12728+ struct dentry *dentry;
1308ab2a 12729+ struct au_finfo *finfo;
4a4d8108 12730+ struct au_hfile *hfile;
1facf9fc 12731+
2000de60 12732+ dentry = file->f_path.dentry;
1308ab2a 12733+ finfo = au_fi(file);
4a4d8108
AM
12734+ if (!finfo->fi_hdir) { /* no fidir: a single lower file kept in fi_htop */
12735+ hfile = &finfo->fi_htop;
12736+ AuDebugOn(!hfile->hf_file);
12737+ bindex = au_br_index(dentry->d_sb, hfile->hf_br->br_id);
12738+ AuDebugOn(bindex < 0);
12739+ if (bindex != finfo->fi_btop)
12740+ au_set_fbstart(file, bindex);
12741+ } else {
12742+ err = au_fidir_realloc(finfo, au_sbend(dentry->d_sb) + 1); /* one slot per current branch */
12743+ if (unlikely(err))
12744+ goto out;
12745+ au_do_refresh_dir(file);
12746+ }
1facf9fc 12747+
4a4d8108
AM
12748+ err = 0;
12749+ need_reopen = 1;
12750+ if (!au_test_mmapped(file))
12751+ err = au_file_refresh_by_inode(file, &need_reopen);
027c5e7a 12752+ if (!err && need_reopen && !d_unlinked(dentry))
4a4d8108
AM
12753+ err = reopen(file);
12754+ if (!err) {
12755+ au_update_figen(file);
12756+ goto out; /* success */
12757+ }
12758+
12759+ /* error, close all lower files */
12760+ if (finfo->fi_hdir) {
12761+ bend = au_fbend_dir(file);
12762+ for (bindex = au_fbstart(file); bindex <= bend; bindex++)
12763+ au_set_h_fptr(file, bindex, NULL);
12764+ }
1facf9fc 12765+
4f0767ce 12766+out:
1facf9fc 12767+ return err;
12768+}
12769+
4a4d8108
AM
12770+/* common function to regular file and dir */
12771+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
12772+ int wlock)
dece6358 12773+{ /* returns 0 with the fi and di locks held (write if @wlock, else downgraded to read); on error both locks are released */
1308ab2a 12774+ int err;
4a4d8108
AM
12775+ unsigned int sigen, figen;
12776+ aufs_bindex_t bstart;
12777+ unsigned char pseudo_link;
12778+ struct dentry *dentry;
12779+ struct inode *inode;
1facf9fc 12780+
4a4d8108 12781+ err = 0;
2000de60 12782+ dentry = file->f_path.dentry;
5527c038 12783+ inode = d_inode(dentry);
4a4d8108
AM
12784+ sigen = au_sigen(dentry->d_sb);
12785+ fi_write_lock(file);
12786+ figen = au_figen(file);
12787+ di_write_lock_child(dentry);
12788+ bstart = au_dbstart(dentry);
12789+ pseudo_link = (bstart != au_ibstart(inode)); /* dentry and inode disagree on the start branch */
12790+ if (sigen == figen && !pseudo_link && au_fbstart(file) == bstart) { /* up to date: no refresh needed */
12791+ if (!wlock) {
12792+ di_downgrade_lock(dentry, AuLock_IR);
12793+ fi_downgrade_lock(file);
12794+ }
12795+ goto out; /* success */
12796+ }
dece6358 12797+
4a4d8108 12798+ AuDbg("sigen %d, figen %d\n", sigen, figen);
027c5e7a 12799+ if (au_digen_test(dentry, sigen)) {
4a4d8108 12800+ err = au_reval_dpath(dentry, sigen);
027c5e7a 12801+ AuDebugOn(!err && au_digen_test(dentry, sigen));
4a4d8108 12802+ }
dece6358 12803+
027c5e7a
AM
12804+ if (!err)
12805+ err = refresh_file(file, reopen);
4a4d8108
AM
12806+ if (!err) {
12807+ if (!wlock) {
12808+ di_downgrade_lock(dentry, AuLock_IR);
12809+ fi_downgrade_lock(file);
12810+ }
12811+ } else { /* failure: the caller gets no locks */
12812+ di_write_unlock(dentry);
12813+ fi_write_unlock(file);
12814+ }
1facf9fc 12815+
4f0767ce 12816+out:
1308ab2a 12817+ return err;
12818+}
1facf9fc 12819+
4a4d8108
AM
12820+/* ---------------------------------------------------------------------- */
12821+
12822+/* cf. aufs_nopage() */
12823+/* for madvise(2) */
12824+static int aufs_readpage(struct file *file __maybe_unused, struct page *page)
1308ab2a 12825+{ /* reads nothing: only unlocks @page and returns 0 */
4a4d8108
AM
12826+ unlock_page(page);
12827+ return 0;
12828+}
1facf9fc 12829+
4a4d8108 12830+/* it will never be called, but necessary to support O_DIRECT */
5527c038
JR
12831+static ssize_t aufs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
12832+ loff_t offset)
4a4d8108 12833+{ BUG(); return 0; } /* placeholder so that .direct_IO is non-NULL in aufs_aop; BUG()s if ever reached */
1facf9fc 12834+
4a4d8108
AM
12835+/* they will never be called. */
12836+#ifdef CONFIG_AUFS_DEBUG /* debug-only stubs: each one just calls AuUnsupport() */
12837+static int aufs_write_begin(struct file *file, struct address_space *mapping,
12838+ loff_t pos, unsigned len, unsigned flags,
12839+ struct page **pagep, void **fsdata)
12840+{ AuUnsupport(); return 0; }
12841+static int aufs_write_end(struct file *file, struct address_space *mapping,
12842+ loff_t pos, unsigned len, unsigned copied,
12843+ struct page *page, void *fsdata)
12844+{ AuUnsupport(); return 0; }
12845+static int aufs_writepage(struct page *page, struct writeback_control *wbc)
12846+{ AuUnsupport(); return 0; }
1308ab2a 12847+
4a4d8108
AM
12848+static int aufs_set_page_dirty(struct page *page)
12849+{ AuUnsupport(); return 0; }
392086de
AM
12850+static void aufs_invalidatepage(struct page *page, unsigned int offset,
12851+ unsigned int length)
4a4d8108
AM
12852+{ AuUnsupport(); }
12853+static int aufs_releasepage(struct page *page, gfp_t gfp)
12854+{ AuUnsupport(); return 0; }
79b8bda9 12855+#if 0 /* called by memory compaction regardless file */
4a4d8108 12856+static int aufs_migratepage(struct address_space *mapping, struct page *newpage,
7eafdf33 12857+ struct page *page, enum migrate_mode mode)
4a4d8108 12858+{ AuUnsupport(); return 0; }
79b8bda9 12859+#endif /* #if 0: aufs_migratepage is compiled out */
4a4d8108
AM
12860+static int aufs_launder_page(struct page *page)
12861+{ AuUnsupport(); return 0; }
12862+static int aufs_is_partially_uptodate(struct page *page,
38d290e6
JR
12863+ unsigned long from,
12864+ unsigned long count)
4a4d8108 12865+{ AuUnsupport(); return 0; }
392086de
AM
12866+static void aufs_is_dirty_writeback(struct page *page, bool *dirty,
12867+ bool *writeback)
12868+{ AuUnsupport(); }
4a4d8108
AM
12869+static int aufs_error_remove_page(struct address_space *mapping,
12870+ struct page *page)
12871+{ AuUnsupport(); return 0; }
b4510431
AM
12872+static int aufs_swap_activate(struct swap_info_struct *sis, struct file *file,
12873+ sector_t *span)
12874+{ AuUnsupport(); return 0; }
12875+static void aufs_swap_deactivate(struct file *file)
12876+{ AuUnsupport(); }
4a4d8108
AM
12877+#endif /* CONFIG_AUFS_DEBUG */
12878+
12879+const struct address_space_operations aufs_aop = {
12880+ .readpage = aufs_readpage,
12881+ .direct_IO = aufs_direct_IO,
4a4d8108
AM
12882+#ifdef CONFIG_AUFS_DEBUG
12883+ .writepage = aufs_writepage,
4a4d8108
AM
12884+ /* no writepages, because of writepage */
12885+ .set_page_dirty = aufs_set_page_dirty,
12886+ /* no readpages, because of readpage */
12887+ .write_begin = aufs_write_begin,
12888+ .write_end = aufs_write_end,
12889+ /* no bmap, no block device */
12890+ .invalidatepage = aufs_invalidatepage,
12891+ .releasepage = aufs_releasepage,
79b8bda9
AM
12892+ /* is fallback_migrate_page ok? */
12893+ /* .migratepage = aufs_migratepage, */
4a4d8108
AM
12894+ .launder_page = aufs_launder_page,
12895+ .is_partially_uptodate = aufs_is_partially_uptodate,
392086de 12896+ .is_dirty_writeback = aufs_is_dirty_writeback,
b4510431
AM
12897+ .error_remove_page = aufs_error_remove_page,
12898+ .swap_activate = aufs_swap_activate,
12899+ .swap_deactivate = aufs_swap_deactivate
4a4d8108 12900+#endif /* CONFIG_AUFS_DEBUG */
dece6358 12901+};
7f207e10
AM
12902diff -urN /usr/share/empty/fs/aufs/file.h linux/fs/aufs/file.h
12903--- /usr/share/empty/fs/aufs/file.h 1970-01-01 01:00:00.000000000 +0100
8cdd5066 12904+++ linux/fs/aufs/file.h 2016-02-28 11:26:32.573304539 +0100
b912730e 12905@@ -0,0 +1,291 @@
4a4d8108 12906+/*
8cdd5066 12907+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
12908+ *
12909+ * This program, aufs is free software; you can redistribute it and/or modify
12910+ * it under the terms of the GNU General Public License as published by
12911+ * the Free Software Foundation; either version 2 of the License, or
12912+ * (at your option) any later version.
12913+ *
12914+ * This program is distributed in the hope that it will be useful,
12915+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12916+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12917+ * GNU General Public License for more details.
12918+ *
12919+ * You should have received a copy of the GNU General Public License
523b37e3 12920+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 12921+ */
1facf9fc 12922+
4a4d8108
AM
12923+/*
12924+ * file operations
12925+ */
1facf9fc 12926+
4a4d8108
AM
12927+#ifndef __AUFS_FILE_H__
12928+#define __AUFS_FILE_H__
1facf9fc 12929+
4a4d8108 12930+#ifdef __KERNEL__
1facf9fc 12931+
2cbb1c4b 12932+#include <linux/file.h>
4a4d8108
AM
12933+#include <linux/fs.h>
12934+#include <linux/poll.h>
4a4d8108 12935+#include "rwsem.h"
1facf9fc 12936+
4a4d8108
AM
12937+struct au_branch;
12938+struct au_hfile {
12939+ struct file *hf_file;
12940+ struct au_branch *hf_br;
12941+};
1facf9fc 12942+
4a4d8108
AM
12943+struct au_vdir;
12944+struct au_fidir {
12945+ aufs_bindex_t fd_bbot;
12946+ aufs_bindex_t fd_nent;
12947+ struct au_vdir *fd_vdir_cache;
12948+ struct au_hfile fd_hfile[];
12949+};
1facf9fc 12950+
4a4d8108 12951+static inline int au_fidir_sz(int nent)
dece6358 12952+{
4f0767ce
JR
12953+ AuDebugOn(nent < 0);
12954+ return sizeof(struct au_fidir) + sizeof(struct au_hfile) * nent;
4a4d8108 12955+}
1facf9fc 12956+
4a4d8108
AM
12957+struct au_finfo {
12958+ atomic_t fi_generation;
dece6358 12959+
4a4d8108
AM
12960+ struct au_rwsem fi_rwsem;
12961+ aufs_bindex_t fi_btop;
12962+
12963+ /* do not union them */
12964+ struct { /* for non-dir */
12965+ struct au_hfile fi_htop;
2cbb1c4b 12966+ atomic_t fi_mmapped;
4a4d8108
AM
12967+ };
12968+ struct au_fidir *fi_hdir; /* for dir only */
523b37e3
AM
12969+
12970+ struct hlist_node fi_hlist;
12971+ struct file *fi_file; /* very ugly */
4a4d8108 12972+} ____cacheline_aligned_in_smp;
1facf9fc 12973+
4a4d8108 12974+/* ---------------------------------------------------------------------- */
1facf9fc 12975+
4a4d8108
AM
12976+/* file.c */
12977+extern const struct address_space_operations aufs_aop;
12978+unsigned int au_file_roflags(unsigned int flags);
12979+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
392086de 12980+ struct file *file, int force_wr);
b912730e
AM
12981+struct au_do_open_args {
12982+ int no_lock;
12983+ int (*open)(struct file *file, int flags,
12984+ struct file *h_file);
12985+ struct au_fidir *fidir;
12986+ struct file *h_file;
12987+};
12988+int au_do_open(struct file *file, struct au_do_open_args *args);
4a4d8108
AM
12989+int au_reopen_nondir(struct file *file);
12990+struct au_pin;
12991+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin);
12992+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
12993+ int wlock);
12994+int au_do_flush(struct file *file, fl_owner_t id,
12995+ int (*flush)(struct file *file, fl_owner_t id));
1facf9fc 12996+
4a4d8108
AM
12997+/* poll.c */
12998+#ifdef CONFIG_AUFS_POLL
12999+unsigned int aufs_poll(struct file *file, poll_table *wait);
13000+#endif
1facf9fc 13001+
4a4d8108
AM
13002+#ifdef CONFIG_AUFS_BR_HFSPLUS
13003+/* hfsplus.c */
392086de
AM
13004+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
13005+ int force_wr);
4a4d8108
AM
13006+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
13007+ struct file *h_file);
13008+#else
c1595e42
JR
13009+AuStub(struct file *, au_h_open_pre, return NULL, struct dentry *dentry,
13010+ aufs_bindex_t bindex, int force_wr)
4a4d8108
AM
13011+AuStubVoid(au_h_open_post, struct dentry *dentry, aufs_bindex_t bindex,
13012+ struct file *h_file);
13013+#endif
1facf9fc 13014+
4a4d8108
AM
13015+/* f_op.c */
13016+extern const struct file_operations aufs_file_fop;
b912730e 13017+int au_do_open_nondir(struct file *file, int flags, struct file *h_file);
4a4d8108 13018+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file);
b912730e 13019+struct file *au_read_pre(struct file *file, int keep_fi);
4a4d8108 13020+
4a4d8108
AM
13021+/* finfo.c */
13022+void au_hfput(struct au_hfile *hf, struct file *file);
13023+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex,
13024+ struct file *h_file);
1facf9fc 13025+
4a4d8108 13026+void au_update_figen(struct file *file);
4a4d8108
AM
13027+struct au_fidir *au_fidir_alloc(struct super_block *sb);
13028+int au_fidir_realloc(struct au_finfo *finfo, int nbr);
1facf9fc 13029+
4a4d8108
AM
13030+void au_fi_init_once(void *_fi);
13031+void au_finfo_fin(struct file *file);
13032+int au_finfo_init(struct file *file, struct au_fidir *fidir);
1facf9fc 13033+
4a4d8108
AM
13034+/* ioctl.c */
13035+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg);
b752ccd1
AM
13036+#ifdef CONFIG_COMPAT
13037+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
13038+ unsigned long arg);
c2b27bf2
AM
13039+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
13040+ unsigned long arg);
b752ccd1 13041+#endif
1facf9fc 13042+
4a4d8108 13043+/* ---------------------------------------------------------------------- */
1facf9fc 13044+
4a4d8108
AM
13045+static inline struct au_finfo *au_fi(struct file *file)
13046+{
38d290e6 13047+ return file->private_data;
4a4d8108 13048+}
1facf9fc 13049+
4a4d8108 13050+/* ---------------------------------------------------------------------- */
1facf9fc 13051+
4a4d8108
AM
13052+/*
13053+ * fi_read_lock, fi_write_lock,
13054+ * fi_read_unlock, fi_write_unlock, fi_downgrade_lock
13055+ */
13056+AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem);
1308ab2a 13057+
4a4d8108
AM
13058+#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem)
13059+#define FiMustAnyLock(f) AuRwMustAnyLock(&au_fi(f)->fi_rwsem)
13060+#define FiMustWriteLock(f) AuRwMustWriteLock(&au_fi(f)->fi_rwsem)
1facf9fc 13061+
1308ab2a 13062+/* ---------------------------------------------------------------------- */
13063+
4a4d8108
AM
13064+/* todo: hard/soft set? */
13065+static inline aufs_bindex_t au_fbstart(struct file *file)
dece6358 13066+{
4a4d8108
AM
13067+ FiMustAnyLock(file);
13068+ return au_fi(file)->fi_btop;
13069+}
dece6358 13070+
4a4d8108
AM
13071+static inline aufs_bindex_t au_fbend_dir(struct file *file)
13072+{
13073+ FiMustAnyLock(file);
13074+ AuDebugOn(!au_fi(file)->fi_hdir);
13075+ return au_fi(file)->fi_hdir->fd_bbot;
13076+}
1facf9fc 13077+
4a4d8108
AM
13078+static inline struct au_vdir *au_fvdir_cache(struct file *file)
13079+{
13080+ FiMustAnyLock(file);
13081+ AuDebugOn(!au_fi(file)->fi_hdir);
13082+ return au_fi(file)->fi_hdir->fd_vdir_cache;
13083+}
1facf9fc 13084+
4a4d8108
AM
13085+static inline void au_set_fbstart(struct file *file, aufs_bindex_t bindex)
13086+{
13087+ FiMustWriteLock(file);
13088+ au_fi(file)->fi_btop = bindex;
13089+}
1facf9fc 13090+
4a4d8108
AM
13091+static inline void au_set_fbend_dir(struct file *file, aufs_bindex_t bindex)
13092+{
13093+ FiMustWriteLock(file);
13094+ AuDebugOn(!au_fi(file)->fi_hdir);
13095+ au_fi(file)->fi_hdir->fd_bbot = bindex;
13096+}
1308ab2a 13097+
4a4d8108
AM
13098+static inline void au_set_fvdir_cache(struct file *file,
13099+ struct au_vdir *vdir_cache)
13100+{
13101+ FiMustWriteLock(file);
13102+ AuDebugOn(!au_fi(file)->fi_hdir);
13103+ au_fi(file)->fi_hdir->fd_vdir_cache = vdir_cache;
13104+}
dece6358 13105+
4a4d8108
AM
13106+static inline struct file *au_hf_top(struct file *file)
13107+{
13108+ FiMustAnyLock(file);
13109+ AuDebugOn(au_fi(file)->fi_hdir);
13110+ return au_fi(file)->fi_htop.hf_file;
13111+}
1facf9fc 13112+
4a4d8108
AM
13113+static inline struct file *au_hf_dir(struct file *file, aufs_bindex_t bindex)
13114+{
13115+ FiMustAnyLock(file);
13116+ AuDebugOn(!au_fi(file)->fi_hdir);
13117+ return au_fi(file)->fi_hdir->fd_hfile[0 + bindex].hf_file;
dece6358
AM
13118+}
13119+
4a4d8108
AM
13120+/* todo: memory barrier? */
13121+static inline unsigned int au_figen(struct file *f)
dece6358 13122+{
4a4d8108
AM
13123+ return atomic_read(&au_fi(f)->fi_generation);
13124+}
dece6358 13125+
2cbb1c4b
JR
13126+static inline void au_set_mmapped(struct file *f)
13127+{
13128+ if (atomic_inc_return(&au_fi(f)->fi_mmapped))
13129+ return;
0c3ec466 13130+ pr_warn("fi_mmapped wrapped around\n");
2cbb1c4b
JR
13131+ while (!atomic_inc_return(&au_fi(f)->fi_mmapped))
13132+ ;
13133+}
13134+
13135+static inline void au_unset_mmapped(struct file *f)
13136+{
13137+ atomic_dec(&au_fi(f)->fi_mmapped);
13138+}
13139+
4a4d8108
AM
13140+static inline int au_test_mmapped(struct file *f)
13141+{
2cbb1c4b
JR
13142+ return atomic_read(&au_fi(f)->fi_mmapped);
13143+}
13144+
13145+/* customize vma->vm_file */
13146+
13147+static inline void au_do_vm_file_reset(struct vm_area_struct *vma,
13148+ struct file *file)
13149+{
53392da6
AM
13150+ struct file *f;
13151+
13152+ f = vma->vm_file;
2cbb1c4b
JR
13153+ get_file(file);
13154+ vma->vm_file = file;
53392da6 13155+ fput(f);
2cbb1c4b
JR
13156+}
13157+
13158+#ifdef CONFIG_MMU
13159+#define AuDbgVmRegion(file, vma) do {} while (0)
13160+
13161+static inline void au_vm_file_reset(struct vm_area_struct *vma,
13162+ struct file *file)
13163+{
13164+ au_do_vm_file_reset(vma, file);
13165+}
13166+#else
13167+#define AuDbgVmRegion(file, vma) \
13168+ AuDebugOn((vma)->vm_region && (vma)->vm_region->vm_file != (file))
13169+
13170+static inline void au_vm_file_reset(struct vm_area_struct *vma,
13171+ struct file *file)
13172+{
53392da6
AM
13173+ struct file *f;
13174+
2cbb1c4b 13175+ au_do_vm_file_reset(vma, file);
53392da6 13176+ f = vma->vm_region->vm_file;
2cbb1c4b
JR
13177+ get_file(file);
13178+ vma->vm_region->vm_file = file;
53392da6 13179+ fput(f);
2cbb1c4b
JR
13180+}
13181+#endif /* CONFIG_MMU */
13182+
13183+/* handle vma->vm_prfile */
fb47a38f 13184+static inline void au_vm_prfile_set(struct vm_area_struct *vma,
2cbb1c4b
JR
13185+ struct file *file)
13186+{
2cbb1c4b
JR
13187+ get_file(file);
13188+ vma->vm_prfile = file;
13189+#ifndef CONFIG_MMU
13190+ get_file(file);
13191+ vma->vm_region->vm_prfile = file;
13192+#endif
fb47a38f 13193+}
1308ab2a 13194+
4a4d8108
AM
13195+#endif /* __KERNEL__ */
13196+#endif /* __AUFS_FILE_H__ */
7f207e10
AM
13197diff -urN /usr/share/empty/fs/aufs/finfo.c linux/fs/aufs/finfo.c
13198--- /usr/share/empty/fs/aufs/finfo.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 13199+++ linux/fs/aufs/finfo.c 2016-02-28 11:26:32.573304539 +0100
be52b249 13200@@ -0,0 +1,156 @@
4a4d8108 13201+/*
8cdd5066 13202+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
13203+ *
13204+ * This program, aufs is free software; you can redistribute it and/or modify
13205+ * it under the terms of the GNU General Public License as published by
13206+ * the Free Software Foundation; either version 2 of the License, or
13207+ * (at your option) any later version.
13208+ *
13209+ * This program is distributed in the hope that it will be useful,
13210+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13211+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13212+ * GNU General Public License for more details.
13213+ *
13214+ * You should have received a copy of the GNU General Public License
523b37e3 13215+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 13216+ */
1308ab2a 13217+
4a4d8108
AM
13218+/*
13219+ * file private data
13220+ */
1facf9fc 13221+
4a4d8108 13222+#include "aufs.h"
1facf9fc 13223+
4a4d8108
AM
13224+void au_hfput(struct au_hfile *hf, struct file *file)
13225+{
13226+ /* todo: direct access f_flags */
2cbb1c4b 13227+ if (vfsub_file_flags(file) & __FMODE_EXEC)
4a4d8108
AM
13228+ allow_write_access(hf->hf_file);
13229+ fput(hf->hf_file);
13230+ hf->hf_file = NULL;
e49829fe 13231+ atomic_dec(&hf->hf_br->br_count);
4a4d8108
AM
13232+ hf->hf_br = NULL;
13233+}
1facf9fc 13234+
4a4d8108
AM
13235+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val)
13236+{
13237+ struct au_finfo *finfo = au_fi(file);
13238+ struct au_hfile *hf;
13239+ struct au_fidir *fidir;
13240+
13241+ fidir = finfo->fi_hdir;
13242+ if (!fidir) {
13243+ AuDebugOn(finfo->fi_btop != bindex);
13244+ hf = &finfo->fi_htop;
13245+ } else
13246+ hf = fidir->fd_hfile + bindex;
13247+
13248+ if (hf && hf->hf_file)
13249+ au_hfput(hf, file);
13250+ if (val) {
13251+ FiMustWriteLock(file);
b912730e 13252+ AuDebugOn(IS_ERR_OR_NULL(file->f_path.dentry));
4a4d8108 13253+ hf->hf_file = val;
2000de60 13254+ hf->hf_br = au_sbr(file->f_path.dentry->d_sb, bindex);
1308ab2a 13255+ }
4a4d8108 13256+}
1facf9fc 13257+
4a4d8108
AM
13258+void au_update_figen(struct file *file)
13259+{
2000de60 13260+ atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_path.dentry));
4a4d8108 13261+ /* smp_mb(); */ /* atomic_set */
1facf9fc 13262+}
13263+
4a4d8108
AM
13264+/* ---------------------------------------------------------------------- */
13265+
4a4d8108
AM
13266+struct au_fidir *au_fidir_alloc(struct super_block *sb)
13267+{
13268+ struct au_fidir *fidir;
13269+ int nbr;
13270+
13271+ nbr = au_sbend(sb) + 1;
13272+ if (nbr < 2)
13273+ nbr = 2; /* initial allocate for 2 branches */
13274+ fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS);
13275+ if (fidir) {
13276+ fidir->fd_bbot = -1;
13277+ fidir->fd_nent = nbr;
4a4d8108
AM
13278+ }
13279+
13280+ return fidir;
13281+}
13282+
13283+int au_fidir_realloc(struct au_finfo *finfo, int nbr)
13284+{
13285+ int err;
13286+ struct au_fidir *fidir, *p;
13287+
13288+ AuRwMustWriteLock(&finfo->fi_rwsem);
13289+ fidir = finfo->fi_hdir;
13290+ AuDebugOn(!fidir);
13291+
13292+ err = -ENOMEM;
13293+ p = au_kzrealloc(fidir, au_fidir_sz(fidir->fd_nent), au_fidir_sz(nbr),
13294+ GFP_NOFS);
13295+ if (p) {
13296+ p->fd_nent = nbr;
13297+ finfo->fi_hdir = p;
13298+ err = 0;
13299+ }
1facf9fc 13300+
dece6358 13301+ return err;
1facf9fc 13302+}
1308ab2a 13303+
13304+/* ---------------------------------------------------------------------- */
13305+
4a4d8108 13306+void au_finfo_fin(struct file *file)
1308ab2a 13307+{
4a4d8108
AM
13308+ struct au_finfo *finfo;
13309+
2000de60 13310+ au_nfiles_dec(file->f_path.dentry->d_sb);
7f207e10 13311+
4a4d8108
AM
13312+ finfo = au_fi(file);
13313+ AuDebugOn(finfo->fi_hdir);
13314+ AuRwDestroy(&finfo->fi_rwsem);
13315+ au_cache_free_finfo(finfo);
1308ab2a 13316+}
1308ab2a 13317+
e49829fe 13318+void au_fi_init_once(void *_finfo)
4a4d8108 13319+{
e49829fe 13320+ struct au_finfo *finfo = _finfo;
2cbb1c4b 13321+ static struct lock_class_key aufs_fi;
1308ab2a 13322+
e49829fe
JR
13323+ au_rw_init(&finfo->fi_rwsem);
13324+ au_rw_class(&finfo->fi_rwsem, &aufs_fi);
4a4d8108 13325+}
1308ab2a 13326+
4a4d8108
AM
13327+int au_finfo_init(struct file *file, struct au_fidir *fidir)
13328+{
1716fcea 13329+ int err;
4a4d8108
AM
13330+ struct au_finfo *finfo;
13331+ struct dentry *dentry;
13332+
13333+ err = -ENOMEM;
2000de60 13334+ dentry = file->f_path.dentry;
4a4d8108
AM
13335+ finfo = au_cache_alloc_finfo();
13336+ if (unlikely(!finfo))
13337+ goto out;
13338+
13339+ err = 0;
7f207e10 13340+ au_nfiles_inc(dentry->d_sb);
1716fcea
AM
13341+ /* verbose coding for lock class name */
13342+ if (!fidir)
13343+ au_rw_class(&finfo->fi_rwsem, au_lc_key + AuLcNonDir_FIINFO);
13344+ else
13345+ au_rw_class(&finfo->fi_rwsem, au_lc_key + AuLcDir_FIINFO);
4a4d8108
AM
13346+ au_rw_write_lock(&finfo->fi_rwsem);
13347+ finfo->fi_btop = -1;
13348+ finfo->fi_hdir = fidir;
13349+ atomic_set(&finfo->fi_generation, au_digen(dentry));
13350+ /* smp_mb(); */ /* atomic_set */
13351+
13352+ file->private_data = finfo;
13353+
13354+out:
13355+ return err;
13356+}
7f207e10
AM
13357diff -urN /usr/share/empty/fs/aufs/f_op.c linux/fs/aufs/f_op.c
13358--- /usr/share/empty/fs/aufs/f_op.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066
JR
13359+++ linux/fs/aufs/f_op.c 2016-02-28 11:26:32.569971135 +0100
13360@@ -0,0 +1,748 @@
dece6358 13361+/*
8cdd5066 13362+ * Copyright (C) 2005-2016 Junjiro R. Okajima
dece6358
AM
13363+ *
13364+ * This program, aufs is free software; you can redistribute it and/or modify
13365+ * it under the terms of the GNU General Public License as published by
13366+ * the Free Software Foundation; either version 2 of the License, or
13367+ * (at your option) any later version.
13368+ *
13369+ * This program is distributed in the hope that it will be useful,
13370+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13371+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13372+ * GNU General Public License for more details.
13373+ *
13374+ * You should have received a copy of the GNU General Public License
523b37e3 13375+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
dece6358 13376+ */
1facf9fc 13377+
13378+/*
4a4d8108 13379+ * file and vm operations
1facf9fc 13380+ */
dece6358 13381+
86dc4139 13382+#include <linux/aio.h>
4a4d8108
AM
13383+#include <linux/fs_stack.h>
13384+#include <linux/mman.h>
4a4d8108 13385+#include <linux/security.h>
dece6358
AM
13386+#include "aufs.h"
13387+
b912730e 13388+int au_do_open_nondir(struct file *file, int flags, struct file *h_file)
1facf9fc 13389+{
4a4d8108
AM
13390+ int err;
13391+ aufs_bindex_t bindex;
8cdd5066 13392+ struct dentry *dentry, *h_dentry;
4a4d8108 13393+ struct au_finfo *finfo;
38d290e6 13394+ struct inode *h_inode;
4a4d8108
AM
13395+
13396+ FiMustWriteLock(file);
13397+
523b37e3 13398+ err = 0;
2000de60 13399+ dentry = file->f_path.dentry;
b912730e 13400+ AuDebugOn(IS_ERR_OR_NULL(dentry));
4a4d8108
AM
13401+ finfo = au_fi(file);
13402+ memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop));
2cbb1c4b 13403+ atomic_set(&finfo->fi_mmapped, 0);
4a4d8108 13404+ bindex = au_dbstart(dentry);
8cdd5066
JR
13405+ if (!h_file) {
13406+ h_dentry = au_h_dptr(dentry, bindex);
13407+ err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb);
13408+ if (unlikely(err))
13409+ goto out;
b912730e 13410+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
8cdd5066
JR
13411+ } else {
13412+ h_dentry = h_file->f_path.dentry;
13413+ err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb);
13414+ if (unlikely(err))
13415+ goto out;
b912730e 13416+ get_file(h_file);
8cdd5066 13417+ }
4a4d8108
AM
13418+ if (IS_ERR(h_file))
13419+ err = PTR_ERR(h_file);
13420+ else {
38d290e6
JR
13421+ if ((flags & __O_TMPFILE)
13422+ && !(flags & O_EXCL)) {
13423+ h_inode = file_inode(h_file);
13424+ spin_lock(&h_inode->i_lock);
13425+ h_inode->i_state |= I_LINKABLE;
13426+ spin_unlock(&h_inode->i_lock);
13427+ }
4a4d8108
AM
13428+ au_set_fbstart(file, bindex);
13429+ au_set_h_fptr(file, bindex, h_file);
13430+ au_update_figen(file);
13431+ /* todo: necessary? */
13432+ /* file->f_ra = h_file->f_ra; */
13433+ }
027c5e7a 13434+
8cdd5066 13435+out:
4a4d8108 13436+ return err;
1facf9fc 13437+}
13438+
4a4d8108
AM
13439+static int aufs_open_nondir(struct inode *inode __maybe_unused,
13440+ struct file *file)
1facf9fc 13441+{
4a4d8108 13442+ int err;
1308ab2a 13443+ struct super_block *sb;
b912730e
AM
13444+ struct au_do_open_args args = {
13445+ .open = au_do_open_nondir
13446+ };
1facf9fc 13447+
523b37e3
AM
13448+ AuDbg("%pD, f_flags 0x%x, f_mode 0x%x\n",
13449+ file, vfsub_file_flags(file), file->f_mode);
1facf9fc 13450+
2000de60 13451+ sb = file->f_path.dentry->d_sb;
4a4d8108 13452+ si_read_lock(sb, AuLock_FLUSH);
b912730e 13453+ err = au_do_open(file, &args);
4a4d8108
AM
13454+ si_read_unlock(sb);
13455+ return err;
13456+}
1facf9fc 13457+
4a4d8108
AM
13458+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file)
13459+{
13460+ struct au_finfo *finfo;
13461+ aufs_bindex_t bindex;
1facf9fc 13462+
4a4d8108 13463+ finfo = au_fi(file);
2000de60
JR
13464+ au_sphl_del(&finfo->fi_hlist,
13465+ &au_sbi(file->f_path.dentry->d_sb)->si_files);
4a4d8108 13466+ bindex = finfo->fi_btop;
b4510431 13467+ if (bindex >= 0)
4a4d8108 13468+ au_set_h_fptr(file, bindex, NULL);
7f207e10 13469+
4a4d8108
AM
13470+ au_finfo_fin(file);
13471+ return 0;
1facf9fc 13472+}
13473+
4a4d8108
AM
13474+/* ---------------------------------------------------------------------- */
13475+
13476+static int au_do_flush_nondir(struct file *file, fl_owner_t id)
dece6358 13477+{
1308ab2a 13478+ int err;
4a4d8108
AM
13479+ struct file *h_file;
13480+
13481+ err = 0;
13482+ h_file = au_hf_top(file);
13483+ if (h_file)
13484+ err = vfsub_flush(h_file, id);
13485+ return err;
13486+}
13487+
13488+static int aufs_flush_nondir(struct file *file, fl_owner_t id)
13489+{
13490+ return au_do_flush(file, id, au_do_flush_nondir);
13491+}
13492+
13493+/* ---------------------------------------------------------------------- */
9dbd164d
AM
13494+/*
13495+ * read and write functions acquire [fdi]_rwsem once, but release before
13496+ * mmap_sem. This is to prevent a race condition with mmap(2).
13497+ * Releasing these aufs-rwsem should be safe, no branch-management (by keeping
13498+ * si_rwsem), no harmful copy-up should happen. Actually copy-up may happen in
13499+ * read functions after [fdi]_rwsem are released, but it should be harmless.
13500+ */
4a4d8108 13501+
b912730e
AM
13502+/* Callers should call au_read_post() or fput() in the end */
13503+struct file *au_read_pre(struct file *file, int keep_fi)
4a4d8108 13504+{
4a4d8108 13505+ struct file *h_file;
b912730e 13506+ int err;
1facf9fc 13507+
4a4d8108 13508+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
b912730e
AM
13509+ if (!err) {
13510+ di_read_unlock(file->f_path.dentry, AuLock_IR);
13511+ h_file = au_hf_top(file);
13512+ get_file(h_file);
13513+ if (!keep_fi)
13514+ fi_read_unlock(file);
13515+ } else
13516+ h_file = ERR_PTR(err);
13517+
13518+ return h_file;
13519+}
13520+
13521+static void au_read_post(struct inode *inode, struct file *h_file)
13522+{
13523+ /* update without lock, I don't think it a problem */
13524+ fsstack_copy_attr_atime(inode, file_inode(h_file));
13525+ fput(h_file);
13526+}
13527+
13528+struct au_write_pre {
13529+ blkcnt_t blks;
13530+ aufs_bindex_t bstart;
13531+};
13532+
13533+/*
13534+ * return with iinfo is write-locked
13535+ * callers should call au_write_post() or iinfo_write_unlock() + fput() in the
13536+ * end
13537+ */
13538+static struct file *au_write_pre(struct file *file, int do_ready,
13539+ struct au_write_pre *wpre)
13540+{
13541+ struct file *h_file;
13542+ struct dentry *dentry;
13543+ int err;
13544+ struct au_pin pin;
13545+
13546+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
13547+ h_file = ERR_PTR(err);
dece6358
AM
13548+ if (unlikely(err))
13549+ goto out;
1facf9fc 13550+
b912730e
AM
13551+ dentry = file->f_path.dentry;
13552+ if (do_ready) {
13553+ err = au_ready_to_write(file, -1, &pin);
13554+ if (unlikely(err)) {
13555+ h_file = ERR_PTR(err);
13556+ di_write_unlock(dentry);
13557+ goto out_fi;
13558+ }
13559+ }
13560+
13561+ di_downgrade_lock(dentry, /*flags*/0);
13562+ if (wpre)
13563+ wpre->bstart = au_fbstart(file);
4a4d8108 13564+ h_file = au_hf_top(file);
9dbd164d 13565+ get_file(h_file);
b912730e
AM
13566+ if (wpre)
13567+ wpre->blks = file_inode(h_file)->i_blocks;
13568+ if (do_ready)
13569+ au_unpin(&pin);
13570+ di_read_unlock(dentry, /*flags*/0);
13571+
13572+out_fi:
13573+ fi_write_unlock(file);
13574+out:
13575+ return h_file;
13576+}
13577+
13578+static void au_write_post(struct inode *inode, struct file *h_file,
13579+ struct au_write_pre *wpre, ssize_t written)
13580+{
13581+ struct inode *h_inode;
13582+
13583+ au_cpup_attr_timesizes(inode);
13584+ AuDebugOn(au_ibstart(inode) != wpre->bstart);
13585+ h_inode = file_inode(h_file);
13586+ inode->i_mode = h_inode->i_mode;
13587+ ii_write_unlock(inode);
13588+ fput(h_file);
13589+
13590+ /* AuDbg("blks %llu, %llu\n", (u64)blks, (u64)h_inode->i_blocks); */
13591+ if (written > 0)
13592+ au_fhsm_wrote(inode->i_sb, wpre->bstart,
13593+ /*force*/h_inode->i_blocks > wpre->blks);
13594+}
13595+
13596+static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
13597+ loff_t *ppos)
13598+{
13599+ ssize_t err;
13600+ struct inode *inode;
13601+ struct file *h_file;
13602+ struct super_block *sb;
13603+
13604+ inode = file_inode(file);
13605+ sb = inode->i_sb;
13606+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
13607+
13608+ h_file = au_read_pre(file, /*keep_fi*/0);
13609+ err = PTR_ERR(h_file);
13610+ if (IS_ERR(h_file))
13611+ goto out;
9dbd164d
AM
13612+
13613+ /* filedata may be obsoleted by concurrent copyup, but no problem */
4a4d8108
AM
13614+ err = vfsub_read_u(h_file, buf, count, ppos);
13615+ /* todo: necessary? */
13616+ /* file->f_ra = h_file->f_ra; */
b912730e 13617+ au_read_post(inode, h_file);
1308ab2a 13618+
4f0767ce 13619+out:
dece6358
AM
13620+ si_read_unlock(sb);
13621+ return err;
13622+}
1facf9fc 13623+
e49829fe
JR
13624+/*
13625+ * todo: very ugly
13626+ * it locks both of i_mutex and si_rwsem for read in safe.
13627+ * if the plink maintenance mode continues forever (that is the problem),
13628+ * may loop forever.
13629+ */
13630+static void au_mtx_and_read_lock(struct inode *inode)
13631+{
13632+ int err;
13633+ struct super_block *sb = inode->i_sb;
13634+
13635+ while (1) {
13636+ mutex_lock(&inode->i_mutex);
13637+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
13638+ if (!err)
13639+ break;
13640+ mutex_unlock(&inode->i_mutex);
13641+ si_read_lock(sb, AuLock_NOPLMW);
13642+ si_read_unlock(sb);
13643+ }
13644+}
13645+
4a4d8108
AM
13646+static ssize_t aufs_write(struct file *file, const char __user *ubuf,
13647+ size_t count, loff_t *ppos)
dece6358 13648+{
4a4d8108 13649+ ssize_t err;
b912730e
AM
13650+ struct au_write_pre wpre;
13651+ struct inode *inode;
4a4d8108
AM
13652+ struct file *h_file;
13653+ char __user *buf = (char __user *)ubuf;
1facf9fc 13654+
b912730e 13655+ inode = file_inode(file);
e49829fe 13656+ au_mtx_and_read_lock(inode);
1facf9fc 13657+
b912730e
AM
13658+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13659+ err = PTR_ERR(h_file);
13660+ if (IS_ERR(h_file))
9dbd164d 13661+ goto out;
9dbd164d 13662+
4a4d8108 13663+ err = vfsub_write_u(h_file, buf, count, ppos);
b912730e 13664+ au_write_post(inode, h_file, &wpre, err);
1facf9fc 13665+
4f0767ce 13666+out:
b912730e 13667+ si_read_unlock(inode->i_sb);
4a4d8108 13668+ mutex_unlock(&inode->i_mutex);
dece6358
AM
13669+ return err;
13670+}
1facf9fc 13671+
076b876e
AM
13672+static ssize_t au_do_iter(struct file *h_file, int rw, struct kiocb *kio,
13673+ struct iov_iter *iov_iter)
dece6358 13674+{
4a4d8108
AM
13675+ ssize_t err;
13676+ struct file *file;
076b876e 13677+ ssize_t (*iter)(struct kiocb *, struct iov_iter *);
1facf9fc 13678+
4a4d8108
AM
13679+ err = security_file_permission(h_file, rw);
13680+ if (unlikely(err))
13681+ goto out;
1facf9fc 13682+
4a4d8108 13683+ err = -ENOSYS;
076b876e 13684+ iter = NULL;
5527c038 13685+ if (rw == MAY_READ)
076b876e 13686+ iter = h_file->f_op->read_iter;
5527c038 13687+ else if (rw == MAY_WRITE)
076b876e 13688+ iter = h_file->f_op->write_iter;
076b876e
AM
13689+
13690+ file = kio->ki_filp;
13691+ kio->ki_filp = h_file;
13692+ if (iter) {
2cbb1c4b 13693+ lockdep_off();
076b876e
AM
13694+ err = iter(kio, iov_iter);
13695+ lockdep_on();
4a4d8108
AM
13696+ } else
13697+ /* currently there is no such fs */
13698+ WARN_ON_ONCE(1);
076b876e 13699+ kio->ki_filp = file;
1facf9fc 13700+
4f0767ce 13701+out:
dece6358
AM
13702+ return err;
13703+}
1facf9fc 13704+
076b876e 13705+static ssize_t aufs_read_iter(struct kiocb *kio, struct iov_iter *iov_iter)
1facf9fc 13706+{
4a4d8108
AM
13707+ ssize_t err;
13708+ struct file *file, *h_file;
b912730e 13709+ struct inode *inode;
dece6358 13710+ struct super_block *sb;
1facf9fc 13711+
4a4d8108 13712+ file = kio->ki_filp;
b912730e
AM
13713+ inode = file_inode(file);
13714+ sb = inode->i_sb;
e49829fe 13715+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108 13716+
b912730e
AM
13717+ h_file = au_read_pre(file, /*keep_fi*/0);
13718+ err = PTR_ERR(h_file);
13719+ if (IS_ERR(h_file))
13720+ goto out;
9dbd164d 13721+
076b876e 13722+ err = au_do_iter(h_file, MAY_READ, kio, iov_iter);
4a4d8108
AM
13723+ /* todo: necessary? */
13724+ /* file->f_ra = h_file->f_ra; */
b912730e 13725+ au_read_post(inode, h_file);
1facf9fc 13726+
4f0767ce 13727+out:
4a4d8108 13728+ si_read_unlock(sb);
1308ab2a 13729+ return err;
13730+}
1facf9fc 13731+
076b876e 13732+static ssize_t aufs_write_iter(struct kiocb *kio, struct iov_iter *iov_iter)
1308ab2a 13733+{
4a4d8108 13734+ ssize_t err;
b912730e
AM
13735+ struct au_write_pre wpre;
13736+ struct inode *inode;
4a4d8108 13737+ struct file *file, *h_file;
1308ab2a 13738+
4a4d8108 13739+ file = kio->ki_filp;
b912730e 13740+ inode = file_inode(file);
e49829fe
JR
13741+ au_mtx_and_read_lock(inode);
13742+
b912730e
AM
13743+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13744+ err = PTR_ERR(h_file);
13745+ if (IS_ERR(h_file))
9dbd164d 13746+ goto out;
9dbd164d 13747+
076b876e 13748+ err = au_do_iter(h_file, MAY_WRITE, kio, iov_iter);
b912730e 13749+ au_write_post(inode, h_file, &wpre, err);
1facf9fc 13750+
4f0767ce 13751+out:
b912730e 13752+ si_read_unlock(inode->i_sb);
4a4d8108 13753+ mutex_unlock(&inode->i_mutex);
dece6358 13754+ return err;
1facf9fc 13755+}
13756+
4a4d8108
AM
13757+static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
13758+ struct pipe_inode_info *pipe, size_t len,
13759+ unsigned int flags)
1facf9fc 13760+{
4a4d8108
AM
13761+ ssize_t err;
13762+ struct file *h_file;
b912730e 13763+ struct inode *inode;
dece6358 13764+ struct super_block *sb;
1facf9fc 13765+
b912730e
AM
13766+ inode = file_inode(file);
13767+ sb = inode->i_sb;
e49829fe 13768+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
b912730e
AM
13769+
13770+ h_file = au_read_pre(file, /*keep_fi*/1);
13771+ err = PTR_ERR(h_file);
13772+ if (IS_ERR(h_file))
dece6358 13773+ goto out;
1facf9fc 13774+
4a4d8108 13775+ if (au_test_loopback_kthread()) {
2000de60 13776+ au_warn_loopback(h_file->f_path.dentry->d_sb);
87a755f4
AM
13777+ if (file->f_mapping != h_file->f_mapping) {
13778+ file->f_mapping = h_file->f_mapping;
13779+ smp_mb(); /* unnecessary? */
13780+ }
1308ab2a 13781+ }
9dbd164d
AM
13782+ fi_read_unlock(file);
13783+
4a4d8108
AM
13784+ err = vfsub_splice_to(h_file, ppos, pipe, len, flags);
13785+ /* todo: necessary? */
13786+ /* file->f_ra = h_file->f_ra; */
b912730e 13787+ au_read_post(inode, h_file);
1facf9fc 13788+
4f0767ce 13789+out:
4a4d8108 13790+ si_read_unlock(sb);
dece6358 13791+ return err;
1facf9fc 13792+}
13793+
4a4d8108
AM
13794+static ssize_t
13795+aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
13796+ size_t len, unsigned int flags)
1facf9fc 13797+{
4a4d8108 13798+ ssize_t err;
b912730e
AM
13799+ struct au_write_pre wpre;
13800+ struct inode *inode;
076b876e 13801+ struct file *h_file;
1facf9fc 13802+
b912730e 13803+ inode = file_inode(file);
e49829fe 13804+ au_mtx_and_read_lock(inode);
9dbd164d 13805+
b912730e
AM
13806+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13807+ err = PTR_ERR(h_file);
13808+ if (IS_ERR(h_file))
9dbd164d 13809+ goto out;
9dbd164d 13810+
4a4d8108 13811+ err = vfsub_splice_from(pipe, h_file, ppos, len, flags);
b912730e 13812+ au_write_post(inode, h_file, &wpre, err);
1facf9fc 13813+
4f0767ce 13814+out:
b912730e 13815+ si_read_unlock(inode->i_sb);
4a4d8108
AM
13816+ mutex_unlock(&inode->i_mutex);
13817+ return err;
13818+}
1facf9fc 13819+
38d290e6
JR
13820+static long aufs_fallocate(struct file *file, int mode, loff_t offset,
13821+ loff_t len)
13822+{
13823+ long err;
b912730e 13824+ struct au_write_pre wpre;
38d290e6
JR
13825+ struct inode *inode;
13826+ struct file *h_file;
13827+
b912730e 13828+ inode = file_inode(file);
38d290e6
JR
13829+ au_mtx_and_read_lock(inode);
13830+
b912730e
AM
13831+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13832+ err = PTR_ERR(h_file);
13833+ if (IS_ERR(h_file))
38d290e6 13834+ goto out;
38d290e6
JR
13835+
13836+ lockdep_off();
03673fb0 13837+ err = vfs_fallocate(h_file, mode, offset, len);
38d290e6 13838+ lockdep_on();
b912730e 13839+ au_write_post(inode, h_file, &wpre, /*written*/1);
38d290e6
JR
13840+
13841+out:
b912730e 13842+ si_read_unlock(inode->i_sb);
38d290e6
JR
13843+ mutex_unlock(&inode->i_mutex);
13844+ return err;
13845+}
13846+
4a4d8108
AM
13847+/* ---------------------------------------------------------------------- */
13848+
9dbd164d
AM
13849+/*
13850+ * The locking order around current->mmap_sem.
13851+ * - in most and regular cases
13852+ * file I/O syscall -- aufs_read() or something
13853+ * -- si_rwsem for read -- mmap_sem
13854+ * (Note that [fdi]i_rwsem are released before mmap_sem).
13855+ * - in mmap case
13856+ * mmap(2) -- mmap_sem -- aufs_mmap() -- si_rwsem for read -- [fdi]i_rwsem
13857+ * This AB-BA order is definitely bad, but is not a problem since "si_rwsem for
13858+ * read" allows multiple processes to acquire it and [fdi]i_rwsem are not held in
13859+ * file I/O. Aufs needs to stop lockdep in aufs_mmap() though.
13860+ * It means that when aufs acquires si_rwsem for write, the process should never
13861+ * acquire mmap_sem.
13862+ *
392086de 13863+ * Actually aufs_iterate() holds [fdi]i_rwsem before mmap_sem, but this is not a
9dbd164d
AM
13864+ * problem either since no directory can be mmap-ed.
13865+ * A similar scenario applies to aufs_readlink() too.
13866+ */
13867+
38d290e6 13868+#if 0 /* stop calling security_file_mmap() */
2dfbb274
AM
13869+/* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */
13870+#define AuConv_VM_PROT(f, b) _calc_vm_trans(f, VM_##b, PROT_##b)
13871+
13872+static unsigned long au_arch_prot_conv(unsigned long flags)
13873+{
13874+ /* currently ppc64 only */
13875+#ifdef CONFIG_PPC64
13876+ /* cf. linux/arch/powerpc/include/asm/mman.h */
13877+ AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO);
13878+ return AuConv_VM_PROT(flags, SAO);
13879+#else
13880+ AuDebugOn(arch_calc_vm_prot_bits(-1));
13881+ return 0;
13882+#endif
13883+}
13884+
13885+static unsigned long au_prot_conv(unsigned long flags)
13886+{
13887+ return AuConv_VM_PROT(flags, READ)
13888+ | AuConv_VM_PROT(flags, WRITE)
13889+ | AuConv_VM_PROT(flags, EXEC)
13890+ | au_arch_prot_conv(flags);
13891+}
13892+
13893+/* cf. linux/include/linux/mman.h: calc_vm_flag_bits() */
13894+#define AuConv_VM_MAP(f, b) _calc_vm_trans(f, VM_##b, MAP_##b)
13895+
13896+static unsigned long au_flag_conv(unsigned long flags)
13897+{
13898+ return AuConv_VM_MAP(flags, GROWSDOWN)
13899+ | AuConv_VM_MAP(flags, DENYWRITE)
2dfbb274
AM
13900+ | AuConv_VM_MAP(flags, LOCKED);
13901+}
38d290e6 13902+#endif
2dfbb274 13903+
9dbd164d 13904+static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
dece6358 13905+{
4a4d8108 13906+ int err;
4a4d8108 13907+ const unsigned char wlock
9dbd164d 13908+ = (file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
4a4d8108 13909+ struct super_block *sb;
9dbd164d 13910+ struct file *h_file;
b912730e 13911+ struct inode *inode;
9dbd164d
AM
13912+
13913+ AuDbgVmRegion(file, vma);
1308ab2a 13914+
b912730e
AM
13915+ inode = file_inode(file);
13916+ sb = inode->i_sb;
9dbd164d 13917+ lockdep_off();
e49829fe 13918+ si_read_lock(sb, AuLock_NOPLMW);
4a4d8108 13919+
b912730e 13920+ h_file = au_write_pre(file, wlock, /*wpre*/NULL);
9dbd164d 13921+ lockdep_on();
b912730e
AM
13922+ err = PTR_ERR(h_file);
13923+ if (IS_ERR(h_file))
13924+ goto out;
1308ab2a 13925+
b912730e
AM
13926+ err = 0;
13927+ au_set_mmapped(file);
9dbd164d 13928+ au_vm_file_reset(vma, h_file);
38d290e6
JR
13929+ /*
13930+ * we cannot call security_mmap_file() here since it may acquire
13931+ * mmap_sem or i_mutex.
13932+ *
13933+ * err = security_mmap_file(h_file, au_prot_conv(vma->vm_flags),
13934+ * au_flag_conv(vma->vm_flags));
13935+ */
9dbd164d
AM
13936+ if (!err)
13937+ err = h_file->f_op->mmap(h_file, vma);
b912730e
AM
13938+ if (!err) {
13939+ au_vm_prfile_set(vma, file);
13940+ fsstack_copy_attr_atime(inode, file_inode(h_file));
13941+ goto out_fput; /* success */
13942+ }
2cbb1c4b
JR
13943+ au_unset_mmapped(file);
13944+ au_vm_file_reset(vma, file);
b912730e 13945+
2cbb1c4b 13946+out_fput:
9dbd164d 13947+ lockdep_off();
b912730e
AM
13948+ ii_write_unlock(inode);
13949+ lockdep_on();
13950+ fput(h_file);
4f0767ce 13951+out:
b912730e 13952+ lockdep_off();
9dbd164d
AM
13953+ si_read_unlock(sb);
13954+ lockdep_on();
13955+ AuTraceErr(err);
4a4d8108
AM
13956+ return err;
13957+}
13958+
13959+/* ---------------------------------------------------------------------- */
13960+
1e00d052
AM
13961+static int aufs_fsync_nondir(struct file *file, loff_t start, loff_t end,
13962+ int datasync)
4a4d8108
AM
13963+{
13964+ int err;
b912730e 13965+ struct au_write_pre wpre;
4a4d8108
AM
13966+ struct inode *inode;
13967+ struct file *h_file;
4a4d8108
AM
13968+
13969+ err = 0; /* -EBADF; */ /* posix? */
13970+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
b912730e 13971+ goto out;
4a4d8108 13972+
b912730e
AM
13973+ inode = file_inode(file);
13974+ au_mtx_and_read_lock(inode);
13975+
13976+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13977+ err = PTR_ERR(h_file);
13978+ if (IS_ERR(h_file))
4a4d8108 13979+ goto out_unlock;
4a4d8108 13980+
53392da6 13981+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
b912730e 13982+ au_write_post(inode, h_file, &wpre, /*written*/0);
4a4d8108 13983+
4f0767ce 13984+out_unlock:
b912730e 13985+ si_read_unlock(inode->i_sb);
1e00d052 13986+ mutex_unlock(&inode->i_mutex);
b912730e 13987+out:
4a4d8108 13988+ return err;
dece6358
AM
13989+}
13990+
4a4d8108
AM
13991+/* no one supports this operation, currently */
13992+#if 0
13993+static int aufs_aio_fsync_nondir(struct kiocb *kio, int datasync)
dece6358 13994+{
4a4d8108 13995+ int err;
b912730e 13996+ struct au_write_pre wpre;
4a4d8108
AM
13997+ struct inode *inode;
13998+ struct file *file, *h_file;
1308ab2a 13999+
4a4d8108
AM
14000+ err = 0; /* -EBADF; */ /* posix? */
14001+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
14002+ goto out;
1308ab2a 14003+
b912730e
AM
14004+ file = kio->ki_filp;
14005+ inode = file_inode(file);
14006+ au_mtx_and_read_lock(inode);
14007+
14008+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
14009+ err = PTR_ERR(h_file);
14010+ if (IS_ERR(h_file))
4a4d8108 14011+ goto out_unlock;
1308ab2a 14012+
4a4d8108
AM
14013+ err = -ENOSYS;
14014+ h_file = au_hf_top(file);
523b37e3 14015+ if (h_file->f_op->aio_fsync) {
4a4d8108 14016+ struct mutex *h_mtx;
1308ab2a 14017+
c06a8ce3 14018+ h_mtx = &file_inode(h_file)->i_mutex;
4a4d8108
AM
14019+ if (!is_sync_kiocb(kio)) {
14020+ get_file(h_file);
14021+ fput(file);
14022+ }
14023+ kio->ki_filp = h_file;
14024+ err = h_file->f_op->aio_fsync(kio, datasync);
14025+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
14026+ if (!err)
14027+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
14028+ /*ignore*/
4a4d8108
AM
14029+ mutex_unlock(h_mtx);
14030+ }
b912730e 14031+ au_write_post(inode, h_file, &wpre, /*written*/0);
1308ab2a 14032+
4f0767ce 14033+out_unlock:
e49829fe 14034+ si_read_unlock(inode->sb);
4a4d8108 14035+ mutex_unlock(&inode->i_mutex);
b912730e 14036+out:
4a4d8108 14037+ return err;
dece6358 14038+}
4a4d8108 14039+#endif
dece6358 14040+
4a4d8108 14041+static int aufs_fasync(int fd, struct file *file, int flag)
dece6358 14042+{
4a4d8108
AM
14043+ int err;
14044+ struct file *h_file;
4a4d8108 14045+ struct super_block *sb;
1308ab2a 14046+
b912730e 14047+ sb = file->f_path.dentry->d_sb;
e49829fe 14048+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
b912730e
AM
14049+
14050+ h_file = au_read_pre(file, /*keep_fi*/0);
14051+ err = PTR_ERR(h_file);
14052+ if (IS_ERR(h_file))
4a4d8108
AM
14053+ goto out;
14054+
523b37e3 14055+ if (h_file->f_op->fasync)
4a4d8108 14056+ err = h_file->f_op->fasync(fd, h_file, flag);
b912730e 14057+ fput(h_file); /* instead of au_read_post() */
1308ab2a 14058+
4f0767ce 14059+out:
4a4d8108 14060+ si_read_unlock(sb);
1308ab2a 14061+ return err;
dece6358 14062+}
4a4d8108
AM
14063+
14064+/* ---------------------------------------------------------------------- */
14065+
14066+/* no one supports this operation, currently */
14067+#if 0
14068+static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
2000de60 14069+ size_t len, loff_t *pos, int more)
4a4d8108
AM
14070+{
14071+}
14072+#endif
14073+
14074+/* ---------------------------------------------------------------------- */
14075+
14076+const struct file_operations aufs_file_fop = {
14077+ .owner = THIS_MODULE,
2cbb1c4b 14078+
027c5e7a 14079+ .llseek = default_llseek,
4a4d8108
AM
14080+
14081+ .read = aufs_read,
14082+ .write = aufs_write,
076b876e
AM
14083+ .read_iter = aufs_read_iter,
14084+ .write_iter = aufs_write_iter,
14085+
4a4d8108
AM
14086+#ifdef CONFIG_AUFS_POLL
14087+ .poll = aufs_poll,
14088+#endif
14089+ .unlocked_ioctl = aufs_ioctl_nondir,
b752ccd1 14090+#ifdef CONFIG_COMPAT
c2b27bf2 14091+ .compat_ioctl = aufs_compat_ioctl_nondir,
b752ccd1 14092+#endif
4a4d8108
AM
14093+ .mmap = aufs_mmap,
14094+ .open = aufs_open_nondir,
14095+ .flush = aufs_flush_nondir,
14096+ .release = aufs_release_nondir,
14097+ .fsync = aufs_fsync_nondir,
14098+ /* .aio_fsync = aufs_aio_fsync_nondir, */
14099+ .fasync = aufs_fasync,
14100+ /* .sendpage = aufs_sendpage, */
14101+ .splice_write = aufs_splice_write,
14102+ .splice_read = aufs_splice_read,
14103+#if 0
14104+ .aio_splice_write = aufs_aio_splice_write,
38d290e6 14105+ .aio_splice_read = aufs_aio_splice_read,
4a4d8108 14106+#endif
38d290e6 14107+ .fallocate = aufs_fallocate
4a4d8108 14108+};
7f207e10
AM
14109diff -urN /usr/share/empty/fs/aufs/fstype.h linux/fs/aufs/fstype.h
14110--- /usr/share/empty/fs/aufs/fstype.h 1970-01-01 01:00:00.000000000 +0100
8cdd5066 14111+++ linux/fs/aufs/fstype.h 2016-02-28 11:26:32.573304539 +0100
b912730e 14112@@ -0,0 +1,400 @@
4a4d8108 14113+/*
8cdd5066 14114+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
14115+ *
14116+ * This program, aufs is free software; you can redistribute it and/or modify
14117+ * it under the terms of the GNU General Public License as published by
14118+ * the Free Software Foundation; either version 2 of the License, or
14119+ * (at your option) any later version.
14120+ *
14121+ * This program is distributed in the hope that it will be useful,
14122+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14123+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14124+ * GNU General Public License for more details.
14125+ *
14126+ * You should have received a copy of the GNU General Public License
523b37e3 14127+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
14128+ */
14129+
14130+/*
14131+ * judging filesystem type
14132+ */
14133+
14134+#ifndef __AUFS_FSTYPE_H__
14135+#define __AUFS_FSTYPE_H__
14136+
14137+#ifdef __KERNEL__
14138+
14139+#include <linux/fs.h>
14140+#include <linux/magic.h>
b912730e 14141+#include <linux/nfs_fs.h>
b95c5147 14142+#include <linux/romfs_fs.h>
4a4d8108
AM
14143+
14144+static inline int au_test_aufs(struct super_block *sb)
14145+{
14146+ return sb->s_magic == AUFS_SUPER_MAGIC;
14147+}
14148+
14149+static inline const char *au_sbtype(struct super_block *sb)
14150+{
14151+ return sb->s_type->name;
14152+}
1308ab2a 14153+
14154+static inline int au_test_iso9660(struct super_block *sb __maybe_unused)
14155+{
2000de60
JR
14156+#if defined(CONFIG_ISO9660_FS) || defined(CONFIG_ISO9660_FS_MODULE)
14157+ return sb->s_magic == ISOFS_SUPER_MAGIC;
dece6358
AM
14158+#else
14159+ return 0;
14160+#endif
14161+}
14162+
1308ab2a 14163+static inline int au_test_romfs(struct super_block *sb __maybe_unused)
dece6358 14164+{
2000de60
JR
14165+#if defined(CONFIG_ROMFS_FS) || defined(CONFIG_ROMFS_FS_MODULE)
14166+ return sb->s_magic == ROMFS_MAGIC;
dece6358
AM
14167+#else
14168+ return 0;
14169+#endif
14170+}
14171+
1308ab2a 14172+static inline int au_test_cramfs(struct super_block *sb __maybe_unused)
dece6358 14173+{
1308ab2a 14174+#if defined(CONFIG_CRAMFS) || defined(CONFIG_CRAMFS_MODULE)
14175+ return sb->s_magic == CRAMFS_MAGIC;
14176+#endif
14177+ return 0;
14178+}
14179+
14180+static inline int au_test_nfs(struct super_block *sb __maybe_unused)
14181+{
14182+#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE)
14183+ return sb->s_magic == NFS_SUPER_MAGIC;
dece6358
AM
14184+#else
14185+ return 0;
14186+#endif
14187+}
14188+
1308ab2a 14189+static inline int au_test_fuse(struct super_block *sb __maybe_unused)
dece6358 14190+{
1308ab2a 14191+#if defined(CONFIG_FUSE_FS) || defined(CONFIG_FUSE_FS_MODULE)
14192+ return sb->s_magic == FUSE_SUPER_MAGIC;
dece6358
AM
14193+#else
14194+ return 0;
14195+#endif
14196+}
14197+
1308ab2a 14198+static inline int au_test_xfs(struct super_block *sb __maybe_unused)
dece6358 14199+{
1308ab2a 14200+#if defined(CONFIG_XFS_FS) || defined(CONFIG_XFS_FS_MODULE)
14201+ return sb->s_magic == XFS_SB_MAGIC;
dece6358
AM
14202+#else
14203+ return 0;
14204+#endif
14205+}
14206+
1308ab2a 14207+static inline int au_test_tmpfs(struct super_block *sb __maybe_unused)
dece6358 14208+{
1308ab2a 14209+#ifdef CONFIG_TMPFS
14210+ return sb->s_magic == TMPFS_MAGIC;
14211+#else
14212+ return 0;
dece6358 14213+#endif
dece6358
AM
14214+}
14215+
1308ab2a 14216+static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused)
1facf9fc 14217+{
1308ab2a 14218+#if defined(CONFIG_ECRYPT_FS) || defined(CONFIG_ECRYPT_FS_MODULE)
14219+ return !strcmp(au_sbtype(sb), "ecryptfs");
14220+#else
14221+ return 0;
14222+#endif
1facf9fc 14223+}
14224+
1308ab2a 14225+static inline int au_test_ramfs(struct super_block *sb)
14226+{
14227+ return sb->s_magic == RAMFS_MAGIC;
14228+}
14229+
14230+static inline int au_test_ubifs(struct super_block *sb __maybe_unused)
14231+{
14232+#if defined(CONFIG_UBIFS_FS) || defined(CONFIG_UBIFS_FS_MODULE)
14233+ return sb->s_magic == UBIFS_SUPER_MAGIC;
14234+#else
14235+ return 0;
14236+#endif
14237+}
14238+
14239+static inline int au_test_procfs(struct super_block *sb __maybe_unused)
14240+{
14241+#ifdef CONFIG_PROC_FS
14242+ return sb->s_magic == PROC_SUPER_MAGIC;
14243+#else
14244+ return 0;
14245+#endif
14246+}
14247+
14248+static inline int au_test_sysfs(struct super_block *sb __maybe_unused)
14249+{
14250+#ifdef CONFIG_SYSFS
14251+ return sb->s_magic == SYSFS_MAGIC;
14252+#else
14253+ return 0;
14254+#endif
14255+}
14256+
14257+static inline int au_test_configfs(struct super_block *sb __maybe_unused)
14258+{
14259+#if defined(CONFIG_CONFIGFS_FS) || defined(CONFIG_CONFIGFS_FS_MODULE)
14260+ return sb->s_magic == CONFIGFS_MAGIC;
14261+#else
14262+ return 0;
14263+#endif
14264+}
14265+
14266+static inline int au_test_minix(struct super_block *sb __maybe_unused)
14267+{
14268+#if defined(CONFIG_MINIX_FS) || defined(CONFIG_MINIX_FS_MODULE)
14269+ return sb->s_magic == MINIX3_SUPER_MAGIC
14270+ || sb->s_magic == MINIX2_SUPER_MAGIC
14271+ || sb->s_magic == MINIX2_SUPER_MAGIC2
14272+ || sb->s_magic == MINIX_SUPER_MAGIC
14273+ || sb->s_magic == MINIX_SUPER_MAGIC2;
14274+#else
14275+ return 0;
14276+#endif
14277+}
14278+
1308ab2a 14279+static inline int au_test_fat(struct super_block *sb __maybe_unused)
14280+{
14281+#if defined(CONFIG_FAT_FS) || defined(CONFIG_FAT_FS_MODULE)
14282+ return sb->s_magic == MSDOS_SUPER_MAGIC;
14283+#else
14284+ return 0;
14285+#endif
14286+}
14287+
14288+static inline int au_test_msdos(struct super_block *sb)
14289+{
14290+ return au_test_fat(sb);
14291+}
14292+
14293+static inline int au_test_vfat(struct super_block *sb)
14294+{
14295+ return au_test_fat(sb);
14296+}
14297+
14298+static inline int au_test_securityfs(struct super_block *sb __maybe_unused)
14299+{
14300+#ifdef CONFIG_SECURITYFS
14301+ return sb->s_magic == SECURITYFS_MAGIC;
14302+#else
14303+ return 0;
14304+#endif
14305+}
14306+
14307+static inline int au_test_squashfs(struct super_block *sb __maybe_unused)
14308+{
14309+#if defined(CONFIG_SQUASHFS) || defined(CONFIG_SQUASHFS_MODULE)
14310+ return sb->s_magic == SQUASHFS_MAGIC;
14311+#else
14312+ return 0;
14313+#endif
14314+}
14315+
14316+static inline int au_test_btrfs(struct super_block *sb __maybe_unused)
14317+{
14318+#if defined(CONFIG_BTRFS_FS) || defined(CONFIG_BTRFS_FS_MODULE)
14319+ return sb->s_magic == BTRFS_SUPER_MAGIC;
14320+#else
14321+ return 0;
14322+#endif
14323+}
14324+
14325+static inline int au_test_xenfs(struct super_block *sb __maybe_unused)
14326+{
14327+#if defined(CONFIG_XENFS) || defined(CONFIG_XENFS_MODULE)
14328+ return sb->s_magic == XENFS_SUPER_MAGIC;
14329+#else
14330+ return 0;
14331+#endif
14332+}
14333+
14334+static inline int au_test_debugfs(struct super_block *sb __maybe_unused)
14335+{
14336+#ifdef CONFIG_DEBUG_FS
14337+ return sb->s_magic == DEBUGFS_MAGIC;
14338+#else
14339+ return 0;
14340+#endif
14341+}
14342+
14343+static inline int au_test_nilfs(struct super_block *sb __maybe_unused)
14344+{
14345+#if defined(CONFIG_NILFS) || defined(CONFIG_NILFS_MODULE)
14346+ return sb->s_magic == NILFS_SUPER_MAGIC;
14347+#else
14348+ return 0;
14349+#endif
14350+}
14351+
4a4d8108
AM
14352+static inline int au_test_hfsplus(struct super_block *sb __maybe_unused)
14353+{
14354+#if defined(CONFIG_HFSPLUS_FS) || defined(CONFIG_HFSPLUS_FS_MODULE)
14355+ return sb->s_magic == HFSPLUS_SUPER_MAGIC;
14356+#else
14357+ return 0;
14358+#endif
14359+}
14360+
1308ab2a 14361+/* ---------------------------------------------------------------------- */
14362+/*
14363+ * they can't be an aufs branch.
14364+ */
14365+static inline int au_test_fs_unsuppoted(struct super_block *sb)
14366+{
14367+ return
14368+#ifndef CONFIG_AUFS_BR_RAMFS
14369+ au_test_ramfs(sb) ||
14370+#endif
14371+ au_test_procfs(sb)
14372+ || au_test_sysfs(sb)
14373+ || au_test_configfs(sb)
14374+ || au_test_debugfs(sb)
14375+ || au_test_securityfs(sb)
14376+ || au_test_xenfs(sb)
14377+ || au_test_ecryptfs(sb)
14378+ /* || !strcmp(au_sbtype(sb), "unionfs") */
14379+ || au_test_aufs(sb); /* will be supported in next version */
14380+}
14381+
1308ab2a 14382+static inline int au_test_fs_remote(struct super_block *sb)
14383+{
14384+ return !au_test_tmpfs(sb)
14385+#ifdef CONFIG_AUFS_BR_RAMFS
14386+ && !au_test_ramfs(sb)
14387+#endif
14388+ && !(sb->s_type->fs_flags & FS_REQUIRES_DEV);
14389+}
14390+
14391+/* ---------------------------------------------------------------------- */
14392+
14393+/*
14394+ * Note: these functions (below) are created after reading ->getattr() in all
14395+ * filesystems under linux/fs. it means we have to do so in every update...
14396+ */
14397+
14398+/*
14399+ * some filesystems require getattr to refresh the inode attributes before
14400+ * referencing.
14401+ * in most cases, we can rely on the inode attribute in NFS (or every remote fs)
14402+ * and leave the work for d_revalidate()
14403+ */
14404+static inline int au_test_fs_refresh_iattr(struct super_block *sb)
14405+{
14406+ return au_test_nfs(sb)
14407+ || au_test_fuse(sb)
1308ab2a 14408+ /* || au_test_btrfs(sb) */ /* untested */
1308ab2a 14409+ ;
14410+}
14411+
14412+/*
14413+ * filesystems which don't maintain i_size or i_blocks.
14414+ */
14415+static inline int au_test_fs_bad_iattr_size(struct super_block *sb)
14416+{
14417+ return au_test_xfs(sb)
4a4d8108
AM
14418+ || au_test_btrfs(sb)
14419+ || au_test_ubifs(sb)
14420+ || au_test_hfsplus(sb) /* maintained, but incorrect */
1308ab2a 14421+ /* || au_test_minix(sb) */ /* untested */
14422+ ;
14423+}
14424+
14425+/*
14426+ * filesystems which don't store the correct value in some of their inode
14427+ * attributes.
14428+ */
14429+static inline int au_test_fs_bad_iattr(struct super_block *sb)
14430+{
14431+ return au_test_fs_bad_iattr_size(sb)
1308ab2a 14432+ || au_test_fat(sb)
14433+ || au_test_msdos(sb)
14434+ || au_test_vfat(sb);
1facf9fc 14435+}
14436+
14437+/* they don't check i_nlink in link(2) */
14438+static inline int au_test_fs_no_limit_nlink(struct super_block *sb)
14439+{
14440+ return au_test_tmpfs(sb)
14441+#ifdef CONFIG_AUFS_BR_RAMFS
14442+ || au_test_ramfs(sb)
14443+#endif
4a4d8108 14444+ || au_test_ubifs(sb)
4a4d8108 14445+ || au_test_hfsplus(sb);
1facf9fc 14446+}
14447+
14448+/*
14449+ * filesystems which set S_NOATIME and S_NOCMTIME.
14450+ */
14451+static inline int au_test_fs_notime(struct super_block *sb)
14452+{
14453+ return au_test_nfs(sb)
14454+ || au_test_fuse(sb)
dece6358 14455+ || au_test_ubifs(sb)
1facf9fc 14456+ ;
14457+}
14458+
1facf9fc 14459+/* temporary support for i#1 in cramfs */
14460+static inline int au_test_fs_unique_ino(struct inode *inode)
14461+{
14462+ if (au_test_cramfs(inode->i_sb))
14463+ return inode->i_ino != 1;
14464+ return 1;
14465+}
14466+
14467+/* ---------------------------------------------------------------------- */
14468+
14469+/*
14470+ * the filesystem where the xino files placed must support i/o after unlink and
14471+ * maintain i_size and i_blocks.
14472+ */
14473+static inline int au_test_fs_bad_xino(struct super_block *sb)
14474+{
14475+ return au_test_fs_remote(sb)
14476+ || au_test_fs_bad_iattr_size(sb)
1facf9fc 14477+ /* don't want unnecessary work for xino */
14478+ || au_test_aufs(sb)
1308ab2a 14479+ || au_test_ecryptfs(sb)
14480+ || au_test_nilfs(sb);
1facf9fc 14481+}
14482+
14483+static inline int au_test_fs_trunc_xino(struct super_block *sb)
14484+{
14485+ return au_test_tmpfs(sb)
14486+ || au_test_ramfs(sb);
14487+}
14488+
14489+/*
14490+ * test if the @sb is real-readonly.
14491+ */
14492+static inline int au_test_fs_rr(struct super_block *sb)
14493+{
14494+ return au_test_squashfs(sb)
14495+ || au_test_iso9660(sb)
14496+ || au_test_cramfs(sb)
14497+ || au_test_romfs(sb);
14498+}
14499+
b912730e
AM
14500+/*
14501+ * test if the @inode is nfs with 'noacl' option
14502+ * NFS always sets MS_POSIXACL regardless of its mount option 'noacl.'
14503+ */
14504+static inline int au_test_nfs_noacl(struct inode *inode)
14505+{
14506+ return au_test_nfs(inode->i_sb)
14507+ /* && IS_POSIXACL(inode) */
14508+ && !nfs_server_capable(inode, NFS_CAP_ACLS);
14509+}
14510+
1facf9fc 14511+#endif /* __KERNEL__ */
14512+#endif /* __AUFS_FSTYPE_H__ */
7f207e10
AM
14513diff -urN /usr/share/empty/fs/aufs/hfsnotify.c linux/fs/aufs/hfsnotify.c
14514--- /usr/share/empty/fs/aufs/hfsnotify.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 14515+++ linux/fs/aufs/hfsnotify.c 2016-02-28 11:26:32.573304539 +0100
c1595e42 14516@@ -0,0 +1,288 @@
1facf9fc 14517+/*
8cdd5066 14518+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 14519+ *
14520+ * This program, aufs is free software; you can redistribute it and/or modify
14521+ * it under the terms of the GNU General Public License as published by
14522+ * the Free Software Foundation; either version 2 of the License, or
14523+ * (at your option) any later version.
dece6358
AM
14524+ *
14525+ * This program is distributed in the hope that it will be useful,
14526+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14527+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14528+ * GNU General Public License for more details.
14529+ *
14530+ * You should have received a copy of the GNU General Public License
523b37e3 14531+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 14532+ */
14533+
14534+/*
4a4d8108 14535+ * fsnotify for the lower directories
1facf9fc 14536+ */
14537+
14538+#include "aufs.h"
14539+
4a4d8108
AM
14540+/* FS_IN_IGNORED is unnecessary */
14541+static const __u32 AuHfsnMask = (FS_MOVED_TO | FS_MOVED_FROM | FS_DELETE
14542+ | FS_CREATE | FS_EVENT_ON_CHILD);
7f207e10 14543+static DECLARE_WAIT_QUEUE_HEAD(au_hfsn_wq);
7eafdf33 14544+static __cacheline_aligned_in_smp atomic64_t au_hfsn_ifree = ATOMIC64_INIT(0);
1facf9fc 14545+
0c5527e5 14546+static void au_hfsn_free_mark(struct fsnotify_mark *mark)
1facf9fc 14547+{
0c5527e5
AM
14548+ struct au_hnotify *hn = container_of(mark, struct au_hnotify,
14549+ hn_mark);
4a4d8108 14550+ AuDbg("here\n");
7eafdf33 14551+ au_cache_free_hnotify(hn);
076b876e 14552+ smp_mb__before_atomic();
1716fcea
AM
14553+ if (atomic64_dec_and_test(&au_hfsn_ifree))
14554+ wake_up(&au_hfsn_wq);
4a4d8108 14555+}
1facf9fc 14556+
027c5e7a 14557+static int au_hfsn_alloc(struct au_hinode *hinode)
4a4d8108 14558+{
1716fcea 14559+ int err;
027c5e7a
AM
14560+ struct au_hnotify *hn;
14561+ struct super_block *sb;
14562+ struct au_branch *br;
0c5527e5 14563+ struct fsnotify_mark *mark;
027c5e7a 14564+ aufs_bindex_t bindex;
1facf9fc 14565+
027c5e7a
AM
14566+ hn = hinode->hi_notify;
14567+ sb = hn->hn_aufs_inode->i_sb;
14568+ bindex = au_br_index(sb, hinode->hi_id);
14569+ br = au_sbr(sb, bindex);
1716fcea
AM
14570+ AuDebugOn(!br->br_hfsn);
14571+
0c5527e5
AM
14572+ mark = &hn->hn_mark;
14573+ fsnotify_init_mark(mark, au_hfsn_free_mark);
14574+ mark->mask = AuHfsnMask;
7f207e10
AM
14575+ /*
14576+ * by udba rename or rmdir, aufs assign a new inode to the known
14577+ * h_inode, so specify 1 to allow dups.
14578+ */
c1595e42 14579+ lockdep_off();
1716fcea 14580+ err = fsnotify_add_mark(mark, br->br_hfsn->hfsn_group, hinode->hi_inode,
027c5e7a 14581+ /*mnt*/NULL, /*allow_dups*/1);
1716fcea
AM
14582+ /* even if err */
14583+ fsnotify_put_mark(mark);
c1595e42 14584+ lockdep_on();
1716fcea
AM
14585+
14586+ return err;
1facf9fc 14587+}
14588+
7eafdf33 14589+static int au_hfsn_free(struct au_hinode *hinode, struct au_hnotify *hn)
1facf9fc 14590+{
0c5527e5 14591+ struct fsnotify_mark *mark;
7eafdf33 14592+ unsigned long long ull;
1716fcea 14593+ struct fsnotify_group *group;
7eafdf33
AM
14594+
14595+ ull = atomic64_inc_return(&au_hfsn_ifree);
14596+ BUG_ON(!ull);
953406b4 14597+
0c5527e5 14598+ mark = &hn->hn_mark;
1716fcea
AM
14599+ spin_lock(&mark->lock);
14600+ group = mark->group;
14601+ fsnotify_get_group(group);
14602+ spin_unlock(&mark->lock);
c1595e42 14603+ lockdep_off();
1716fcea
AM
14604+ fsnotify_destroy_mark(mark, group);
14605+ fsnotify_put_group(group);
c1595e42 14606+ lockdep_on();
7f207e10 14607+
7eafdf33
AM
14608+ /* free hn by myself */
14609+ return 0;
1facf9fc 14610+}
14611+
14612+/* ---------------------------------------------------------------------- */
14613+
4a4d8108 14614+static void au_hfsn_ctl(struct au_hinode *hinode, int do_set)
1facf9fc 14615+{
0c5527e5 14616+ struct fsnotify_mark *mark;
1facf9fc 14617+
0c5527e5
AM
14618+ mark = &hinode->hi_notify->hn_mark;
14619+ spin_lock(&mark->lock);
1facf9fc 14620+ if (do_set) {
0c5527e5
AM
14621+ AuDebugOn(mark->mask & AuHfsnMask);
14622+ mark->mask |= AuHfsnMask;
1facf9fc 14623+ } else {
0c5527e5
AM
14624+ AuDebugOn(!(mark->mask & AuHfsnMask));
14625+ mark->mask &= ~AuHfsnMask;
1facf9fc 14626+ }
0c5527e5 14627+ spin_unlock(&mark->lock);
4a4d8108 14628+ /* fsnotify_recalc_inode_mask(hinode->hi_inode); */
1facf9fc 14629+}
14630+
4a4d8108 14631+/* ---------------------------------------------------------------------- */
1facf9fc 14632+
4a4d8108
AM
14633+/* #define AuDbgHnotify */
14634+#ifdef AuDbgHnotify
14635+static char *au_hfsn_name(u32 mask)
14636+{
14637+#ifdef CONFIG_AUFS_DEBUG
c06a8ce3
AM
14638+#define test_ret(flag) \
14639+ do { \
14640+ if (mask & flag) \
14641+ return #flag; \
14642+ } while (0)
4a4d8108
AM
14643+ test_ret(FS_ACCESS);
14644+ test_ret(FS_MODIFY);
14645+ test_ret(FS_ATTRIB);
14646+ test_ret(FS_CLOSE_WRITE);
14647+ test_ret(FS_CLOSE_NOWRITE);
14648+ test_ret(FS_OPEN);
14649+ test_ret(FS_MOVED_FROM);
14650+ test_ret(FS_MOVED_TO);
14651+ test_ret(FS_CREATE);
14652+ test_ret(FS_DELETE);
14653+ test_ret(FS_DELETE_SELF);
14654+ test_ret(FS_MOVE_SELF);
14655+ test_ret(FS_UNMOUNT);
14656+ test_ret(FS_Q_OVERFLOW);
14657+ test_ret(FS_IN_IGNORED);
b912730e 14658+ test_ret(FS_ISDIR);
4a4d8108
AM
14659+ test_ret(FS_IN_ONESHOT);
14660+ test_ret(FS_EVENT_ON_CHILD);
14661+ return "";
14662+#undef test_ret
14663+#else
14664+ return "??";
14665+#endif
1facf9fc 14666+}
4a4d8108 14667+#endif
1facf9fc 14668+
14669+/* ---------------------------------------------------------------------- */
14670+
1716fcea
AM
14671+static void au_hfsn_free_group(struct fsnotify_group *group)
14672+{
14673+ struct au_br_hfsnotify *hfsn = group->private;
14674+
14675+ AuDbg("here\n");
14676+ kfree(hfsn);
14677+}
14678+
4a4d8108 14679+static int au_hfsn_handle_event(struct fsnotify_group *group,
fb47a38f 14680+ struct inode *inode,
0c5527e5
AM
14681+ struct fsnotify_mark *inode_mark,
14682+ struct fsnotify_mark *vfsmount_mark,
fb47a38f
JR
14683+ u32 mask, void *data, int data_type,
14684+ const unsigned char *file_name, u32 cookie)
1facf9fc 14685+{
14686+ int err;
4a4d8108
AM
14687+ struct au_hnotify *hnotify;
14688+ struct inode *h_dir, *h_inode;
fb47a38f 14689+ struct qstr h_child_qstr = QSTR_INIT(file_name, strlen(file_name));
4a4d8108 14690+
fb47a38f 14691+ AuDebugOn(data_type != FSNOTIFY_EVENT_INODE);
1facf9fc 14692+
14693+ err = 0;
0c5527e5 14694+ /* if FS_UNMOUNT happens, there must be another bug */
4a4d8108 14695+ AuDebugOn(mask & FS_UNMOUNT);
0c5527e5 14696+ if (mask & (FS_IN_IGNORED | FS_UNMOUNT))
1facf9fc 14697+ goto out;
1facf9fc 14698+
fb47a38f
JR
14699+ h_dir = inode;
14700+ h_inode = NULL;
4a4d8108 14701+#ifdef AuDbgHnotify
392086de 14702+ au_debug_on();
4a4d8108
AM
14703+ if (1 || h_child_qstr.len != sizeof(AUFS_XINO_FNAME) - 1
14704+ || strncmp(h_child_qstr.name, AUFS_XINO_FNAME, h_child_qstr.len)) {
14705+ AuDbg("i%lu, mask 0x%x %s, hcname %.*s, hi%lu\n",
14706+ h_dir->i_ino, mask, au_hfsn_name(mask),
14707+ AuLNPair(&h_child_qstr), h_inode ? h_inode->i_ino : 0);
14708+ /* WARN_ON(1); */
1facf9fc 14709+ }
392086de 14710+ au_debug_off();
1facf9fc 14711+#endif
4a4d8108 14712+
0c5527e5
AM
14713+ AuDebugOn(!inode_mark);
14714+ hnotify = container_of(inode_mark, struct au_hnotify, hn_mark);
14715+ err = au_hnotify(h_dir, hnotify, mask, &h_child_qstr, h_inode);
1facf9fc 14716+
4a4d8108
AM
14717+out:
14718+ return err;
14719+}
1facf9fc 14720+
4a4d8108 14721+static struct fsnotify_ops au_hfsn_ops = {
1716fcea
AM
14722+ .handle_event = au_hfsn_handle_event,
14723+ .free_group_priv = au_hfsn_free_group
4a4d8108
AM
14724+};
14725+
14726+/* ---------------------------------------------------------------------- */
14727+
027c5e7a
AM
14728+static void au_hfsn_fin_br(struct au_branch *br)
14729+{
1716fcea 14730+ struct au_br_hfsnotify *hfsn;
027c5e7a 14731+
1716fcea 14732+ hfsn = br->br_hfsn;
c1595e42
JR
14733+ if (hfsn) {
14734+ lockdep_off();
1716fcea 14735+ fsnotify_put_group(hfsn->hfsn_group);
c1595e42
JR
14736+ lockdep_on();
14737+ }
027c5e7a
AM
14738+}
14739+
1716fcea 14740+static int au_hfsn_init_br(struct au_branch *br, int perm)
4a4d8108
AM
14741+{
14742+ int err;
1716fcea
AM
14743+ struct fsnotify_group *group;
14744+ struct au_br_hfsnotify *hfsn;
1facf9fc 14745+
4a4d8108 14746+ err = 0;
1716fcea
AM
14747+ br->br_hfsn = NULL;
14748+ if (!au_br_hnotifyable(perm))
027c5e7a 14749+ goto out;
027c5e7a 14750+
1716fcea
AM
14751+ err = -ENOMEM;
14752+ hfsn = kmalloc(sizeof(*hfsn), GFP_NOFS);
14753+ if (unlikely(!hfsn))
027c5e7a
AM
14754+ goto out;
14755+
1716fcea
AM
14756+ err = 0;
14757+ group = fsnotify_alloc_group(&au_hfsn_ops);
14758+ if (IS_ERR(group)) {
14759+ err = PTR_ERR(group);
0c5527e5 14760+ pr_err("fsnotify_alloc_group() failed, %d\n", err);
1716fcea 14761+ goto out_hfsn;
4a4d8108 14762+ }
1facf9fc 14763+
1716fcea
AM
14764+ group->private = hfsn;
14765+ hfsn->hfsn_group = group;
14766+ br->br_hfsn = hfsn;
14767+ goto out; /* success */
14768+
14769+out_hfsn:
14770+ kfree(hfsn);
027c5e7a 14771+out:
1716fcea
AM
14772+ return err;
14773+}
14774+
14775+static int au_hfsn_reset_br(unsigned int udba, struct au_branch *br, int perm)
14776+{
14777+ int err;
14778+
14779+ err = 0;
14780+ if (!br->br_hfsn)
14781+ err = au_hfsn_init_br(br, perm);
14782+
1facf9fc 14783+ return err;
14784+}
14785+
7eafdf33
AM
14786+/* ---------------------------------------------------------------------- */
14787+
14788+static void au_hfsn_fin(void)
14789+{
14790+ AuDbg("au_hfsn_ifree %lld\n", (long long)atomic64_read(&au_hfsn_ifree));
14791+ wait_event(au_hfsn_wq, !atomic64_read(&au_hfsn_ifree));
14792+}
14793+
4a4d8108
AM
14794+const struct au_hnotify_op au_hnotify_op = {
14795+ .ctl = au_hfsn_ctl,
14796+ .alloc = au_hfsn_alloc,
14797+ .free = au_hfsn_free,
1facf9fc 14798+
7eafdf33
AM
14799+ .fin = au_hfsn_fin,
14800+
027c5e7a
AM
14801+ .reset_br = au_hfsn_reset_br,
14802+ .fin_br = au_hfsn_fin_br,
14803+ .init_br = au_hfsn_init_br
4a4d8108 14804+};
7f207e10
AM
14805diff -urN /usr/share/empty/fs/aufs/hfsplus.c linux/fs/aufs/hfsplus.c
14806--- /usr/share/empty/fs/aufs/hfsplus.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 14807+++ linux/fs/aufs/hfsplus.c 2016-02-28 11:26:32.573304539 +0100
523b37e3 14808@@ -0,0 +1,56 @@
4a4d8108 14809+/*
8cdd5066 14810+ * Copyright (C) 2010-2016 Junjiro R. Okajima
4a4d8108
AM
14811+ *
14812+ * This program, aufs is free software; you can redistribute it and/or modify
14813+ * it under the terms of the GNU General Public License as published by
14814+ * the Free Software Foundation; either version 2 of the License, or
14815+ * (at your option) any later version.
14816+ *
14817+ * This program is distributed in the hope that it will be useful,
14818+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14819+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14820+ * GNU General Public License for more details.
14821+ *
14822+ * You should have received a copy of the GNU General Public License
523b37e3 14823+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 14824+ */
1facf9fc 14825+
4a4d8108
AM
14826+/*
14827+ * special support for filesystems which acquire an inode mutex
14828+ * at the final closing of a file, e.g. hfsplus.
14829+ *
14830+ * This trick is very simple and stupid, just to open the file before really
14831+ * necessary open to tell hfsplus that this is not the final closing.
14832+ * The caller should call au_h_open_pre() after acquiring the inode mutex,
14833+ * and au_h_open_post() after releasing it.
14834+ */
1facf9fc 14835+
4a4d8108 14836+#include "aufs.h"
1facf9fc 14837+
392086de
AM
14838+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
14839+ int force_wr)
4a4d8108
AM
14840+{
14841+ struct file *h_file;
14842+ struct dentry *h_dentry;
1facf9fc 14843+
4a4d8108
AM
14844+ h_dentry = au_h_dptr(dentry, bindex);
14845+ AuDebugOn(!h_dentry);
5527c038 14846+ AuDebugOn(d_is_negative(h_dentry));
4a4d8108
AM
14847+
14848+ h_file = NULL;
14849+ if (au_test_hfsplus(h_dentry->d_sb)
7e9cd9fe 14850+ && d_is_reg(h_dentry))
4a4d8108
AM
14851+ h_file = au_h_open(dentry, bindex,
14852+ O_RDONLY | O_NOATIME | O_LARGEFILE,
392086de 14853+ /*file*/NULL, force_wr);
4a4d8108 14854+ return h_file;
1facf9fc 14855+}
14856+
4a4d8108
AM
14857+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
14858+ struct file *h_file)
14859+{
14860+ if (h_file) {
14861+ fput(h_file);
14862+ au_sbr_put(dentry->d_sb, bindex);
14863+ }
14864+}
7f207e10
AM
14865diff -urN /usr/share/empty/fs/aufs/hnotify.c linux/fs/aufs/hnotify.c
14866--- /usr/share/empty/fs/aufs/hnotify.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 14867+++ linux/fs/aufs/hnotify.c 2016-02-28 11:26:32.573304539 +0100
5527c038 14868@@ -0,0 +1,710 @@
e49829fe 14869+/*
8cdd5066 14870+ * Copyright (C) 2005-2016 Junjiro R. Okajima
e49829fe
JR
14871+ *
14872+ * This program, aufs is free software; you can redistribute it and/or modify
14873+ * it under the terms of the GNU General Public License as published by
14874+ * the Free Software Foundation; either version 2 of the License, or
14875+ * (at your option) any later version.
14876+ *
14877+ * This program is distributed in the hope that it will be useful,
14878+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14879+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14880+ * GNU General Public License for more details.
14881+ *
14882+ * You should have received a copy of the GNU General Public License
523b37e3 14883+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
e49829fe
JR
14884+ */
14885+
14886+/*
7f207e10 14887+ * abstraction to notify the direct changes on lower directories
e49829fe
JR
14888+ */
14889+
14890+#include "aufs.h"
14891+
027c5e7a 14892+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode)
e49829fe
JR
14893+{
14894+ int err;
7f207e10 14895+ struct au_hnotify *hn;
1facf9fc 14896+
4a4d8108
AM
14897+ err = -ENOMEM;
14898+ hn = au_cache_alloc_hnotify();
14899+ if (hn) {
14900+ hn->hn_aufs_inode = inode;
027c5e7a
AM
14901+ hinode->hi_notify = hn;
14902+ err = au_hnotify_op.alloc(hinode);
14903+ AuTraceErr(err);
14904+ if (unlikely(err)) {
14905+ hinode->hi_notify = NULL;
4a4d8108
AM
14906+ au_cache_free_hnotify(hn);
14907+ /*
14908+ * The upper dir was removed by udba, but the same named
14909+ * dir left. In this case, aufs assigns a new inode
14910+ * number and sets the monitor again.
14911+ * For the lower dir, the old monitor is still left.
14912+ */
14913+ if (err == -EEXIST)
14914+ err = 0;
14915+ }
1308ab2a 14916+ }
1308ab2a 14917+
027c5e7a 14918+ AuTraceErr(err);
1308ab2a 14919+ return err;
dece6358 14920+}
1facf9fc 14921+
4a4d8108 14922+void au_hn_free(struct au_hinode *hinode)
dece6358 14923+{
4a4d8108 14924+ struct au_hnotify *hn;
1facf9fc 14925+
4a4d8108
AM
14926+ hn = hinode->hi_notify;
14927+ if (hn) {
4a4d8108 14928+ hinode->hi_notify = NULL;
7eafdf33
AM
14929+ if (au_hnotify_op.free(hinode, hn))
14930+ au_cache_free_hnotify(hn);
4a4d8108
AM
14931+ }
14932+}
dece6358 14933+
4a4d8108 14934+/* ---------------------------------------------------------------------- */
dece6358 14935+
4a4d8108
AM
14936+void au_hn_ctl(struct au_hinode *hinode, int do_set)
14937+{
14938+ if (hinode->hi_notify)
14939+ au_hnotify_op.ctl(hinode, do_set);
14940+}
14941+
14942+void au_hn_reset(struct inode *inode, unsigned int flags)
14943+{
14944+ aufs_bindex_t bindex, bend;
14945+ struct inode *hi;
14946+ struct dentry *iwhdentry;
1facf9fc 14947+
1308ab2a 14948+ bend = au_ibend(inode);
4a4d8108
AM
14949+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
14950+ hi = au_h_iptr(inode, bindex);
14951+ if (!hi)
14952+ continue;
1308ab2a 14953+
4a4d8108
AM
14954+ /* mutex_lock_nested(&hi->i_mutex, AuLsc_I_CHILD); */
14955+ iwhdentry = au_hi_wh(inode, bindex);
14956+ if (iwhdentry)
14957+ dget(iwhdentry);
14958+ au_igrab(hi);
14959+ au_set_h_iptr(inode, bindex, NULL, 0);
14960+ au_set_h_iptr(inode, bindex, au_igrab(hi),
14961+ flags & ~AuHi_XINO);
14962+ iput(hi);
14963+ dput(iwhdentry);
14964+ /* mutex_unlock(&hi->i_mutex); */
1facf9fc 14965+ }
1facf9fc 14966+}
14967+
1308ab2a 14968+/* ---------------------------------------------------------------------- */
1facf9fc 14969+
4a4d8108 14970+static int hn_xino(struct inode *inode, struct inode *h_inode)
1facf9fc 14971+{
4a4d8108
AM
14972+ int err;
14973+ aufs_bindex_t bindex, bend, bfound, bstart;
14974+ struct inode *h_i;
1facf9fc 14975+
4a4d8108
AM
14976+ err = 0;
14977+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
0c3ec466 14978+ pr_warn("branch root dir was changed\n");
4a4d8108
AM
14979+ goto out;
14980+ }
1facf9fc 14981+
4a4d8108
AM
14982+ bfound = -1;
14983+ bend = au_ibend(inode);
14984+ bstart = au_ibstart(inode);
14985+#if 0 /* reserved for future use */
14986+ if (bindex == bend) {
14987+ /* keep this ino in rename case */
14988+ goto out;
14989+ }
14990+#endif
14991+ for (bindex = bstart; bindex <= bend; bindex++)
14992+ if (au_h_iptr(inode, bindex) == h_inode) {
14993+ bfound = bindex;
14994+ break;
14995+ }
14996+ if (bfound < 0)
1308ab2a 14997+ goto out;
1facf9fc 14998+
4a4d8108
AM
14999+ for (bindex = bstart; bindex <= bend; bindex++) {
15000+ h_i = au_h_iptr(inode, bindex);
15001+ if (!h_i)
15002+ continue;
1facf9fc 15003+
4a4d8108
AM
15004+ err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0);
15005+ /* ignore this error */
15006+ /* bad action? */
1facf9fc 15007+ }
1facf9fc 15008+
4a4d8108 15009+ /* children inode number will be broken */
1facf9fc 15010+
4f0767ce 15011+out:
4a4d8108
AM
15012+ AuTraceErr(err);
15013+ return err;
1facf9fc 15014+}
15015+
4a4d8108 15016+static int hn_gen_tree(struct dentry *dentry)
1facf9fc 15017+{
4a4d8108
AM
15018+ int err, i, j, ndentry;
15019+ struct au_dcsub_pages dpages;
15020+ struct au_dpage *dpage;
15021+ struct dentry **dentries;
1facf9fc 15022+
4a4d8108
AM
15023+ err = au_dpages_init(&dpages, GFP_NOFS);
15024+ if (unlikely(err))
15025+ goto out;
15026+ err = au_dcsub_pages(&dpages, dentry, NULL, NULL);
15027+ if (unlikely(err))
15028+ goto out_dpages;
1facf9fc 15029+
4a4d8108
AM
15030+ for (i = 0; i < dpages.ndpage; i++) {
15031+ dpage = dpages.dpages + i;
15032+ dentries = dpage->dentries;
15033+ ndentry = dpage->ndentry;
15034+ for (j = 0; j < ndentry; j++) {
15035+ struct dentry *d;
15036+
15037+ d = dentries[j];
15038+ if (IS_ROOT(d))
15039+ continue;
15040+
4a4d8108 15041+ au_digen_dec(d);
5527c038 15042+ if (d_really_is_positive(d))
4a4d8108
AM
15043+ /* todo: reset children xino?
15044+ cached children only? */
5527c038 15045+ au_iigen_dec(d_inode(d));
1308ab2a 15046+ }
dece6358 15047+ }
1facf9fc 15048+
4f0767ce 15049+out_dpages:
4a4d8108 15050+ au_dpages_free(&dpages);
dece6358 15051+
027c5e7a 15052+#if 0
4a4d8108
AM
15053+ /* discard children */
15054+ dentry_unhash(dentry);
15055+ dput(dentry);
027c5e7a 15056+#endif
4f0767ce 15057+out:
dece6358
AM
15058+ return err;
15059+}
15060+
1308ab2a 15061+/*
4a4d8108 15062+ * return 0 if processed.
1308ab2a 15063+ */
4a4d8108
AM
15064+static int hn_gen_by_inode(char *name, unsigned int nlen, struct inode *inode,
15065+ const unsigned int isdir)
dece6358 15066+{
1308ab2a 15067+ int err;
4a4d8108
AM
15068+ struct dentry *d;
15069+ struct qstr *dname;
1facf9fc 15070+
4a4d8108
AM
15071+ err = 1;
15072+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
0c3ec466 15073+ pr_warn("branch root dir was changed\n");
4a4d8108
AM
15074+ err = 0;
15075+ goto out;
15076+ }
dece6358 15077+
4a4d8108
AM
15078+ if (!isdir) {
15079+ AuDebugOn(!name);
15080+ au_iigen_dec(inode);
027c5e7a 15081+ spin_lock(&inode->i_lock);
c1595e42 15082+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
027c5e7a 15083+ spin_lock(&d->d_lock);
4a4d8108
AM
15084+ dname = &d->d_name;
15085+ if (dname->len != nlen
027c5e7a
AM
15086+ && memcmp(dname->name, name, nlen)) {
15087+ spin_unlock(&d->d_lock);
4a4d8108 15088+ continue;
027c5e7a 15089+ }
4a4d8108 15090+ err = 0;
4a4d8108
AM
15091+ au_digen_dec(d);
15092+ spin_unlock(&d->d_lock);
15093+ break;
1facf9fc 15094+ }
027c5e7a 15095+ spin_unlock(&inode->i_lock);
1308ab2a 15096+ } else {
027c5e7a 15097+ au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIR);
c1595e42 15098+ d = d_find_any_alias(inode);
4a4d8108
AM
15099+ if (!d) {
15100+ au_iigen_dec(inode);
15101+ goto out;
15102+ }
1facf9fc 15103+
027c5e7a 15104+ spin_lock(&d->d_lock);
4a4d8108 15105+ dname = &d->d_name;
027c5e7a
AM
15106+ if (dname->len == nlen && !memcmp(dname->name, name, nlen)) {
15107+ spin_unlock(&d->d_lock);
4a4d8108 15108+ err = hn_gen_tree(d);
027c5e7a
AM
15109+ spin_lock(&d->d_lock);
15110+ }
15111+ spin_unlock(&d->d_lock);
4a4d8108
AM
15112+ dput(d);
15113+ }
1facf9fc 15114+
4f0767ce 15115+out:
4a4d8108 15116+ AuTraceErr(err);
1308ab2a 15117+ return err;
15118+}
dece6358 15119+
4a4d8108 15120+static int hn_gen_by_name(struct dentry *dentry, const unsigned int isdir)
1facf9fc 15121+{
4a4d8108 15122+ int err;
1facf9fc 15123+
5527c038 15124+ if (IS_ROOT(dentry)) {
0c3ec466 15125+ pr_warn("branch root dir was changed\n");
4a4d8108
AM
15126+ return 0;
15127+ }
1308ab2a 15128+
4a4d8108
AM
15129+ err = 0;
15130+ if (!isdir) {
4a4d8108 15131+ au_digen_dec(dentry);
5527c038
JR
15132+ if (d_really_is_positive(dentry))
15133+ au_iigen_dec(d_inode(dentry));
4a4d8108 15134+ } else {
027c5e7a 15135+ au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR);
5527c038 15136+ if (d_really_is_positive(dentry))
4a4d8108
AM
15137+ err = hn_gen_tree(dentry);
15138+ }
15139+
15140+ AuTraceErr(err);
15141+ return err;
1facf9fc 15142+}
15143+
4a4d8108 15144+/* ---------------------------------------------------------------------- */
1facf9fc 15145+
4a4d8108
AM
15146+/* hnotify job flags */
15147+#define AuHnJob_XINO0 1
15148+#define AuHnJob_GEN (1 << 1)
15149+#define AuHnJob_DIRENT (1 << 2)
15150+#define AuHnJob_ISDIR (1 << 3)
15151+#define AuHnJob_TRYXINO0 (1 << 4)
15152+#define AuHnJob_MNTPNT (1 << 5)
15153+#define au_ftest_hnjob(flags, name) ((flags) & AuHnJob_##name)
7f207e10
AM
15154+#define au_fset_hnjob(flags, name) \
15155+ do { (flags) |= AuHnJob_##name; } while (0)
15156+#define au_fclr_hnjob(flags, name) \
15157+ do { (flags) &= ~AuHnJob_##name; } while (0)
1facf9fc 15158+
4a4d8108
AM
15159+enum {
15160+ AuHn_CHILD,
15161+ AuHn_PARENT,
15162+ AuHnLast
15163+};
1facf9fc 15164+
4a4d8108
AM
15165+struct au_hnotify_args {
15166+ struct inode *h_dir, *dir, *h_child_inode;
15167+ u32 mask;
15168+ unsigned int flags[AuHnLast];
15169+ unsigned int h_child_nlen;
15170+ char h_child_name[];
15171+};
1facf9fc 15172+
4a4d8108
AM
15173+struct hn_job_args {
15174+ unsigned int flags;
15175+ struct inode *inode, *h_inode, *dir, *h_dir;
15176+ struct dentry *dentry;
15177+ char *h_name;
15178+ int h_nlen;
15179+};
1308ab2a 15180+
4a4d8108
AM
15181+static int hn_job(struct hn_job_args *a)
15182+{
15183+ const unsigned int isdir = au_ftest_hnjob(a->flags, ISDIR);
076b876e 15184+ int e;
1308ab2a 15185+
4a4d8108
AM
15186+ /* reset xino */
15187+ if (au_ftest_hnjob(a->flags, XINO0) && a->inode)
15188+ hn_xino(a->inode, a->h_inode); /* ignore this error */
1308ab2a 15189+
4a4d8108
AM
15190+ if (au_ftest_hnjob(a->flags, TRYXINO0)
15191+ && a->inode
15192+ && a->h_inode) {
15193+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
38d290e6
JR
15194+ if (!a->h_inode->i_nlink
15195+ && !(a->h_inode->i_state & I_LINKABLE))
4a4d8108
AM
15196+ hn_xino(a->inode, a->h_inode); /* ignore this error */
15197+ mutex_unlock(&a->h_inode->i_mutex);
1308ab2a 15198+ }
1facf9fc 15199+
4a4d8108
AM
15200+ /* make the generation obsolete */
15201+ if (au_ftest_hnjob(a->flags, GEN)) {
076b876e 15202+ e = -1;
4a4d8108 15203+ if (a->inode)
076b876e 15204+ e = hn_gen_by_inode(a->h_name, a->h_nlen, a->inode,
4a4d8108 15205+ isdir);
076b876e 15206+ if (e && a->dentry)
4a4d8108
AM
15207+ hn_gen_by_name(a->dentry, isdir);
15208+ /* ignore this error */
1facf9fc 15209+ }
1facf9fc 15210+
4a4d8108
AM
15211+ /* make dir entries obsolete */
15212+ if (au_ftest_hnjob(a->flags, DIRENT) && a->inode) {
15213+ struct au_vdir *vdir;
1facf9fc 15214+
4a4d8108
AM
15215+ vdir = au_ivdir(a->inode);
15216+ if (vdir)
15217+ vdir->vd_jiffy = 0;
15218+ /* IMustLock(a->inode); */
15219+ /* a->inode->i_version++; */
15220+ }
1facf9fc 15221+
4a4d8108
AM
15222+ /* can do nothing but warn */
15223+ if (au_ftest_hnjob(a->flags, MNTPNT)
15224+ && a->dentry
15225+ && d_mountpoint(a->dentry))
523b37e3 15226+ pr_warn("mount-point %pd is removed or renamed\n", a->dentry);
1facf9fc 15227+
4a4d8108 15228+ return 0;
1308ab2a 15229+}
1facf9fc 15230+
1308ab2a 15231+/* ---------------------------------------------------------------------- */
1facf9fc 15232+
4a4d8108
AM
15233+static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen,
15234+ struct inode *dir)
1308ab2a 15235+{
4a4d8108
AM
15236+ struct dentry *dentry, *d, *parent;
15237+ struct qstr *dname;
1308ab2a 15238+
c1595e42 15239+ parent = d_find_any_alias(dir);
4a4d8108
AM
15240+ if (!parent)
15241+ return NULL;
1308ab2a 15242+
4a4d8108 15243+ dentry = NULL;
027c5e7a 15244+ spin_lock(&parent->d_lock);
c1595e42 15245+ list_for_each_entry(d, &parent->d_subdirs, d_child) {
523b37e3 15246+ /* AuDbg("%pd\n", d); */
027c5e7a 15247+ spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
4a4d8108
AM
15248+ dname = &d->d_name;
15249+ if (dname->len != nlen || memcmp(dname->name, name, nlen))
027c5e7a
AM
15250+ goto cont_unlock;
15251+ if (au_di(d))
15252+ au_digen_dec(d);
15253+ else
15254+ goto cont_unlock;
c1595e42 15255+ if (au_dcount(d) > 0) {
027c5e7a 15256+ dentry = dget_dlock(d);
4a4d8108 15257+ spin_unlock(&d->d_lock);
027c5e7a 15258+ break;
dece6358 15259+ }
1facf9fc 15260+
f6b6e03d 15261+cont_unlock:
027c5e7a 15262+ spin_unlock(&d->d_lock);
1308ab2a 15263+ }
027c5e7a 15264+ spin_unlock(&parent->d_lock);
4a4d8108 15265+ dput(parent);
1facf9fc 15266+
4a4d8108
AM
15267+ if (dentry)
15268+ di_write_lock_child(dentry);
1308ab2a 15269+
4a4d8108
AM
15270+ return dentry;
15271+}
dece6358 15272+
4a4d8108
AM
15273+static struct inode *lookup_wlock_by_ino(struct super_block *sb,
15274+ aufs_bindex_t bindex, ino_t h_ino)
15275+{
15276+ struct inode *inode;
15277+ ino_t ino;
15278+ int err;
15279+
15280+ inode = NULL;
15281+ err = au_xino_read(sb, bindex, h_ino, &ino);
15282+ if (!err && ino)
15283+ inode = ilookup(sb, ino);
15284+ if (!inode)
15285+ goto out;
15286+
15287+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
0c3ec466 15288+ pr_warn("wrong root branch\n");
4a4d8108
AM
15289+ iput(inode);
15290+ inode = NULL;
15291+ goto out;
1308ab2a 15292+ }
15293+
4a4d8108 15294+ ii_write_lock_child(inode);
1308ab2a 15295+
4f0767ce 15296+out:
4a4d8108 15297+ return inode;
dece6358
AM
15298+}
15299+
4a4d8108 15300+static void au_hn_bh(void *_args)
1facf9fc 15301+{
4a4d8108
AM
15302+ struct au_hnotify_args *a = _args;
15303+ struct super_block *sb;
15304+ aufs_bindex_t bindex, bend, bfound;
15305+ unsigned char xino, try_iput;
1facf9fc 15306+ int err;
1308ab2a 15307+ struct inode *inode;
4a4d8108
AM
15308+ ino_t h_ino;
15309+ struct hn_job_args args;
15310+ struct dentry *dentry;
15311+ struct au_sbinfo *sbinfo;
1facf9fc 15312+
4a4d8108
AM
15313+ AuDebugOn(!_args);
15314+ AuDebugOn(!a->h_dir);
15315+ AuDebugOn(!a->dir);
15316+ AuDebugOn(!a->mask);
15317+ AuDbg("mask 0x%x, i%lu, hi%lu, hci%lu\n",
15318+ a->mask, a->dir->i_ino, a->h_dir->i_ino,
15319+ a->h_child_inode ? a->h_child_inode->i_ino : 0);
1facf9fc 15320+
4a4d8108
AM
15321+ inode = NULL;
15322+ dentry = NULL;
15323+ /*
15324+ * do not lock a->dir->i_mutex here
15325+ * because d_revalidate() may cause a deadlock.
15326+ */
15327+ sb = a->dir->i_sb;
15328+ AuDebugOn(!sb);
15329+ sbinfo = au_sbi(sb);
15330+ AuDebugOn(!sbinfo);
7f207e10 15331+ si_write_lock(sb, AuLock_NOPLMW);
1facf9fc 15332+
4a4d8108
AM
15333+ ii_read_lock_parent(a->dir);
15334+ bfound = -1;
15335+ bend = au_ibend(a->dir);
15336+ for (bindex = au_ibstart(a->dir); bindex <= bend; bindex++)
15337+ if (au_h_iptr(a->dir, bindex) == a->h_dir) {
15338+ bfound = bindex;
15339+ break;
15340+ }
15341+ ii_read_unlock(a->dir);
15342+ if (unlikely(bfound < 0))
15343+ goto out;
1facf9fc 15344+
4a4d8108
AM
15345+ xino = !!au_opt_test(au_mntflags(sb), XINO);
15346+ h_ino = 0;
15347+ if (a->h_child_inode)
15348+ h_ino = a->h_child_inode->i_ino;
1facf9fc 15349+
4a4d8108
AM
15350+ if (a->h_child_nlen
15351+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], GEN)
15352+ || au_ftest_hnjob(a->flags[AuHn_CHILD], MNTPNT)))
15353+ dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen,
15354+ a->dir);
15355+ try_iput = 0;
5527c038
JR
15356+ if (dentry && d_really_is_positive(dentry))
15357+ inode = d_inode(dentry);
4a4d8108
AM
15358+ if (xino && !inode && h_ino
15359+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], XINO0)
15360+ || au_ftest_hnjob(a->flags[AuHn_CHILD], TRYXINO0)
15361+ || au_ftest_hnjob(a->flags[AuHn_CHILD], GEN))) {
15362+ inode = lookup_wlock_by_ino(sb, bfound, h_ino);
15363+ try_iput = 1;
15364+ }
1facf9fc 15365+
4a4d8108
AM
15366+ args.flags = a->flags[AuHn_CHILD];
15367+ args.dentry = dentry;
15368+ args.inode = inode;
15369+ args.h_inode = a->h_child_inode;
15370+ args.dir = a->dir;
15371+ args.h_dir = a->h_dir;
15372+ args.h_name = a->h_child_name;
15373+ args.h_nlen = a->h_child_nlen;
15374+ err = hn_job(&args);
15375+ if (dentry) {
027c5e7a 15376+ if (au_di(dentry))
4a4d8108
AM
15377+ di_write_unlock(dentry);
15378+ dput(dentry);
15379+ }
15380+ if (inode && try_iput) {
15381+ ii_write_unlock(inode);
15382+ iput(inode);
15383+ }
1facf9fc 15384+
4a4d8108
AM
15385+ ii_write_lock_parent(a->dir);
15386+ args.flags = a->flags[AuHn_PARENT];
15387+ args.dentry = NULL;
15388+ args.inode = a->dir;
15389+ args.h_inode = a->h_dir;
15390+ args.dir = NULL;
15391+ args.h_dir = NULL;
15392+ args.h_name = NULL;
15393+ args.h_nlen = 0;
15394+ err = hn_job(&args);
15395+ ii_write_unlock(a->dir);
1facf9fc 15396+
4f0767ce 15397+out:
4a4d8108
AM
15398+ iput(a->h_child_inode);
15399+ iput(a->h_dir);
15400+ iput(a->dir);
027c5e7a
AM
15401+ si_write_unlock(sb);
15402+ au_nwt_done(&sbinfo->si_nowait);
1308ab2a 15403+ kfree(a);
dece6358 15404+}
1facf9fc 15405+
4a4d8108
AM
15406+/* ---------------------------------------------------------------------- */
15407+
15408+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
15409+ struct qstr *h_child_qstr, struct inode *h_child_inode)
dece6358 15410+{
4a4d8108 15411+ int err, len;
53392da6 15412+ unsigned int flags[AuHnLast], f;
4a4d8108
AM
15413+ unsigned char isdir, isroot, wh;
15414+ struct inode *dir;
15415+ struct au_hnotify_args *args;
15416+ char *p, *h_child_name;
dece6358 15417+
1308ab2a 15418+ err = 0;
4a4d8108
AM
15419+ AuDebugOn(!hnotify || !hnotify->hn_aufs_inode);
15420+ dir = igrab(hnotify->hn_aufs_inode);
15421+ if (!dir)
15422+ goto out;
1facf9fc 15423+
4a4d8108
AM
15424+ isroot = (dir->i_ino == AUFS_ROOT_INO);
15425+ wh = 0;
15426+ h_child_name = (void *)h_child_qstr->name;
15427+ len = h_child_qstr->len;
15428+ if (h_child_name) {
15429+ if (len > AUFS_WH_PFX_LEN
15430+ && !memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
15431+ h_child_name += AUFS_WH_PFX_LEN;
15432+ len -= AUFS_WH_PFX_LEN;
15433+ wh = 1;
15434+ }
1facf9fc 15435+ }
dece6358 15436+
4a4d8108
AM
15437+ isdir = 0;
15438+ if (h_child_inode)
15439+ isdir = !!S_ISDIR(h_child_inode->i_mode);
15440+ flags[AuHn_PARENT] = AuHnJob_ISDIR;
15441+ flags[AuHn_CHILD] = 0;
15442+ if (isdir)
15443+ flags[AuHn_CHILD] = AuHnJob_ISDIR;
15444+ au_fset_hnjob(flags[AuHn_PARENT], DIRENT);
15445+ au_fset_hnjob(flags[AuHn_CHILD], GEN);
15446+ switch (mask & FS_EVENTS_POSS_ON_CHILD) {
15447+ case FS_MOVED_FROM:
15448+ case FS_MOVED_TO:
15449+ au_fset_hnjob(flags[AuHn_CHILD], XINO0);
15450+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
15451+ /*FALLTHROUGH*/
15452+ case FS_CREATE:
fb47a38f 15453+ AuDebugOn(!h_child_name);
4a4d8108 15454+ break;
1facf9fc 15455+
4a4d8108
AM
15456+ case FS_DELETE:
15457+ /*
15458+ * aufs will never be able to get this child inode.
15459+ * revalidation should be in d_revalidate()
15460+ * by checking i_nlink, i_generation or d_unhashed().
15461+ */
15462+ AuDebugOn(!h_child_name);
15463+ au_fset_hnjob(flags[AuHn_CHILD], TRYXINO0);
15464+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
15465+ break;
dece6358 15466+
4a4d8108
AM
15467+ default:
15468+ AuDebugOn(1);
15469+ }
1308ab2a 15470+
4a4d8108
AM
15471+ if (wh)
15472+ h_child_inode = NULL;
1308ab2a 15473+
4a4d8108
AM
15474+ err = -ENOMEM;
15475+ /* iput() and kfree() will be called in au_hn_bh() */
4a4d8108 15476+ args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS);
4a4d8108
AM
15477+ if (unlikely(!args)) {
15478+ AuErr1("no memory\n");
15479+ iput(dir);
15480+ goto out;
15481+ }
15482+ args->flags[AuHn_PARENT] = flags[AuHn_PARENT];
15483+ args->flags[AuHn_CHILD] = flags[AuHn_CHILD];
15484+ args->mask = mask;
15485+ args->dir = dir;
15486+ args->h_dir = igrab(h_dir);
15487+ if (h_child_inode)
15488+ h_child_inode = igrab(h_child_inode); /* can be NULL */
15489+ args->h_child_inode = h_child_inode;
15490+ args->h_child_nlen = len;
15491+ if (len) {
15492+ p = (void *)args;
15493+ p += sizeof(*args);
15494+ memcpy(p, h_child_name, len);
15495+ p[len] = 0;
1308ab2a 15496+ }
1308ab2a 15497+
38d290e6 15498+ /* NFS fires the event for silly-renamed one from kworker */
53392da6 15499+ f = 0;
38d290e6
JR
15500+ if (!dir->i_nlink
15501+ || (au_test_nfs(h_dir->i_sb) && (mask & FS_DELETE)))
53392da6
AM
15502+ f = AuWkq_NEST;
15503+ err = au_wkq_nowait(au_hn_bh, args, dir->i_sb, f);
4a4d8108
AM
15504+ if (unlikely(err)) {
15505+ pr_err("wkq %d\n", err);
15506+ iput(args->h_child_inode);
15507+ iput(args->h_dir);
15508+ iput(args->dir);
15509+ kfree(args);
1facf9fc 15510+ }
1facf9fc 15511+
4a4d8108 15512+out:
1facf9fc 15513+ return err;
15514+}
15515+
027c5e7a
AM
15516+/* ---------------------------------------------------------------------- */
15517+
15518+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm)
15519+{
15520+ int err;
15521+
15522+ AuDebugOn(!(udba & AuOptMask_UDBA));
15523+
15524+ err = 0;
15525+ if (au_hnotify_op.reset_br)
15526+ err = au_hnotify_op.reset_br(udba, br, perm);
15527+
15528+ return err;
15529+}
15530+
15531+int au_hnotify_init_br(struct au_branch *br, int perm)
15532+{
15533+ int err;
15534+
15535+ err = 0;
15536+ if (au_hnotify_op.init_br)
15537+ err = au_hnotify_op.init_br(br, perm);
15538+
15539+ return err;
15540+}
15541+
15542+void au_hnotify_fin_br(struct au_branch *br)
15543+{
15544+ if (au_hnotify_op.fin_br)
15545+ au_hnotify_op.fin_br(br);
15546+}
15547+
4a4d8108
AM
15548+static void au_hn_destroy_cache(void)
15549+{
15550+ kmem_cache_destroy(au_cachep[AuCache_HNOTIFY]);
15551+ au_cachep[AuCache_HNOTIFY] = NULL;
15552+}
1308ab2a 15553+
4a4d8108 15554+int __init au_hnotify_init(void)
1facf9fc 15555+{
1308ab2a 15556+ int err;
1308ab2a 15557+
4a4d8108
AM
15558+ err = -ENOMEM;
15559+ au_cachep[AuCache_HNOTIFY] = AuCache(au_hnotify);
15560+ if (au_cachep[AuCache_HNOTIFY]) {
027c5e7a
AM
15561+ err = 0;
15562+ if (au_hnotify_op.init)
15563+ err = au_hnotify_op.init();
4a4d8108
AM
15564+ if (unlikely(err))
15565+ au_hn_destroy_cache();
1308ab2a 15566+ }
1308ab2a 15567+ AuTraceErr(err);
4a4d8108 15568+ return err;
1308ab2a 15569+}
15570+
4a4d8108 15571+void au_hnotify_fin(void)
1308ab2a 15572+{
027c5e7a
AM
15573+ if (au_hnotify_op.fin)
15574+ au_hnotify_op.fin();
4a4d8108
AM
15575+ /* cf. au_cache_fin() */
15576+ if (au_cachep[AuCache_HNOTIFY])
15577+ au_hn_destroy_cache();
dece6358 15578+}
7f207e10
AM
15579diff -urN /usr/share/empty/fs/aufs/iinfo.c linux/fs/aufs/iinfo.c
15580--- /usr/share/empty/fs/aufs/iinfo.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 15581+++ linux/fs/aufs/iinfo.c 2016-02-28 11:26:32.573304539 +0100
38d290e6 15582@@ -0,0 +1,277 @@
dece6358 15583+/*
8cdd5066 15584+ * Copyright (C) 2005-2016 Junjiro R. Okajima
dece6358
AM
15585+ *
15586+ * This program, aufs is free software; you can redistribute it and/or modify
15587+ * it under the terms of the GNU General Public License as published by
15588+ * the Free Software Foundation; either version 2 of the License, or
15589+ * (at your option) any later version.
15590+ *
15591+ * This program is distributed in the hope that it will be useful,
15592+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15593+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15594+ * GNU General Public License for more details.
15595+ *
15596+ * You should have received a copy of the GNU General Public License
523b37e3 15597+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
dece6358 15598+ */
1facf9fc 15599+
dece6358 15600+/*
4a4d8108 15601+ * inode private data
dece6358 15602+ */
1facf9fc 15603+
1308ab2a 15604+#include "aufs.h"
1facf9fc 15605+
4a4d8108 15606+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 15607+{
4a4d8108 15608+ struct inode *h_inode;
1facf9fc 15609+
4a4d8108 15610+ IiMustAnyLock(inode);
1facf9fc 15611+
4a4d8108
AM
15612+ h_inode = au_ii(inode)->ii_hinode[0 + bindex].hi_inode;
15613+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
15614+ return h_inode;
15615+}
1facf9fc 15616+
4a4d8108
AM
15617+/* todo: hard/soft set? */
15618+void au_hiput(struct au_hinode *hinode)
15619+{
15620+ au_hn_free(hinode);
15621+ dput(hinode->hi_whdentry);
15622+ iput(hinode->hi_inode);
15623+}
1facf9fc 15624+
4a4d8108
AM
15625+unsigned int au_hi_flags(struct inode *inode, int isdir)
15626+{
15627+ unsigned int flags;
15628+ const unsigned int mnt_flags = au_mntflags(inode->i_sb);
1facf9fc 15629+
4a4d8108
AM
15630+ flags = 0;
15631+ if (au_opt_test(mnt_flags, XINO))
15632+ au_fset_hi(flags, XINO);
15633+ if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY))
15634+ au_fset_hi(flags, HNOTIFY);
15635+ return flags;
1facf9fc 15636+}
15637+
4a4d8108
AM
15638+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
15639+ struct inode *h_inode, unsigned int flags)
1308ab2a 15640+{
4a4d8108
AM
15641+ struct au_hinode *hinode;
15642+ struct inode *hi;
15643+ struct au_iinfo *iinfo = au_ii(inode);
1facf9fc 15644+
4a4d8108 15645+ IiMustWriteLock(inode);
dece6358 15646+
4a4d8108
AM
15647+ hinode = iinfo->ii_hinode + bindex;
15648+ hi = hinode->hi_inode;
15649+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
15650+
15651+ if (hi)
15652+ au_hiput(hinode);
15653+ hinode->hi_inode = h_inode;
15654+ if (h_inode) {
15655+ int err;
15656+ struct super_block *sb = inode->i_sb;
15657+ struct au_branch *br;
15658+
027c5e7a
AM
15659+ AuDebugOn(inode->i_mode
15660+ && (h_inode->i_mode & S_IFMT)
15661+ != (inode->i_mode & S_IFMT));
4a4d8108
AM
15662+ if (bindex == iinfo->ii_bstart)
15663+ au_cpup_igen(inode, h_inode);
15664+ br = au_sbr(sb, bindex);
15665+ hinode->hi_id = br->br_id;
15666+ if (au_ftest_hi(flags, XINO)) {
15667+ err = au_xino_write(sb, bindex, h_inode->i_ino,
15668+ inode->i_ino);
15669+ if (unlikely(err))
15670+ AuIOErr1("failed au_xino_write() %d\n", err);
15671+ }
15672+
15673+ if (au_ftest_hi(flags, HNOTIFY)
15674+ && au_br_hnotifyable(br->br_perm)) {
027c5e7a 15675+ err = au_hn_alloc(hinode, inode);
4a4d8108
AM
15676+ if (unlikely(err))
15677+ AuIOErr1("au_hn_alloc() %d\n", err);
1308ab2a 15678+ }
15679+ }
4a4d8108 15680+}
dece6358 15681+
4a4d8108
AM
15682+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
15683+ struct dentry *h_wh)
15684+{
15685+ struct au_hinode *hinode;
dece6358 15686+
4a4d8108
AM
15687+ IiMustWriteLock(inode);
15688+
15689+ hinode = au_ii(inode)->ii_hinode + bindex;
15690+ AuDebugOn(hinode->hi_whdentry);
15691+ hinode->hi_whdentry = h_wh;
1facf9fc 15692+}
15693+
537831f9 15694+void au_update_iigen(struct inode *inode, int half)
1308ab2a 15695+{
537831f9
AM
15696+ struct au_iinfo *iinfo;
15697+ struct au_iigen *iigen;
15698+ unsigned int sigen;
15699+
15700+ sigen = au_sigen(inode->i_sb);
15701+ iinfo = au_ii(inode);
15702+ iigen = &iinfo->ii_generation;
be52b249 15703+ spin_lock(&iigen->ig_spin);
537831f9
AM
15704+ iigen->ig_generation = sigen;
15705+ if (half)
15706+ au_ig_fset(iigen->ig_flags, HALF_REFRESHED);
15707+ else
15708+ au_ig_fclr(iigen->ig_flags, HALF_REFRESHED);
be52b249 15709+ spin_unlock(&iigen->ig_spin);
4a4d8108 15710+}
1facf9fc 15711+
4a4d8108
AM
15712+/* it may be called at remount time, too */
15713+void au_update_ibrange(struct inode *inode, int do_put_zero)
15714+{
15715+ struct au_iinfo *iinfo;
027c5e7a 15716+ aufs_bindex_t bindex, bend;
1facf9fc 15717+
4a4d8108 15718+ iinfo = au_ii(inode);
027c5e7a 15719+ if (!iinfo)
4a4d8108 15720+ return;
1facf9fc 15721+
4a4d8108 15722+ IiMustWriteLock(inode);
1facf9fc 15723+
027c5e7a 15724+ if (do_put_zero && iinfo->ii_bstart >= 0) {
4a4d8108
AM
15725+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
15726+ bindex++) {
15727+ struct inode *h_i;
1facf9fc 15728+
4a4d8108 15729+ h_i = iinfo->ii_hinode[0 + bindex].hi_inode;
38d290e6
JR
15730+ if (h_i
15731+ && !h_i->i_nlink
15732+ && !(h_i->i_state & I_LINKABLE))
027c5e7a
AM
15733+ au_set_h_iptr(inode, bindex, NULL, 0);
15734+ }
4a4d8108
AM
15735+ }
15736+
027c5e7a
AM
15737+ iinfo->ii_bstart = -1;
15738+ iinfo->ii_bend = -1;
15739+ bend = au_sbend(inode->i_sb);
15740+ for (bindex = 0; bindex <= bend; bindex++)
15741+ if (iinfo->ii_hinode[0 + bindex].hi_inode) {
15742+ iinfo->ii_bstart = bindex;
4a4d8108 15743+ break;
027c5e7a
AM
15744+ }
15745+ if (iinfo->ii_bstart >= 0)
15746+ for (bindex = bend; bindex >= iinfo->ii_bstart; bindex--)
15747+ if (iinfo->ii_hinode[0 + bindex].hi_inode) {
15748+ iinfo->ii_bend = bindex;
15749+ break;
15750+ }
15751+ AuDebugOn(iinfo->ii_bstart > iinfo->ii_bend);
1308ab2a 15752+}
1facf9fc 15753+
dece6358 15754+/* ---------------------------------------------------------------------- */
1facf9fc 15755+
4a4d8108 15756+void au_icntnr_init_once(void *_c)
dece6358 15757+{
4a4d8108
AM
15758+ struct au_icntnr *c = _c;
15759+ struct au_iinfo *iinfo = &c->iinfo;
e49829fe 15760+ static struct lock_class_key aufs_ii;
1facf9fc 15761+
be52b249 15762+ spin_lock_init(&iinfo->ii_generation.ig_spin);
4a4d8108 15763+ au_rw_init(&iinfo->ii_rwsem);
e49829fe 15764+ au_rw_class(&iinfo->ii_rwsem, &aufs_ii);
4a4d8108
AM
15765+ inode_init_once(&c->vfs_inode);
15766+}
1facf9fc 15767+
4a4d8108
AM
15768+int au_iinfo_init(struct inode *inode)
15769+{
15770+ struct au_iinfo *iinfo;
15771+ struct super_block *sb;
15772+ int nbr, i;
1facf9fc 15773+
4a4d8108
AM
15774+ sb = inode->i_sb;
15775+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
15776+ nbr = au_sbend(sb) + 1;
15777+ if (unlikely(nbr <= 0))
15778+ nbr = 1;
15779+ iinfo->ii_hinode = kcalloc(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS);
15780+ if (iinfo->ii_hinode) {
7f207e10 15781+ au_ninodes_inc(sb);
4a4d8108
AM
15782+ for (i = 0; i < nbr; i++)
15783+ iinfo->ii_hinode[i].hi_id = -1;
1facf9fc 15784+
537831f9 15785+ iinfo->ii_generation.ig_generation = au_sigen(sb);
4a4d8108
AM
15786+ iinfo->ii_bstart = -1;
15787+ iinfo->ii_bend = -1;
15788+ iinfo->ii_vdir = NULL;
15789+ return 0;
1308ab2a 15790+ }
4a4d8108
AM
15791+ return -ENOMEM;
15792+}
1facf9fc 15793+
4a4d8108
AM
15794+int au_ii_realloc(struct au_iinfo *iinfo, int nbr)
15795+{
15796+ int err, sz;
15797+ struct au_hinode *hip;
1facf9fc 15798+
4a4d8108
AM
15799+ AuRwMustWriteLock(&iinfo->ii_rwsem);
15800+
15801+ err = -ENOMEM;
15802+ sz = sizeof(*hip) * (iinfo->ii_bend + 1);
15803+ if (!sz)
15804+ sz = sizeof(*hip);
15805+ hip = au_kzrealloc(iinfo->ii_hinode, sz, sizeof(*hip) * nbr, GFP_NOFS);
15806+ if (hip) {
15807+ iinfo->ii_hinode = hip;
15808+ err = 0;
1308ab2a 15809+ }
4a4d8108 15810+
1308ab2a 15811+ return err;
1facf9fc 15812+}
15813+
4a4d8108 15814+void au_iinfo_fin(struct inode *inode)
1facf9fc 15815+{
4a4d8108
AM
15816+ struct au_iinfo *iinfo;
15817+ struct au_hinode *hi;
15818+ struct super_block *sb;
b752ccd1
AM
15819+ aufs_bindex_t bindex, bend;
15820+ const unsigned char unlinked = !inode->i_nlink;
1308ab2a 15821+
4a4d8108
AM
15822+ iinfo = au_ii(inode);
15823+ /* bad_inode case */
15824+ if (!iinfo)
15825+ return;
1308ab2a 15826+
b752ccd1 15827+ sb = inode->i_sb;
7f207e10 15828+ au_ninodes_dec(sb);
b752ccd1
AM
15829+ if (si_pid_test(sb))
15830+ au_xino_delete_inode(inode, unlinked);
15831+ else {
15832+ /*
15833+ * it is safe to hide the dependency between sbinfo and
15834+ * sb->s_umount.
15835+ */
15836+ lockdep_off();
15837+ si_noflush_read_lock(sb);
15838+ au_xino_delete_inode(inode, unlinked);
15839+ si_read_unlock(sb);
15840+ lockdep_on();
15841+ }
15842+
4a4d8108
AM
15843+ if (iinfo->ii_vdir)
15844+ au_vdir_free(iinfo->ii_vdir);
1308ab2a 15845+
b752ccd1
AM
15846+ bindex = iinfo->ii_bstart;
15847+ if (bindex >= 0) {
15848+ hi = iinfo->ii_hinode + bindex;
4a4d8108 15849+ bend = iinfo->ii_bend;
b752ccd1
AM
15850+ while (bindex++ <= bend) {
15851+ if (hi->hi_inode)
4a4d8108 15852+ au_hiput(hi);
4a4d8108
AM
15853+ hi++;
15854+ }
15855+ }
4a4d8108 15856+ kfree(iinfo->ii_hinode);
027c5e7a 15857+ iinfo->ii_hinode = NULL;
4a4d8108 15858+ AuRwDestroy(&iinfo->ii_rwsem);
dece6358 15859+}
7f207e10
AM
15860diff -urN /usr/share/empty/fs/aufs/inode.c linux/fs/aufs/inode.c
15861--- /usr/share/empty/fs/aufs/inode.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 15862+++ linux/fs/aufs/inode.c 2016-02-28 11:26:32.573304539 +0100
cfc41e69 15863@@ -0,0 +1,527 @@
4a4d8108 15864+/*
8cdd5066 15865+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
15866+ *
15867+ * This program, aufs is free software; you can redistribute it and/or modify
15868+ * it under the terms of the GNU General Public License as published by
15869+ * the Free Software Foundation; either version 2 of the License, or
15870+ * (at your option) any later version.
15871+ *
15872+ * This program is distributed in the hope that it will be useful,
15873+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15874+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15875+ * GNU General Public License for more details.
15876+ *
15877+ * You should have received a copy of the GNU General Public License
523b37e3 15878+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 15879+ */
1facf9fc 15880+
4a4d8108
AM
15881+/*
15882+ * inode functions
15883+ */
1facf9fc 15884+
4a4d8108 15885+#include "aufs.h"
1308ab2a 15886+
4a4d8108
AM
15887+struct inode *au_igrab(struct inode *inode)
15888+{
15889+ if (inode) {
15890+ AuDebugOn(!atomic_read(&inode->i_count));
027c5e7a 15891+ ihold(inode);
1facf9fc 15892+ }
4a4d8108
AM
15893+ return inode;
15894+}
1facf9fc 15895+
4a4d8108
AM
15896+static void au_refresh_hinode_attr(struct inode *inode, int do_version)
15897+{
15898+ au_cpup_attr_all(inode, /*force*/0);
537831f9 15899+ au_update_iigen(inode, /*half*/1);
4a4d8108
AM
15900+ if (do_version)
15901+ inode->i_version++;
dece6358 15902+}
1facf9fc 15903+
027c5e7a 15904+static int au_ii_refresh(struct inode *inode, int *update)
dece6358 15905+{
4a4d8108 15906+ int err, e;
027c5e7a 15907+ umode_t type;
4a4d8108 15908+ aufs_bindex_t bindex, new_bindex;
1308ab2a 15909+ struct super_block *sb;
4a4d8108 15910+ struct au_iinfo *iinfo;
027c5e7a 15911+ struct au_hinode *p, *q, tmp;
1facf9fc 15912+
4a4d8108 15913+ IiMustWriteLock(inode);
1facf9fc 15914+
027c5e7a 15915+ *update = 0;
4a4d8108 15916+ sb = inode->i_sb;
027c5e7a 15917+ type = inode->i_mode & S_IFMT;
4a4d8108
AM
15918+ iinfo = au_ii(inode);
15919+ err = au_ii_realloc(iinfo, au_sbend(sb) + 1);
15920+ if (unlikely(err))
1308ab2a 15921+ goto out;
1facf9fc 15922+
027c5e7a 15923+ AuDebugOn(iinfo->ii_bstart < 0);
4a4d8108 15924+ p = iinfo->ii_hinode + iinfo->ii_bstart;
4a4d8108
AM
15925+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
15926+ bindex++, p++) {
15927+ if (!p->hi_inode)
15928+ continue;
1facf9fc 15929+
027c5e7a 15930+ AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT));
4a4d8108
AM
15931+ new_bindex = au_br_index(sb, p->hi_id);
15932+ if (new_bindex == bindex)
15933+ continue;
1facf9fc 15934+
4a4d8108 15935+ if (new_bindex < 0) {
027c5e7a 15936+ *update = 1;
4a4d8108
AM
15937+ au_hiput(p);
15938+ p->hi_inode = NULL;
15939+ continue;
1308ab2a 15940+ }
4a4d8108
AM
15941+
15942+ if (new_bindex < iinfo->ii_bstart)
15943+ iinfo->ii_bstart = new_bindex;
15944+ if (iinfo->ii_bend < new_bindex)
15945+ iinfo->ii_bend = new_bindex;
15946+ /* swap two lower inode, and loop again */
15947+ q = iinfo->ii_hinode + new_bindex;
15948+ tmp = *q;
15949+ *q = *p;
15950+ *p = tmp;
15951+ if (tmp.hi_inode) {
15952+ bindex--;
15953+ p--;
1308ab2a 15954+ }
15955+ }
4a4d8108
AM
15956+ au_update_ibrange(inode, /*do_put_zero*/0);
15957+ e = au_dy_irefresh(inode);
15958+ if (unlikely(e && !err))
15959+ err = e;
1facf9fc 15960+
4f0767ce 15961+out:
027c5e7a
AM
15962+ AuTraceErr(err);
15963+ return err;
15964+}
15965+
b95c5147
AM
15966+void au_refresh_iop(struct inode *inode, int force_getattr)
15967+{
15968+ int type;
15969+ struct au_sbinfo *sbi = au_sbi(inode->i_sb);
15970+ const struct inode_operations *iop
15971+ = force_getattr ? aufs_iop : sbi->si_iop_array;
15972+
15973+ if (inode->i_op == iop)
15974+ return;
15975+
15976+ switch (inode->i_mode & S_IFMT) {
15977+ case S_IFDIR:
15978+ type = AuIop_DIR;
15979+ break;
15980+ case S_IFLNK:
15981+ type = AuIop_SYMLINK;
15982+ break;
15983+ default:
15984+ type = AuIop_OTHER;
15985+ break;
15986+ }
15987+
15988+ inode->i_op = iop + type;
15989+ /* unnecessary smp_wmb() */
15990+}
15991+
027c5e7a
AM
15992+int au_refresh_hinode_self(struct inode *inode)
15993+{
15994+ int err, update;
15995+
15996+ err = au_ii_refresh(inode, &update);
15997+ if (!err)
15998+ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode));
15999+
16000+ AuTraceErr(err);
4a4d8108
AM
16001+ return err;
16002+}
1facf9fc 16003+
4a4d8108
AM
16004+int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
16005+{
027c5e7a 16006+ int err, e, update;
4a4d8108 16007+ unsigned int flags;
027c5e7a 16008+ umode_t mode;
4a4d8108 16009+ aufs_bindex_t bindex, bend;
027c5e7a 16010+ unsigned char isdir;
4a4d8108
AM
16011+ struct au_hinode *p;
16012+ struct au_iinfo *iinfo;
1facf9fc 16013+
027c5e7a 16014+ err = au_ii_refresh(inode, &update);
4a4d8108
AM
16015+ if (unlikely(err))
16016+ goto out;
16017+
16018+ update = 0;
16019+ iinfo = au_ii(inode);
16020+ p = iinfo->ii_hinode + iinfo->ii_bstart;
027c5e7a
AM
16021+ mode = (inode->i_mode & S_IFMT);
16022+ isdir = S_ISDIR(mode);
4a4d8108
AM
16023+ flags = au_hi_flags(inode, isdir);
16024+ bend = au_dbend(dentry);
16025+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
5527c038 16026+ struct inode *h_i, *h_inode;
4a4d8108
AM
16027+ struct dentry *h_d;
16028+
16029+ h_d = au_h_dptr(dentry, bindex);
5527c038 16030+ if (!h_d || d_is_negative(h_d))
4a4d8108
AM
16031+ continue;
16032+
5527c038
JR
16033+ h_inode = d_inode(h_d);
16034+ AuDebugOn(mode != (h_inode->i_mode & S_IFMT));
4a4d8108
AM
16035+ if (iinfo->ii_bstart <= bindex && bindex <= iinfo->ii_bend) {
16036+ h_i = au_h_iptr(inode, bindex);
16037+ if (h_i) {
5527c038 16038+ if (h_i == h_inode)
4a4d8108
AM
16039+ continue;
16040+ err = -EIO;
16041+ break;
16042+ }
16043+ }
16044+ if (bindex < iinfo->ii_bstart)
16045+ iinfo->ii_bstart = bindex;
16046+ if (iinfo->ii_bend < bindex)
16047+ iinfo->ii_bend = bindex;
5527c038 16048+ au_set_h_iptr(inode, bindex, au_igrab(h_inode), flags);
4a4d8108 16049+ update = 1;
1308ab2a 16050+ }
4a4d8108
AM
16051+ au_update_ibrange(inode, /*do_put_zero*/0);
16052+ e = au_dy_irefresh(inode);
16053+ if (unlikely(e && !err))
16054+ err = e;
027c5e7a
AM
16055+ if (!err)
16056+ au_refresh_hinode_attr(inode, update && isdir);
4a4d8108 16057+
4f0767ce 16058+out:
4a4d8108 16059+ AuTraceErr(err);
1308ab2a 16060+ return err;
dece6358
AM
16061+}
16062+
4a4d8108 16063+static int set_inode(struct inode *inode, struct dentry *dentry)
dece6358 16064+{
4a4d8108
AM
16065+ int err;
16066+ unsigned int flags;
16067+ umode_t mode;
16068+ aufs_bindex_t bindex, bstart, btail;
16069+ unsigned char isdir;
16070+ struct dentry *h_dentry;
16071+ struct inode *h_inode;
16072+ struct au_iinfo *iinfo;
b95c5147 16073+ struct inode_operations *iop;
dece6358 16074+
4a4d8108 16075+ IiMustWriteLock(inode);
dece6358 16076+
4a4d8108
AM
16077+ err = 0;
16078+ isdir = 0;
b95c5147 16079+ iop = au_sbi(inode->i_sb)->si_iop_array;
4a4d8108 16080+ bstart = au_dbstart(dentry);
5527c038
JR
16081+ h_dentry = au_h_dptr(dentry, bstart);
16082+ h_inode = d_inode(h_dentry);
4a4d8108
AM
16083+ mode = h_inode->i_mode;
16084+ switch (mode & S_IFMT) {
16085+ case S_IFREG:
16086+ btail = au_dbtail(dentry);
b95c5147 16087+ inode->i_op = iop + AuIop_OTHER;
4a4d8108
AM
16088+ inode->i_fop = &aufs_file_fop;
16089+ err = au_dy_iaop(inode, bstart, h_inode);
16090+ if (unlikely(err))
16091+ goto out;
16092+ break;
16093+ case S_IFDIR:
16094+ isdir = 1;
16095+ btail = au_dbtaildir(dentry);
b95c5147 16096+ inode->i_op = iop + AuIop_DIR;
4a4d8108
AM
16097+ inode->i_fop = &aufs_dir_fop;
16098+ break;
16099+ case S_IFLNK:
16100+ btail = au_dbtail(dentry);
b95c5147 16101+ inode->i_op = iop + AuIop_SYMLINK;
4a4d8108
AM
16102+ break;
16103+ case S_IFBLK:
16104+ case S_IFCHR:
16105+ case S_IFIFO:
16106+ case S_IFSOCK:
16107+ btail = au_dbtail(dentry);
b95c5147 16108+ inode->i_op = iop + AuIop_OTHER;
38d290e6 16109+ init_special_inode(inode, mode, h_inode->i_rdev);
4a4d8108
AM
16110+ break;
16111+ default:
16112+ AuIOErr("Unknown file type 0%o\n", mode);
16113+ err = -EIO;
1308ab2a 16114+ goto out;
4a4d8108 16115+ }
dece6358 16116+
4a4d8108
AM
16117+ /* do not set hnotify for whiteouted dirs (SHWH mode) */
16118+ flags = au_hi_flags(inode, isdir);
16119+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)
16120+ && au_ftest_hi(flags, HNOTIFY)
16121+ && dentry->d_name.len > AUFS_WH_PFX_LEN
16122+ && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
16123+ au_fclr_hi(flags, HNOTIFY);
16124+ iinfo = au_ii(inode);
16125+ iinfo->ii_bstart = bstart;
16126+ iinfo->ii_bend = btail;
16127+ for (bindex = bstart; bindex <= btail; bindex++) {
16128+ h_dentry = au_h_dptr(dentry, bindex);
16129+ if (h_dentry)
16130+ au_set_h_iptr(inode, bindex,
5527c038 16131+ au_igrab(d_inode(h_dentry)), flags);
4a4d8108
AM
16132+ }
16133+ au_cpup_attr_all(inode, /*force*/1);
c1595e42
JR
16134+ /*
16135+ * to force calling aufs_get_acl() every time,
16136+ * do not call cache_no_acl() for aufs inode.
16137+ */
dece6358 16138+
4f0767ce 16139+out:
4a4d8108
AM
16140+ return err;
16141+}
dece6358 16142+
027c5e7a
AM
16143+/*
16144+ * successful returns with iinfo write_locked
16145+ * minus: errno
16146+ * zero: success, matched
16147+ * plus: no error, but unmatched
16148+ */
16149+static int reval_inode(struct inode *inode, struct dentry *dentry)
4a4d8108
AM
16150+{
16151+ int err;
cfc41e69 16152+ unsigned int gen, igflags;
4a4d8108
AM
16153+ aufs_bindex_t bindex, bend;
16154+ struct inode *h_inode, *h_dinode;
5527c038 16155+ struct dentry *h_dentry;
dece6358 16156+
4a4d8108
AM
16157+ /*
16158+ * before this function, if aufs got any iinfo lock, it must be only
16159+ * one, the parent dir.
16160+ * it can happen by UDBA and the obsoleted inode number.
16161+ */
16162+ err = -EIO;
16163+ if (unlikely(inode->i_ino == parent_ino(dentry)))
16164+ goto out;
16165+
027c5e7a 16166+ err = 1;
4a4d8108 16167+ ii_write_lock_new_child(inode);
5527c038
JR
16168+ h_dentry = au_h_dptr(dentry, au_dbstart(dentry));
16169+ h_dinode = d_inode(h_dentry);
4a4d8108
AM
16170+ bend = au_ibend(inode);
16171+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
16172+ h_inode = au_h_iptr(inode, bindex);
537831f9
AM
16173+ if (!h_inode || h_inode != h_dinode)
16174+ continue;
16175+
16176+ err = 0;
cfc41e69 16177+ gen = au_iigen(inode, &igflags);
537831f9 16178+ if (gen == au_digen(dentry)
cfc41e69 16179+ && !au_ig_ftest(igflags, HALF_REFRESHED))
4a4d8108 16180+ break;
537831f9
AM
16181+
16182+ /* fully refresh inode using dentry */
16183+ err = au_refresh_hinode(inode, dentry);
16184+ if (!err)
16185+ au_update_iigen(inode, /*half*/0);
16186+ break;
1facf9fc 16187+ }
dece6358 16188+
4a4d8108
AM
16189+ if (unlikely(err))
16190+ ii_write_unlock(inode);
4f0767ce 16191+out:
1facf9fc 16192+ return err;
16193+}
1facf9fc 16194+
4a4d8108
AM
16195+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
16196+ unsigned int d_type, ino_t *ino)
1facf9fc 16197+{
4a4d8108
AM
16198+ int err;
16199+ struct mutex *mtx;
1facf9fc 16200+
b752ccd1 16201+ /* prevent hardlinked inode number from race condition */
4a4d8108 16202+ mtx = NULL;
b752ccd1 16203+ if (d_type != DT_DIR) {
4a4d8108
AM
16204+ mtx = &au_sbr(sb, bindex)->br_xino.xi_nondir_mtx;
16205+ mutex_lock(mtx);
16206+ }
16207+ err = au_xino_read(sb, bindex, h_ino, ino);
16208+ if (unlikely(err))
16209+ goto out;
1308ab2a 16210+
4a4d8108
AM
16211+ if (!*ino) {
16212+ err = -EIO;
16213+ *ino = au_xino_new_ino(sb);
16214+ if (unlikely(!*ino))
1facf9fc 16215+ goto out;
4a4d8108
AM
16216+ err = au_xino_write(sb, bindex, h_ino, *ino);
16217+ if (unlikely(err))
1308ab2a 16218+ goto out;
1308ab2a 16219+ }
1facf9fc 16220+
4f0767ce 16221+out:
b752ccd1 16222+ if (mtx)
4a4d8108 16223+ mutex_unlock(mtx);
1facf9fc 16224+ return err;
16225+}
16226+
4a4d8108
AM
16227+/* successful returns with iinfo write_locked */
16228+/* todo: return with unlocked? */
16229+struct inode *au_new_inode(struct dentry *dentry, int must_new)
1facf9fc 16230+{
5527c038 16231+ struct inode *inode, *h_inode;
4a4d8108
AM
16232+ struct dentry *h_dentry;
16233+ struct super_block *sb;
b752ccd1 16234+ struct mutex *mtx;
4a4d8108 16235+ ino_t h_ino, ino;
1716fcea 16236+ int err;
4a4d8108 16237+ aufs_bindex_t bstart;
1facf9fc 16238+
4a4d8108
AM
16239+ sb = dentry->d_sb;
16240+ bstart = au_dbstart(dentry);
16241+ h_dentry = au_h_dptr(dentry, bstart);
5527c038
JR
16242+ h_inode = d_inode(h_dentry);
16243+ h_ino = h_inode->i_ino;
b752ccd1
AM
16244+
16245+ /*
16246+ * stop 'race'-ing between hardlinks under different
16247+ * parents.
16248+ */
16249+ mtx = NULL;
2000de60 16250+ if (!d_is_dir(h_dentry))
b752ccd1
AM
16251+ mtx = &au_sbr(sb, bstart)->br_xino.xi_nondir_mtx;
16252+
4f0767ce 16253+new_ino:
b752ccd1
AM
16254+ if (mtx)
16255+ mutex_lock(mtx);
4a4d8108
AM
16256+ err = au_xino_read(sb, bstart, h_ino, &ino);
16257+ inode = ERR_PTR(err);
16258+ if (unlikely(err))
16259+ goto out;
b752ccd1 16260+
4a4d8108
AM
16261+ if (!ino) {
16262+ ino = au_xino_new_ino(sb);
16263+ if (unlikely(!ino)) {
16264+ inode = ERR_PTR(-EIO);
dece6358
AM
16265+ goto out;
16266+ }
16267+ }
1facf9fc 16268+
4a4d8108
AM
16269+ AuDbg("i%lu\n", (unsigned long)ino);
16270+ inode = au_iget_locked(sb, ino);
16271+ err = PTR_ERR(inode);
16272+ if (IS_ERR(inode))
1facf9fc 16273+ goto out;
1facf9fc 16274+
4a4d8108
AM
16275+ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
16276+ if (inode->i_state & I_NEW) {
1716fcea 16277+ /* verbose coding for lock class name */
2000de60 16278+ if (unlikely(d_is_symlink(h_dentry)))
1716fcea
AM
16279+ au_rw_class(&au_ii(inode)->ii_rwsem,
16280+ au_lc_key + AuLcSymlink_IIINFO);
2000de60 16281+ else if (unlikely(d_is_dir(h_dentry)))
1716fcea
AM
16282+ au_rw_class(&au_ii(inode)->ii_rwsem,
16283+ au_lc_key + AuLcDir_IIINFO);
16284+ else /* likely */
16285+ au_rw_class(&au_ii(inode)->ii_rwsem,
16286+ au_lc_key + AuLcNonDir_IIINFO);
2dfbb274 16287+
4a4d8108
AM
16288+ ii_write_lock_new_child(inode);
16289+ err = set_inode(inode, dentry);
16290+ if (!err) {
16291+ unlock_new_inode(inode);
16292+ goto out; /* success */
16293+ }
1308ab2a 16294+
027c5e7a
AM
16295+ /*
16296+ * iget_failed() calls iput(), but we need to call
16297+ * ii_write_unlock() after iget_failed(). so dirty hack for
16298+ * i_count.
16299+ */
16300+ atomic_inc(&inode->i_count);
4a4d8108 16301+ iget_failed(inode);
027c5e7a
AM
16302+ ii_write_unlock(inode);
16303+ au_xino_write(sb, bstart, h_ino, /*ino*/0);
16304+ /* ignore this error */
16305+ goto out_iput;
16306+ } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) {
b752ccd1
AM
16307+ /*
16308+ * horrible race condition between lookup, readdir and copyup
16309+ * (or something).
16310+ */
16311+ if (mtx)
16312+ mutex_unlock(mtx);
027c5e7a
AM
16313+ err = reval_inode(inode, dentry);
16314+ if (unlikely(err < 0)) {
16315+ mtx = NULL;
16316+ goto out_iput;
16317+ }
16318+
b752ccd1
AM
16319+ if (!err) {
16320+ mtx = NULL;
4a4d8108 16321+ goto out; /* success */
b752ccd1
AM
16322+ } else if (mtx)
16323+ mutex_lock(mtx);
4a4d8108
AM
16324+ }
16325+
5527c038 16326+ if (unlikely(au_test_fs_unique_ino(h_inode)))
4a4d8108 16327+ AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
523b37e3
AM
16328+ " b%d, %s, %pd, hi%lu, i%lu.\n",
16329+ bstart, au_sbtype(h_dentry->d_sb), dentry,
4a4d8108
AM
16330+ (unsigned long)h_ino, (unsigned long)ino);
16331+ ino = 0;
16332+ err = au_xino_write(sb, bstart, h_ino, /*ino*/0);
16333+ if (!err) {
16334+ iput(inode);
b752ccd1
AM
16335+ if (mtx)
16336+ mutex_unlock(mtx);
4a4d8108
AM
16337+ goto new_ino;
16338+ }
1308ab2a 16339+
4f0767ce 16340+out_iput:
4a4d8108 16341+ iput(inode);
4a4d8108 16342+ inode = ERR_PTR(err);
4f0767ce 16343+out:
b752ccd1
AM
16344+ if (mtx)
16345+ mutex_unlock(mtx);
4a4d8108 16346+ return inode;
1facf9fc 16347+}
16348+
4a4d8108 16349+/* ---------------------------------------------------------------------- */
1facf9fc 16350+
4a4d8108
AM
16351+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
16352+ struct inode *inode)
16353+{
16354+ int err;
076b876e 16355+ struct inode *hi;
1facf9fc 16356+
4a4d8108 16357+ err = au_br_rdonly(au_sbr(sb, bindex));
1facf9fc 16358+
4a4d8108
AM
16359+ /* pseudo-link after flushed may happen out of bounds */
16360+ if (!err
16361+ && inode
16362+ && au_ibstart(inode) <= bindex
16363+ && bindex <= au_ibend(inode)) {
16364+ /*
16365+ * permission check is unnecessary since vfsub routine
16366+ * will be called later
16367+ */
076b876e 16368+ hi = au_h_iptr(inode, bindex);
4a4d8108
AM
16369+ if (hi)
16370+ err = IS_IMMUTABLE(hi) ? -EROFS : 0;
1facf9fc 16371+ }
16372+
4a4d8108
AM
16373+ return err;
16374+}
dece6358 16375+
4a4d8108
AM
16376+int au_test_h_perm(struct inode *h_inode, int mask)
16377+{
2dfbb274 16378+ if (uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
4a4d8108
AM
16379+ return 0;
16380+ return inode_permission(h_inode, mask);
16381+}
1facf9fc 16382+
4a4d8108
AM
16383+int au_test_h_perm_sio(struct inode *h_inode, int mask)
16384+{
16385+ if (au_test_nfs(h_inode->i_sb)
16386+ && (mask & MAY_WRITE)
16387+ && S_ISDIR(h_inode->i_mode))
16388+ mask |= MAY_READ; /* force permission check */
16389+ return au_test_h_perm(h_inode, mask);
1facf9fc 16390+}
7f207e10
AM
16391diff -urN /usr/share/empty/fs/aufs/inode.h linux/fs/aufs/inode.h
16392--- /usr/share/empty/fs/aufs/inode.h 1970-01-01 01:00:00.000000000 +0100
8cdd5066 16393+++ linux/fs/aufs/inode.h 2016-02-28 11:26:32.573304539 +0100
be52b249 16394@@ -0,0 +1,685 @@
4a4d8108 16395+/*
8cdd5066 16396+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
16397+ *
16398+ * This program, aufs is free software; you can redistribute it and/or modify
16399+ * it under the terms of the GNU General Public License as published by
16400+ * the Free Software Foundation; either version 2 of the License, or
16401+ * (at your option) any later version.
16402+ *
16403+ * This program is distributed in the hope that it will be useful,
16404+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
16405+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16406+ * GNU General Public License for more details.
16407+ *
16408+ * You should have received a copy of the GNU General Public License
523b37e3 16409+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 16410+ */
1facf9fc 16411+
1308ab2a 16412+/*
4a4d8108 16413+ * inode operations
1308ab2a 16414+ */
dece6358 16415+
4a4d8108
AM
16416+#ifndef __AUFS_INODE_H__
16417+#define __AUFS_INODE_H__
dece6358 16418+
4a4d8108 16419+#ifdef __KERNEL__
1308ab2a 16420+
4a4d8108 16421+#include <linux/fsnotify.h>
4a4d8108 16422+#include "rwsem.h"
1308ab2a 16423+
4a4d8108 16424+struct vfsmount;
1facf9fc 16425+
4a4d8108
AM
16426+struct au_hnotify {
16427+#ifdef CONFIG_AUFS_HNOTIFY
16428+#ifdef CONFIG_AUFS_HFSNOTIFY
7f207e10 16429+ /* never use fsnotify_add_vfsmount_mark() */
0c5527e5 16430+ struct fsnotify_mark hn_mark;
4a4d8108 16431+#endif
7f207e10 16432+ struct inode *hn_aufs_inode; /* no get/put */
4a4d8108
AM
16433+#endif
16434+} ____cacheline_aligned_in_smp;
1facf9fc 16435+
4a4d8108
AM
16436+struct au_hinode {
16437+ struct inode *hi_inode;
16438+ aufs_bindex_t hi_id;
16439+#ifdef CONFIG_AUFS_HNOTIFY
16440+ struct au_hnotify *hi_notify;
16441+#endif
dece6358 16442+
4a4d8108
AM
16443+ /* reference to the copied-up whiteout with get/put */
16444+ struct dentry *hi_whdentry;
16445+};
dece6358 16446+
537831f9
AM
16447+/* ig_flags */
16448+#define AuIG_HALF_REFRESHED 1
16449+#define au_ig_ftest(flags, name) ((flags) & AuIG_##name)
16450+#define au_ig_fset(flags, name) \
16451+ do { (flags) |= AuIG_##name; } while (0)
16452+#define au_ig_fclr(flags, name) \
16453+ do { (flags) &= ~AuIG_##name; } while (0)
16454+
16455+struct au_iigen {
be52b249 16456+ spinlock_t ig_spin;
537831f9
AM
16457+ __u32 ig_generation, ig_flags;
16458+};
16459+
4a4d8108
AM
16460+struct au_vdir;
16461+struct au_iinfo {
7a9e40b8 16462+ struct au_iigen ii_generation;
4a4d8108 16463+ struct super_block *ii_hsb1; /* no get/put */
1facf9fc 16464+
4a4d8108
AM
16465+ struct au_rwsem ii_rwsem;
16466+ aufs_bindex_t ii_bstart, ii_bend;
16467+ __u32 ii_higen;
16468+ struct au_hinode *ii_hinode;
16469+ struct au_vdir *ii_vdir;
16470+};
1facf9fc 16471+
4a4d8108
AM
16472+struct au_icntnr {
16473+ struct au_iinfo iinfo;
16474+ struct inode vfs_inode;
16475+} ____cacheline_aligned_in_smp;
1308ab2a 16476+
4a4d8108
AM
16477+/* au_pin flags */
16478+#define AuPin_DI_LOCKED 1
16479+#define AuPin_MNT_WRITE (1 << 1)
16480+#define au_ftest_pin(flags, name) ((flags) & AuPin_##name)
7f207e10
AM
16481+#define au_fset_pin(flags, name) \
16482+ do { (flags) |= AuPin_##name; } while (0)
16483+#define au_fclr_pin(flags, name) \
16484+ do { (flags) &= ~AuPin_##name; } while (0)
4a4d8108
AM
16485+
16486+struct au_pin {
16487+ /* input */
16488+ struct dentry *dentry;
16489+ unsigned int udba;
16490+ unsigned char lsc_di, lsc_hi, flags;
16491+ aufs_bindex_t bindex;
16492+
16493+ /* output */
16494+ struct dentry *parent;
16495+ struct au_hinode *hdir;
16496+ struct vfsmount *h_mnt;
86dc4139
AM
16497+
16498+ /* temporary unlock/relock for copyup */
16499+ struct dentry *h_dentry, *h_parent;
16500+ struct au_branch *br;
16501+ struct task_struct *task;
4a4d8108 16502+};
1facf9fc 16503+
86dc4139 16504+void au_pin_hdir_unlock(struct au_pin *p);
c1595e42 16505+int au_pin_hdir_lock(struct au_pin *p);
86dc4139
AM
16506+int au_pin_hdir_relock(struct au_pin *p);
16507+void au_pin_hdir_set_owner(struct au_pin *p, struct task_struct *task);
16508+void au_pin_hdir_acquire_nest(struct au_pin *p);
16509+void au_pin_hdir_release(struct au_pin *p);
16510+
1308ab2a 16511+/* ---------------------------------------------------------------------- */
16512+
4a4d8108 16513+static inline struct au_iinfo *au_ii(struct inode *inode)
1facf9fc 16514+{
4a4d8108 16515+ struct au_iinfo *iinfo;
1facf9fc 16516+
4a4d8108
AM
16517+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
16518+ if (iinfo->ii_hinode)
16519+ return iinfo;
16520+ return NULL; /* debugging bad_inode case */
16521+}
1facf9fc 16522+
4a4d8108 16523+/* ---------------------------------------------------------------------- */
1facf9fc 16524+
4a4d8108
AM
16525+/* inode.c */
16526+struct inode *au_igrab(struct inode *inode);
b95c5147 16527+void au_refresh_iop(struct inode *inode, int force_getattr);
027c5e7a 16528+int au_refresh_hinode_self(struct inode *inode);
4a4d8108
AM
16529+int au_refresh_hinode(struct inode *inode, struct dentry *dentry);
16530+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
16531+ unsigned int d_type, ino_t *ino);
16532+struct inode *au_new_inode(struct dentry *dentry, int must_new);
16533+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
16534+ struct inode *inode);
16535+int au_test_h_perm(struct inode *h_inode, int mask);
16536+int au_test_h_perm_sio(struct inode *h_inode, int mask);
1facf9fc 16537+
4a4d8108
AM
16538+static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex,
16539+ ino_t h_ino, unsigned int d_type, ino_t *ino)
16540+{
16541+#ifdef CONFIG_AUFS_SHWH
16542+ return au_ino(sb, bindex, h_ino, d_type, ino);
16543+#else
16544+ return 0;
16545+#endif
16546+}
1facf9fc 16547+
4a4d8108 16548+/* i_op.c */
b95c5147
AM
16549+enum {
16550+ AuIop_SYMLINK,
16551+ AuIop_DIR,
16552+ AuIop_OTHER,
16553+ AuIop_Last
16554+};
16555+extern struct inode_operations aufs_iop[AuIop_Last],
16556+ aufs_iop_nogetattr[AuIop_Last];
1308ab2a 16557+
4a4d8108
AM
16558+/* au_wr_dir flags */
16559+#define AuWrDir_ADD_ENTRY 1
7e9cd9fe
AM
16560+#define AuWrDir_ISDIR (1 << 1)
16561+#define AuWrDir_TMPFILE (1 << 2)
4a4d8108 16562+#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name)
7f207e10
AM
16563+#define au_fset_wrdir(flags, name) \
16564+ do { (flags) |= AuWrDir_##name; } while (0)
16565+#define au_fclr_wrdir(flags, name) \
16566+ do { (flags) &= ~AuWrDir_##name; } while (0)
1facf9fc 16567+
4a4d8108
AM
16568+struct au_wr_dir_args {
16569+ aufs_bindex_t force_btgt;
16570+ unsigned char flags;
16571+};
16572+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
16573+ struct au_wr_dir_args *args);
dece6358 16574+
4a4d8108
AM
16575+struct dentry *au_pinned_h_parent(struct au_pin *pin);
16576+void au_pin_init(struct au_pin *pin, struct dentry *dentry,
16577+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
16578+ unsigned int udba, unsigned char flags);
16579+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
16580+ unsigned int udba, unsigned char flags) __must_check;
16581+int au_do_pin(struct au_pin *pin) __must_check;
16582+void au_unpin(struct au_pin *pin);
c1595e42
JR
16583+int au_reval_for_attr(struct dentry *dentry, unsigned int sigen);
16584+
16585+#define AuIcpup_DID_CPUP 1
16586+#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name)
16587+#define au_fset_icpup(flags, name) \
16588+ do { (flags) |= AuIcpup_##name; } while (0)
16589+#define au_fclr_icpup(flags, name) \
16590+ do { (flags) &= ~AuIcpup_##name; } while (0)
16591+
16592+struct au_icpup_args {
16593+ unsigned char flags;
16594+ unsigned char pin_flags;
16595+ aufs_bindex_t btgt;
16596+ unsigned int udba;
16597+ struct au_pin pin;
16598+ struct path h_path;
16599+ struct inode *h_inode;
16600+};
16601+
16602+int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
16603+ struct au_icpup_args *a);
16604+
16605+int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path);
1facf9fc 16606+
4a4d8108
AM
16607+/* i_op_add.c */
16608+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
16609+ struct dentry *h_parent, int isdir);
7eafdf33
AM
16610+int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
16611+ dev_t dev);
4a4d8108 16612+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname);
7eafdf33 16613+int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
b4510431 16614+ bool want_excl);
b912730e
AM
16615+struct vfsub_aopen_args;
16616+int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
16617+ struct vfsub_aopen_args *args);
38d290e6 16618+int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode);
4a4d8108
AM
16619+int aufs_link(struct dentry *src_dentry, struct inode *dir,
16620+ struct dentry *dentry);
7eafdf33 16621+int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
1facf9fc 16622+
4a4d8108
AM
16623+/* i_op_del.c */
16624+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup);
16625+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
16626+ struct dentry *h_parent, int isdir);
16627+int aufs_unlink(struct inode *dir, struct dentry *dentry);
16628+int aufs_rmdir(struct inode *dir, struct dentry *dentry);
1308ab2a 16629+
4a4d8108
AM
16630+/* i_op_ren.c */
16631+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt);
16632+int aufs_rename(struct inode *src_dir, struct dentry *src_dentry,
16633+ struct inode *dir, struct dentry *dentry);
1facf9fc 16634+
4a4d8108
AM
16635+/* iinfo.c */
16636+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex);
16637+void au_hiput(struct au_hinode *hinode);
16638+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
16639+ struct dentry *h_wh);
16640+unsigned int au_hi_flags(struct inode *inode, int isdir);
1308ab2a 16641+
4a4d8108
AM
16642+/* hinode flags */
16643+#define AuHi_XINO 1
16644+#define AuHi_HNOTIFY (1 << 1)
16645+#define au_ftest_hi(flags, name) ((flags) & AuHi_##name)
7f207e10
AM
16646+#define au_fset_hi(flags, name) \
16647+ do { (flags) |= AuHi_##name; } while (0)
16648+#define au_fclr_hi(flags, name) \
16649+ do { (flags) &= ~AuHi_##name; } while (0)
1facf9fc 16650+
4a4d8108
AM
16651+#ifndef CONFIG_AUFS_HNOTIFY
16652+#undef AuHi_HNOTIFY
16653+#define AuHi_HNOTIFY 0
16654+#endif
1facf9fc 16655+
4a4d8108
AM
16656+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
16657+ struct inode *h_inode, unsigned int flags);
1facf9fc 16658+
537831f9 16659+void au_update_iigen(struct inode *inode, int half);
4a4d8108 16660+void au_update_ibrange(struct inode *inode, int do_put_zero);
1facf9fc 16661+
4a4d8108
AM
16662+void au_icntnr_init_once(void *_c);
16663+int au_iinfo_init(struct inode *inode);
16664+void au_iinfo_fin(struct inode *inode);
16665+int au_ii_realloc(struct au_iinfo *iinfo, int nbr);
1308ab2a 16666+
e49829fe 16667+#ifdef CONFIG_PROC_FS
4a4d8108 16668+/* plink.c */
e49829fe 16669+int au_plink_maint(struct super_block *sb, int flags);
7e9cd9fe 16670+struct au_sbinfo;
e49829fe
JR
16671+void au_plink_maint_leave(struct au_sbinfo *sbinfo);
16672+int au_plink_maint_enter(struct super_block *sb);
4a4d8108
AM
16673+#ifdef CONFIG_AUFS_DEBUG
16674+void au_plink_list(struct super_block *sb);
16675+#else
16676+AuStubVoid(au_plink_list, struct super_block *sb)
16677+#endif
16678+int au_plink_test(struct inode *inode);
16679+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex);
16680+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
16681+ struct dentry *h_dentry);
e49829fe
JR
16682+void au_plink_put(struct super_block *sb, int verbose);
16683+void au_plink_clean(struct super_block *sb, int verbose);
4a4d8108 16684+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id);
e49829fe
JR
16685+#else
16686+AuStubInt0(au_plink_maint, struct super_block *sb, int flags);
16687+AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo);
16688+AuStubInt0(au_plink_maint_enter, struct super_block *sb);
16689+AuStubVoid(au_plink_list, struct super_block *sb);
16690+AuStubInt0(au_plink_test, struct inode *inode);
16691+AuStub(struct dentry *, au_plink_lkup, return NULL,
16692+ struct inode *inode, aufs_bindex_t bindex);
16693+AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex,
16694+ struct dentry *h_dentry);
16695+AuStubVoid(au_plink_put, struct super_block *sb, int verbose);
16696+AuStubVoid(au_plink_clean, struct super_block *sb, int verbose);
16697+AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id);
16698+#endif /* CONFIG_PROC_FS */
1facf9fc 16699+
c1595e42
JR
16700+#ifdef CONFIG_AUFS_XATTR
16701+/* xattr.c */
7e9cd9fe
AM
16702+int au_cpup_xattr(struct dentry *h_dst, struct dentry *h_src, int ignore_flags,
16703+ unsigned int verbose);
c1595e42
JR
16704+ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size);
16705+ssize_t aufs_getxattr(struct dentry *dentry, const char *name, void *value,
16706+ size_t size);
16707+int aufs_setxattr(struct dentry *dentry, const char *name, const void *value,
16708+ size_t size, int flags);
16709+int aufs_removexattr(struct dentry *dentry, const char *name);
16710+
16711+/* void au_xattr_init(struct super_block *sb); */
16712+#else
16713+AuStubInt0(au_cpup_xattr, struct dentry *h_dst, struct dentry *h_src,
7e9cd9fe 16714+ int ignore_flags, unsigned int verbose);
c1595e42
JR
16715+/* AuStubVoid(au_xattr_init, struct super_block *sb); */
16716+#endif
16717+
16718+#ifdef CONFIG_FS_POSIX_ACL
16719+struct posix_acl *aufs_get_acl(struct inode *inode, int type);
16720+int aufs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
16721+#endif
16722+
16723+#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
16724+enum {
16725+ AU_XATTR_SET,
16726+ AU_XATTR_REMOVE,
16727+ AU_ACL_SET
16728+};
16729+
16730+struct au_srxattr {
16731+ int type;
16732+ union {
16733+ struct {
16734+ const char *name;
16735+ const void *value;
16736+ size_t size;
16737+ int flags;
16738+ } set;
16739+ struct {
16740+ const char *name;
16741+ } remove;
16742+ struct {
16743+ struct posix_acl *acl;
16744+ int type;
16745+ } acl_set;
16746+ } u;
16747+};
16748+ssize_t au_srxattr(struct dentry *dentry, struct au_srxattr *arg);
16749+#endif
16750+
4a4d8108 16751+/* ---------------------------------------------------------------------- */
1308ab2a 16752+
4a4d8108
AM
16753+/* lock subclass for iinfo */
16754+enum {
16755+ AuLsc_II_CHILD, /* child first */
16756+ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hnotify */
16757+ AuLsc_II_CHILD3, /* copyup dirs */
16758+ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */
16759+ AuLsc_II_PARENT2,
16760+ AuLsc_II_PARENT3, /* copyup dirs */
16761+ AuLsc_II_NEW_CHILD
16762+};
1308ab2a 16763+
1facf9fc 16764+/*
4a4d8108
AM
16765+ * ii_read_lock_child, ii_write_lock_child,
16766+ * ii_read_lock_child2, ii_write_lock_child2,
16767+ * ii_read_lock_child3, ii_write_lock_child3,
16768+ * ii_read_lock_parent, ii_write_lock_parent,
16769+ * ii_read_lock_parent2, ii_write_lock_parent2,
16770+ * ii_read_lock_parent3, ii_write_lock_parent3,
16771+ * ii_read_lock_new_child, ii_write_lock_new_child,
1facf9fc 16772+ */
4a4d8108
AM
16773+#define AuReadLockFunc(name, lsc) \
16774+static inline void ii_read_lock_##name(struct inode *i) \
16775+{ \
16776+ au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
16777+}
16778+
16779+#define AuWriteLockFunc(name, lsc) \
16780+static inline void ii_write_lock_##name(struct inode *i) \
16781+{ \
16782+ au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
16783+}
16784+
16785+#define AuRWLockFuncs(name, lsc) \
16786+ AuReadLockFunc(name, lsc) \
16787+ AuWriteLockFunc(name, lsc)
16788+
16789+AuRWLockFuncs(child, CHILD);
16790+AuRWLockFuncs(child2, CHILD2);
16791+AuRWLockFuncs(child3, CHILD3);
16792+AuRWLockFuncs(parent, PARENT);
16793+AuRWLockFuncs(parent2, PARENT2);
16794+AuRWLockFuncs(parent3, PARENT3);
16795+AuRWLockFuncs(new_child, NEW_CHILD);
16796+
16797+#undef AuReadLockFunc
16798+#undef AuWriteLockFunc
16799+#undef AuRWLockFuncs
1facf9fc 16800+
16801+/*
4a4d8108 16802+ * ii_read_unlock, ii_write_unlock, ii_downgrade_lock
1facf9fc 16803+ */
4a4d8108 16804+AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem);
1facf9fc 16805+
4a4d8108
AM
16806+#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem)
16807+#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem)
16808+#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem)
1facf9fc 16809+
4a4d8108 16810+/* ---------------------------------------------------------------------- */
1308ab2a 16811+
027c5e7a
AM
16812+static inline void au_icntnr_init(struct au_icntnr *c)
16813+{
16814+#ifdef CONFIG_AUFS_DEBUG
16815+ c->vfs_inode.i_mode = 0;
16816+#endif
16817+}
16818+
cfc41e69 16819+static inline unsigned int au_iigen(struct inode *inode, unsigned int *igflags)
4a4d8108 16820+{
537831f9
AM
16821+ unsigned int gen;
16822+ struct au_iinfo *iinfo;
be52b249 16823+ struct au_iigen *iigen;
537831f9
AM
16824+
16825+ iinfo = au_ii(inode);
be52b249
AM
16826+ iigen = &iinfo->ii_generation;
16827+ spin_lock(&iigen->ig_spin);
cfc41e69
AM
16828+ if (igflags)
16829+ *igflags = iigen->ig_flags;
be52b249
AM
16830+ gen = iigen->ig_generation;
16831+ spin_unlock(&iigen->ig_spin);
537831f9
AM
16832+
16833+ return gen;
4a4d8108 16834+}
1308ab2a 16835+
4a4d8108
AM
16836+/* tiny test for inode number */
16837+/* tmpfs generation is too rough */
16838+static inline int au_test_higen(struct inode *inode, struct inode *h_inode)
16839+{
16840+ struct au_iinfo *iinfo;
1308ab2a 16841+
4a4d8108
AM
16842+ iinfo = au_ii(inode);
16843+ AuRwMustAnyLock(&iinfo->ii_rwsem);
16844+ return !(iinfo->ii_hsb1 == h_inode->i_sb
16845+ && iinfo->ii_higen == h_inode->i_generation);
16846+}
1308ab2a 16847+
4a4d8108
AM
16848+static inline void au_iigen_dec(struct inode *inode)
16849+{
537831f9 16850+ struct au_iinfo *iinfo;
be52b249 16851+ struct au_iigen *iigen;
537831f9
AM
16852+
16853+ iinfo = au_ii(inode);
be52b249
AM
16854+ iigen = &iinfo->ii_generation;
16855+ spin_lock(&iigen->ig_spin);
16856+ iigen->ig_generation--;
16857+ spin_unlock(&iigen->ig_spin);
027c5e7a
AM
16858+}
16859+
16860+static inline int au_iigen_test(struct inode *inode, unsigned int sigen)
16861+{
16862+ int err;
16863+
16864+ err = 0;
537831f9 16865+ if (unlikely(inode && au_iigen(inode, NULL) != sigen))
027c5e7a
AM
16866+ err = -EIO;
16867+
16868+ return err;
4a4d8108 16869+}
1308ab2a 16870+
4a4d8108 16871+/* ---------------------------------------------------------------------- */
1308ab2a 16872+
4a4d8108
AM
16873+static inline aufs_bindex_t au_ii_br_id(struct inode *inode,
16874+ aufs_bindex_t bindex)
16875+{
16876+ IiMustAnyLock(inode);
16877+ return au_ii(inode)->ii_hinode[0 + bindex].hi_id;
16878+}
1308ab2a 16879+
4a4d8108
AM
16880+static inline aufs_bindex_t au_ibstart(struct inode *inode)
16881+{
16882+ IiMustAnyLock(inode);
16883+ return au_ii(inode)->ii_bstart;
16884+}
1308ab2a 16885+
4a4d8108
AM
16886+static inline aufs_bindex_t au_ibend(struct inode *inode)
16887+{
16888+ IiMustAnyLock(inode);
16889+ return au_ii(inode)->ii_bend;
16890+}
1308ab2a 16891+
4a4d8108
AM
16892+static inline struct au_vdir *au_ivdir(struct inode *inode)
16893+{
16894+ IiMustAnyLock(inode);
16895+ return au_ii(inode)->ii_vdir;
16896+}
1308ab2a 16897+
4a4d8108
AM
16898+static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex)
16899+{
16900+ IiMustAnyLock(inode);
16901+ return au_ii(inode)->ii_hinode[0 + bindex].hi_whdentry;
16902+}
1308ab2a 16903+
4a4d8108 16904+static inline void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 16905+{
4a4d8108
AM
16906+ IiMustWriteLock(inode);
16907+ au_ii(inode)->ii_bstart = bindex;
16908+}
1308ab2a 16909+
4a4d8108
AM
16910+static inline void au_set_ibend(struct inode *inode, aufs_bindex_t bindex)
16911+{
16912+ IiMustWriteLock(inode);
16913+ au_ii(inode)->ii_bend = bindex;
1308ab2a 16914+}
16915+
4a4d8108
AM
16916+static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
16917+{
16918+ IiMustWriteLock(inode);
16919+ au_ii(inode)->ii_vdir = vdir;
16920+}
1facf9fc 16921+
4a4d8108 16922+static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 16923+{
4a4d8108
AM
16924+ IiMustAnyLock(inode);
16925+ return au_ii(inode)->ii_hinode + bindex;
16926+}
dece6358 16927+
4a4d8108 16928+/* ---------------------------------------------------------------------- */
1facf9fc 16929+
4a4d8108
AM
16930+static inline struct dentry *au_pinned_parent(struct au_pin *pin)
16931+{
16932+ if (pin)
16933+ return pin->parent;
16934+ return NULL;
1facf9fc 16935+}
16936+
4a4d8108 16937+static inline struct inode *au_pinned_h_dir(struct au_pin *pin)
1facf9fc 16938+{
4a4d8108
AM
16939+ if (pin && pin->hdir)
16940+ return pin->hdir->hi_inode;
16941+ return NULL;
1308ab2a 16942+}
1facf9fc 16943+
4a4d8108
AM
16944+static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin)
16945+{
16946+ if (pin)
16947+ return pin->hdir;
16948+ return NULL;
16949+}
1facf9fc 16950+
4a4d8108 16951+static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry)
1308ab2a 16952+{
4a4d8108
AM
16953+ if (pin)
16954+ pin->dentry = dentry;
16955+}
1308ab2a 16956+
4a4d8108
AM
16957+static inline void au_pin_set_parent_lflag(struct au_pin *pin,
16958+ unsigned char lflag)
16959+{
16960+ if (pin) {
7f207e10 16961+ if (lflag)
4a4d8108 16962+ au_fset_pin(pin->flags, DI_LOCKED);
7f207e10 16963+ else
4a4d8108 16964+ au_fclr_pin(pin->flags, DI_LOCKED);
1308ab2a 16965+ }
4a4d8108
AM
16966+}
16967+
7e9cd9fe 16968+#if 0 /* reserved */
4a4d8108
AM
16969+static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent)
16970+{
16971+ if (pin) {
16972+ dput(pin->parent);
16973+ pin->parent = dget(parent);
1facf9fc 16974+ }
4a4d8108 16975+}
7e9cd9fe 16976+#endif
1facf9fc 16977+
4a4d8108
AM
16978+/* ---------------------------------------------------------------------- */
16979+
027c5e7a 16980+struct au_branch;
4a4d8108
AM
16981+#ifdef CONFIG_AUFS_HNOTIFY
16982+struct au_hnotify_op {
16983+ void (*ctl)(struct au_hinode *hinode, int do_set);
027c5e7a 16984+ int (*alloc)(struct au_hinode *hinode);
7eafdf33
AM
16985+
16986+ /*
16987+ * if it returns true, the caller should free hinode->hi_notify,
16988+ * otherwise ->free() frees it.
16989+ */
16990+ int (*free)(struct au_hinode *hinode,
16991+ struct au_hnotify *hn) __must_check;
4a4d8108
AM
16992+
16993+ void (*fin)(void);
16994+ int (*init)(void);
027c5e7a
AM
16995+
16996+ int (*reset_br)(unsigned int udba, struct au_branch *br, int perm);
16997+ void (*fin_br)(struct au_branch *br);
16998+ int (*init_br)(struct au_branch *br, int perm);
4a4d8108
AM
16999+};
17000+
17001+/* hnotify.c */
027c5e7a 17002+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode);
4a4d8108
AM
17003+void au_hn_free(struct au_hinode *hinode);
17004+void au_hn_ctl(struct au_hinode *hinode, int do_set);
17005+void au_hn_reset(struct inode *inode, unsigned int flags);
17006+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
17007+ struct qstr *h_child_qstr, struct inode *h_child_inode);
027c5e7a
AM
17008+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm);
17009+int au_hnotify_init_br(struct au_branch *br, int perm);
17010+void au_hnotify_fin_br(struct au_branch *br);
4a4d8108
AM
17011+int __init au_hnotify_init(void);
17012+void au_hnotify_fin(void);
17013+
7f207e10 17014+/* hfsnotify.c */
4a4d8108
AM
17015+extern const struct au_hnotify_op au_hnotify_op;
17016+
17017+static inline
17018+void au_hn_init(struct au_hinode *hinode)
17019+{
17020+ hinode->hi_notify = NULL;
1308ab2a 17021+}
17022+
53392da6
AM
17023+static inline struct au_hnotify *au_hn(struct au_hinode *hinode)
17024+{
17025+ return hinode->hi_notify;
17026+}
17027+
4a4d8108 17028+#else
c1595e42
JR
17029+AuStub(int, au_hn_alloc, return -EOPNOTSUPP,
17030+ struct au_hinode *hinode __maybe_unused,
17031+ struct inode *inode __maybe_unused)
17032+AuStub(struct au_hnotify *, au_hn, return NULL, struct au_hinode *hinode)
4a4d8108
AM
17033+AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused)
17034+AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused,
17035+ int do_set __maybe_unused)
17036+AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused,
17037+ unsigned int flags __maybe_unused)
027c5e7a
AM
17038+AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused,
17039+ struct au_branch *br __maybe_unused,
17040+ int perm __maybe_unused)
17041+AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused,
17042+ int perm __maybe_unused)
17043+AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused)
4a4d8108
AM
17044+AuStubInt0(__init au_hnotify_init, void)
17045+AuStubVoid(au_hnotify_fin, void)
17046+AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused)
17047+#endif /* CONFIG_AUFS_HNOTIFY */
17048+
17049+static inline void au_hn_suspend(struct au_hinode *hdir)
17050+{
17051+ au_hn_ctl(hdir, /*do_set*/0);
1308ab2a 17052+}
17053+
4a4d8108 17054+static inline void au_hn_resume(struct au_hinode *hdir)
1308ab2a 17055+{
4a4d8108
AM
17056+ au_hn_ctl(hdir, /*do_set*/1);
17057+}
1308ab2a 17058+
4a4d8108
AM
17059+static inline void au_hn_imtx_lock(struct au_hinode *hdir)
17060+{
17061+ mutex_lock(&hdir->hi_inode->i_mutex);
17062+ au_hn_suspend(hdir);
17063+}
dece6358 17064+
4a4d8108
AM
17065+static inline void au_hn_imtx_lock_nested(struct au_hinode *hdir,
17066+ unsigned int sc __maybe_unused)
17067+{
17068+ mutex_lock_nested(&hdir->hi_inode->i_mutex, sc);
17069+ au_hn_suspend(hdir);
1facf9fc 17070+}
1facf9fc 17071+
4a4d8108
AM
17072+static inline void au_hn_imtx_unlock(struct au_hinode *hdir)
17073+{
17074+ au_hn_resume(hdir);
17075+ mutex_unlock(&hdir->hi_inode->i_mutex);
17076+}
17077+
17078+#endif /* __KERNEL__ */
17079+#endif /* __AUFS_INODE_H__ */
7f207e10
AM
17080diff -urN /usr/share/empty/fs/aufs/ioctl.c linux/fs/aufs/ioctl.c
17081--- /usr/share/empty/fs/aufs/ioctl.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 17082+++ linux/fs/aufs/ioctl.c 2016-02-28 11:26:32.573304539 +0100
c1595e42 17083@@ -0,0 +1,219 @@
4a4d8108 17084+/*
8cdd5066 17085+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
17086+ *
17087+ * This program, aufs is free software; you can redistribute it and/or modify
17088+ * it under the terms of the GNU General Public License as published by
17089+ * the Free Software Foundation; either version 2 of the License, or
17090+ * (at your option) any later version.
17091+ *
17092+ * This program is distributed in the hope that it will be useful,
17093+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17094+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17095+ * GNU General Public License for more details.
17096+ *
17097+ * You should have received a copy of the GNU General Public License
523b37e3 17098+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
17099+ */
17100+
17101+/*
17102+ * ioctl
17103+ * plink-management and readdir in userspace.
17104+ * assist the pathconf(3) wrapper library.
c2b27bf2 17105+ * move-down
076b876e 17106+ * File-based Hierarchical Storage Management.
4a4d8108
AM
17107+ */
17108+
c2b27bf2
AM
17109+#include <linux/compat.h>
17110+#include <linux/file.h>
4a4d8108
AM
17111+#include "aufs.h"
17112+
1e00d052 17113+static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg)
4a4d8108
AM
17114+{
17115+ int err, fd;
17116+ aufs_bindex_t wbi, bindex, bend;
17117+ struct file *h_file;
17118+ struct super_block *sb;
17119+ struct dentry *root;
1e00d052
AM
17120+ struct au_branch *br;
17121+ struct aufs_wbr_fd wbrfd = {
17122+ .oflags = au_dir_roflags,
17123+ .brid = -1
17124+ };
17125+ const int valid = O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_DIRECTORY
17126+ | O_NOATIME | O_CLOEXEC;
4a4d8108 17127+
1e00d052
AM
17128+ AuDebugOn(wbrfd.oflags & ~valid);
17129+
17130+ if (arg) {
17131+ err = copy_from_user(&wbrfd, arg, sizeof(wbrfd));
17132+ if (unlikely(err)) {
17133+ err = -EFAULT;
17134+ goto out;
17135+ }
17136+
17137+ err = -EINVAL;
17138+ AuDbg("wbrfd{0%o, %d}\n", wbrfd.oflags, wbrfd.brid);
17139+ wbrfd.oflags |= au_dir_roflags;
17140+ AuDbg("0%o\n", wbrfd.oflags);
17141+ if (unlikely(wbrfd.oflags & ~valid))
17142+ goto out;
17143+ }
17144+
2000de60 17145+ fd = get_unused_fd_flags(0);
1e00d052
AM
17146+ err = fd;
17147+ if (unlikely(fd < 0))
4a4d8108 17148+ goto out;
4a4d8108 17149+
1e00d052 17150+ h_file = ERR_PTR(-EINVAL);
4a4d8108 17151+ wbi = 0;
1e00d052 17152+ br = NULL;
4a4d8108
AM
17153+ sb = path->dentry->d_sb;
17154+ root = sb->s_root;
17155+ aufs_read_lock(root, AuLock_IR);
1e00d052
AM
17156+ bend = au_sbend(sb);
17157+ if (wbrfd.brid >= 0) {
17158+ wbi = au_br_index(sb, wbrfd.brid);
17159+ if (unlikely(wbi < 0 || wbi > bend))
17160+ goto out_unlock;
17161+ }
17162+
17163+ h_file = ERR_PTR(-ENOENT);
17164+ br = au_sbr(sb, wbi);
17165+ if (!au_br_writable(br->br_perm)) {
17166+ if (arg)
17167+ goto out_unlock;
17168+
17169+ bindex = wbi + 1;
17170+ wbi = -1;
17171+ for (; bindex <= bend; bindex++) {
17172+ br = au_sbr(sb, bindex);
17173+ if (au_br_writable(br->br_perm)) {
4a4d8108 17174+ wbi = bindex;
1e00d052 17175+ br = au_sbr(sb, wbi);
4a4d8108
AM
17176+ break;
17177+ }
17178+ }
4a4d8108
AM
17179+ }
17180+ AuDbg("wbi %d\n", wbi);
1e00d052 17181+ if (wbi >= 0)
392086de
AM
17182+ h_file = au_h_open(root, wbi, wbrfd.oflags, NULL,
17183+ /*force_wr*/0);
1e00d052
AM
17184+
17185+out_unlock:
4a4d8108
AM
17186+ aufs_read_unlock(root, AuLock_IR);
17187+ err = PTR_ERR(h_file);
17188+ if (IS_ERR(h_file))
17189+ goto out_fd;
17190+
1e00d052 17191+ atomic_dec(&br->br_count); /* cf. au_h_open() */
4a4d8108
AM
17192+ fd_install(fd, h_file);
17193+ err = fd;
17194+ goto out; /* success */
17195+
4f0767ce 17196+out_fd:
4a4d8108 17197+ put_unused_fd(fd);
4f0767ce 17198+out:
1e00d052 17199+ AuTraceErr(err);
4a4d8108
AM
17200+ return err;
17201+}
17202+
17203+/* ---------------------------------------------------------------------- */
17204+
17205+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg)
17206+{
17207+ long err;
c1595e42 17208+ struct dentry *dentry;
4a4d8108
AM
17209+
17210+ switch (cmd) {
4a4d8108
AM
17211+ case AUFS_CTL_RDU:
17212+ case AUFS_CTL_RDU_INO:
17213+ err = au_rdu_ioctl(file, cmd, arg);
17214+ break;
17215+
17216+ case AUFS_CTL_WBR_FD:
1e00d052 17217+ err = au_wbr_fd(&file->f_path, (void __user *)arg);
4a4d8108
AM
17218+ break;
17219+
027c5e7a
AM
17220+ case AUFS_CTL_IBUSY:
17221+ err = au_ibusy_ioctl(file, arg);
17222+ break;
17223+
076b876e
AM
17224+ case AUFS_CTL_BRINFO:
17225+ err = au_brinfo_ioctl(file, arg);
17226+ break;
17227+
17228+ case AUFS_CTL_FHSM_FD:
2000de60 17229+ dentry = file->f_path.dentry;
c1595e42
JR
17230+ if (IS_ROOT(dentry))
17231+ err = au_fhsm_fd(dentry->d_sb, arg);
17232+ else
17233+ err = -ENOTTY;
076b876e
AM
17234+ break;
17235+
4a4d8108
AM
17236+ default:
17237+ /* do not call the lower */
17238+ AuDbg("0x%x\n", cmd);
17239+ err = -ENOTTY;
17240+ }
17241+
17242+ AuTraceErr(err);
17243+ return err;
17244+}
17245+
17246+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg)
17247+{
17248+ long err;
17249+
17250+ switch (cmd) {
c2b27bf2 17251+ case AUFS_CTL_MVDOWN:
2000de60 17252+ err = au_mvdown(file->f_path.dentry, (void __user *)arg);
c2b27bf2
AM
17253+ break;
17254+
4a4d8108 17255+ case AUFS_CTL_WBR_FD:
1e00d052 17256+ err = au_wbr_fd(&file->f_path, (void __user *)arg);
4a4d8108
AM
17257+ break;
17258+
17259+ default:
17260+ /* do not call the lower */
17261+ AuDbg("0x%x\n", cmd);
17262+ err = -ENOTTY;
17263+ }
17264+
17265+ AuTraceErr(err);
17266+ return err;
17267+}
b752ccd1
AM
17268+
17269+#ifdef CONFIG_COMPAT
17270+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
17271+ unsigned long arg)
17272+{
17273+ long err;
17274+
17275+ switch (cmd) {
17276+ case AUFS_CTL_RDU:
17277+ case AUFS_CTL_RDU_INO:
17278+ err = au_rdu_compat_ioctl(file, cmd, arg);
17279+ break;
17280+
027c5e7a
AM
17281+ case AUFS_CTL_IBUSY:
17282+ err = au_ibusy_compat_ioctl(file, arg);
17283+ break;
17284+
076b876e
AM
17285+ case AUFS_CTL_BRINFO:
17286+ err = au_brinfo_compat_ioctl(file, arg);
17287+ break;
17288+
b752ccd1
AM
17289+ default:
17290+ err = aufs_ioctl_dir(file, cmd, arg);
17291+ }
17292+
17293+ AuTraceErr(err);
17294+ return err;
17295+}
17296+
b752ccd1
AM
17297+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
17298+ unsigned long arg)
17299+{
17300+ return aufs_ioctl_nondir(file, cmd, (unsigned long)compat_ptr(arg));
17301+}
17302+#endif
7f207e10
AM
17303diff -urN /usr/share/empty/fs/aufs/i_op_add.c linux/fs/aufs/i_op_add.c
17304--- /usr/share/empty/fs/aufs/i_op_add.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 17305+++ linux/fs/aufs/i_op_add.c 2016-02-28 11:26:32.573304539 +0100
5527c038 17306@@ -0,0 +1,932 @@
4a4d8108 17307+/*
8cdd5066 17308+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
17309+ *
17310+ * This program, aufs is free software; you can redistribute it and/or modify
17311+ * it under the terms of the GNU General Public License as published by
17312+ * the Free Software Foundation; either version 2 of the License, or
17313+ * (at your option) any later version.
17314+ *
17315+ * This program is distributed in the hope that it will be useful,
17316+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17317+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17318+ * GNU General Public License for more details.
17319+ *
17320+ * You should have received a copy of the GNU General Public License
523b37e3 17321+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
17322+ */
17323+
17324+/*
17325+ * inode operations (add entry)
17326+ */
17327+
17328+#include "aufs.h"
17329+
17330+/*
17331+ * final procedure of adding a new entry, except link(2).
17332+ * remove whiteout, instantiate, copyup the parent dir's times and size
17333+ * and update version.
17334+ * if it failed, re-create the removed whiteout.
17335+ */
17336+static int epilog(struct inode *dir, aufs_bindex_t bindex,
17337+ struct dentry *wh_dentry, struct dentry *dentry)
17338+{
17339+ int err, rerr;
17340+ aufs_bindex_t bwh;
17341+ struct path h_path;
076b876e 17342+ struct super_block *sb;
4a4d8108
AM
17343+ struct inode *inode, *h_dir;
17344+ struct dentry *wh;
17345+
17346+ bwh = -1;
076b876e 17347+ sb = dir->i_sb;
4a4d8108 17348+ if (wh_dentry) {
5527c038 17349+ h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */
4a4d8108
AM
17350+ IMustLock(h_dir);
17351+ AuDebugOn(au_h_iptr(dir, bindex) != h_dir);
17352+ bwh = au_dbwh(dentry);
17353+ h_path.dentry = wh_dentry;
076b876e 17354+ h_path.mnt = au_sbr_mnt(sb, bindex);
4a4d8108
AM
17355+ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path,
17356+ dentry);
17357+ if (unlikely(err))
17358+ goto out;
17359+ }
17360+
17361+ inode = au_new_inode(dentry, /*must_new*/1);
17362+ if (!IS_ERR(inode)) {
17363+ d_instantiate(dentry, inode);
5527c038 17364+ dir = d_inode(dentry->d_parent); /* dir inode is locked */
4a4d8108 17365+ IMustLock(dir);
b912730e 17366+ au_dir_ts(dir, bindex);
4a4d8108 17367+ dir->i_version++;
076b876e 17368+ au_fhsm_wrote(sb, bindex, /*force*/0);
4a4d8108
AM
17369+ return 0; /* success */
17370+ }
17371+
17372+ err = PTR_ERR(inode);
17373+ if (!wh_dentry)
17374+ goto out;
17375+
17376+ /* revert */
17377+ /* dir inode is locked */
17378+ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent);
17379+ rerr = PTR_ERR(wh);
17380+ if (IS_ERR(wh)) {
523b37e3
AM
17381+ AuIOErr("%pd reverting whiteout failed(%d, %d)\n",
17382+ dentry, err, rerr);
4a4d8108
AM
17383+ err = -EIO;
17384+ } else
17385+ dput(wh);
17386+
4f0767ce 17387+out:
4a4d8108
AM
17388+ return err;
17389+}
17390+
027c5e7a
AM
17391+static int au_d_may_add(struct dentry *dentry)
17392+{
17393+ int err;
17394+
17395+ err = 0;
17396+ if (unlikely(d_unhashed(dentry)))
17397+ err = -ENOENT;
5527c038 17398+ if (unlikely(d_really_is_positive(dentry)))
027c5e7a
AM
17399+ err = -EEXIST;
17400+ return err;
17401+}
17402+
4a4d8108
AM
17403+/*
17404+ * simple tests for the adding inode operations.
17405+ * following the checks in vfs, plus the parent-child relationship.
17406+ */
17407+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
17408+ struct dentry *h_parent, int isdir)
17409+{
17410+ int err;
17411+ umode_t h_mode;
17412+ struct dentry *h_dentry;
17413+ struct inode *h_inode;
17414+
17415+ err = -ENAMETOOLONG;
17416+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
17417+ goto out;
17418+
17419+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 17420+ if (d_really_is_negative(dentry)) {
4a4d8108 17421+ err = -EEXIST;
5527c038 17422+ if (unlikely(d_is_positive(h_dentry)))
4a4d8108
AM
17423+ goto out;
17424+ } else {
17425+ /* rename(2) case */
17426+ err = -EIO;
5527c038
JR
17427+ if (unlikely(d_is_negative(h_dentry)))
17428+ goto out;
17429+ h_inode = d_inode(h_dentry);
17430+ if (unlikely(!h_inode->i_nlink))
4a4d8108
AM
17431+ goto out;
17432+
17433+ h_mode = h_inode->i_mode;
17434+ if (!isdir) {
17435+ err = -EISDIR;
17436+ if (unlikely(S_ISDIR(h_mode)))
17437+ goto out;
17438+ } else if (unlikely(!S_ISDIR(h_mode))) {
17439+ err = -ENOTDIR;
17440+ goto out;
17441+ }
17442+ }
17443+
17444+ err = 0;
17445+ /* expected parent dir is locked */
17446+ if (unlikely(h_parent != h_dentry->d_parent))
17447+ err = -EIO;
17448+
4f0767ce 17449+out:
4a4d8108
AM
17450+ AuTraceErr(err);
17451+ return err;
17452+}
17453+
17454+/*
17455+ * initial procedure of adding a new entry.
17456+ * prepare writable branch and the parent dir, lock it,
17457+ * and lookup whiteout for the new entry.
17458+ */
17459+static struct dentry*
17460+lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt,
17461+ struct dentry *src_dentry, struct au_pin *pin,
17462+ struct au_wr_dir_args *wr_dir_args)
17463+{
17464+ struct dentry *wh_dentry, *h_parent;
17465+ struct super_block *sb;
17466+ struct au_branch *br;
17467+ int err;
17468+ unsigned int udba;
17469+ aufs_bindex_t bcpup;
17470+
523b37e3 17471+ AuDbg("%pd\n", dentry);
4a4d8108
AM
17472+
17473+ err = au_wr_dir(dentry, src_dentry, wr_dir_args);
17474+ bcpup = err;
17475+ wh_dentry = ERR_PTR(err);
17476+ if (unlikely(err < 0))
17477+ goto out;
17478+
17479+ sb = dentry->d_sb;
17480+ udba = au_opt_udba(sb);
17481+ err = au_pin(pin, dentry, bcpup, udba,
17482+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
17483+ wh_dentry = ERR_PTR(err);
17484+ if (unlikely(err))
17485+ goto out;
17486+
17487+ h_parent = au_pinned_h_parent(pin);
17488+ if (udba != AuOpt_UDBA_NONE
17489+ && au_dbstart(dentry) == bcpup)
17490+ err = au_may_add(dentry, bcpup, h_parent,
17491+ au_ftest_wrdir(wr_dir_args->flags, ISDIR));
17492+ else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
17493+ err = -ENAMETOOLONG;
17494+ wh_dentry = ERR_PTR(err);
17495+ if (unlikely(err))
17496+ goto out_unpin;
17497+
17498+ br = au_sbr(sb, bcpup);
17499+ if (dt) {
17500+ struct path tmp = {
17501+ .dentry = h_parent,
86dc4139 17502+ .mnt = au_br_mnt(br)
4a4d8108
AM
17503+ };
17504+ au_dtime_store(dt, au_pinned_parent(pin), &tmp);
17505+ }
17506+
17507+ wh_dentry = NULL;
17508+ if (bcpup != au_dbwh(dentry))
17509+ goto out; /* success */
17510+
2000de60
JR
17511+ /*
17512+ * ENAMETOOLONG here means that if we allowed creating such a name, it
17513+ * could not be removed in the future. So we don't allow such a name
17514+ * here and we don't handle ENAMETOOLONG differently here.
17515+ */
4a4d8108
AM
17516+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
17517+
4f0767ce 17518+out_unpin:
4a4d8108
AM
17519+ if (IS_ERR(wh_dentry))
17520+ au_unpin(pin);
4f0767ce 17521+out:
4a4d8108
AM
17522+ return wh_dentry;
17523+}
17524+
17525+/* ---------------------------------------------------------------------- */
17526+
17527+enum { Mknod, Symlink, Creat };
17528+struct simple_arg {
17529+ int type;
17530+ union {
17531+ struct {
b912730e
AM
17532+ umode_t mode;
17533+ bool want_excl;
17534+ bool try_aopen;
17535+ struct vfsub_aopen_args *aopen;
4a4d8108
AM
17536+ } c;
17537+ struct {
17538+ const char *symname;
17539+ } s;
17540+ struct {
7eafdf33 17541+ umode_t mode;
4a4d8108
AM
17542+ dev_t dev;
17543+ } m;
17544+ } u;
17545+};
17546+
17547+static int add_simple(struct inode *dir, struct dentry *dentry,
17548+ struct simple_arg *arg)
17549+{
076b876e 17550+ int err, rerr;
4a4d8108
AM
17551+ aufs_bindex_t bstart;
17552+ unsigned char created;
b912730e
AM
17553+ const unsigned char try_aopen
17554+ = (arg->type == Creat && arg->u.c.try_aopen);
4a4d8108
AM
17555+ struct dentry *wh_dentry, *parent;
17556+ struct inode *h_dir;
b912730e
AM
17557+ struct super_block *sb;
17558+ struct au_branch *br;
c2b27bf2
AM
17559+ /* to reduce stack size */
17560+ struct {
17561+ struct au_dtime dt;
17562+ struct au_pin pin;
17563+ struct path h_path;
17564+ struct au_wr_dir_args wr_dir_args;
17565+ } *a;
4a4d8108 17566+
523b37e3 17567+ AuDbg("%pd\n", dentry);
4a4d8108
AM
17568+ IMustLock(dir);
17569+
c2b27bf2
AM
17570+ err = -ENOMEM;
17571+ a = kmalloc(sizeof(*a), GFP_NOFS);
17572+ if (unlikely(!a))
17573+ goto out;
17574+ a->wr_dir_args.force_btgt = -1;
17575+ a->wr_dir_args.flags = AuWrDir_ADD_ENTRY;
17576+
4a4d8108 17577+ parent = dentry->d_parent; /* dir inode is locked */
b912730e
AM
17578+ if (!try_aopen) {
17579+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
17580+ if (unlikely(err))
17581+ goto out_free;
17582+ }
027c5e7a
AM
17583+ err = au_d_may_add(dentry);
17584+ if (unlikely(err))
17585+ goto out_unlock;
b912730e
AM
17586+ if (!try_aopen)
17587+ di_write_lock_parent(parent);
c2b27bf2
AM
17588+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
17589+ &a->pin, &a->wr_dir_args);
4a4d8108
AM
17590+ err = PTR_ERR(wh_dentry);
17591+ if (IS_ERR(wh_dentry))
027c5e7a 17592+ goto out_parent;
4a4d8108
AM
17593+
17594+ bstart = au_dbstart(dentry);
b912730e
AM
17595+ sb = dentry->d_sb;
17596+ br = au_sbr(sb, bstart);
c2b27bf2 17597+ a->h_path.dentry = au_h_dptr(dentry, bstart);
b912730e 17598+ a->h_path.mnt = au_br_mnt(br);
c2b27bf2 17599+ h_dir = au_pinned_h_dir(&a->pin);
4a4d8108
AM
17600+ switch (arg->type) {
17601+ case Creat:
b912730e
AM
17602+ err = 0;
17603+ if (!try_aopen || !h_dir->i_op->atomic_open)
17604+ err = vfsub_create(h_dir, &a->h_path, arg->u.c.mode,
17605+ arg->u.c.want_excl);
17606+ else
17607+ err = vfsub_atomic_open(h_dir, a->h_path.dentry,
17608+ arg->u.c.aopen, br);
4a4d8108
AM
17609+ break;
17610+ case Symlink:
c2b27bf2 17611+ err = vfsub_symlink(h_dir, &a->h_path, arg->u.s.symname);
4a4d8108
AM
17612+ break;
17613+ case Mknod:
c2b27bf2
AM
17614+ err = vfsub_mknod(h_dir, &a->h_path, arg->u.m.mode,
17615+ arg->u.m.dev);
4a4d8108
AM
17616+ break;
17617+ default:
17618+ BUG();
17619+ }
17620+ created = !err;
17621+ if (!err)
17622+ err = epilog(dir, bstart, wh_dentry, dentry);
17623+
17624+ /* revert */
5527c038 17625+ if (unlikely(created && err && d_is_positive(a->h_path.dentry))) {
523b37e3
AM
17626+ /* no delegation since it is just created */
17627+ rerr = vfsub_unlink(h_dir, &a->h_path, /*delegated*/NULL,
17628+ /*force*/0);
4a4d8108 17629+ if (rerr) {
523b37e3
AM
17630+ AuIOErr("%pd revert failure(%d, %d)\n",
17631+ dentry, err, rerr);
4a4d8108
AM
17632+ err = -EIO;
17633+ }
c2b27bf2 17634+ au_dtime_revert(&a->dt);
4a4d8108
AM
17635+ }
17636+
b912730e
AM
17637+ if (!err && try_aopen && !h_dir->i_op->atomic_open)
17638+ *arg->u.c.aopen->opened |= FILE_CREATED;
17639+
c2b27bf2 17640+ au_unpin(&a->pin);
4a4d8108
AM
17641+ dput(wh_dentry);
17642+
027c5e7a 17643+out_parent:
b912730e
AM
17644+ if (!try_aopen)
17645+ di_write_unlock(parent);
027c5e7a 17646+out_unlock:
4a4d8108
AM
17647+ if (unlikely(err)) {
17648+ au_update_dbstart(dentry);
17649+ d_drop(dentry);
17650+ }
b912730e
AM
17651+ if (!try_aopen)
17652+ aufs_read_unlock(dentry, AuLock_DW);
c2b27bf2
AM
17653+out_free:
17654+ kfree(a);
027c5e7a 17655+out:
4a4d8108
AM
17656+ return err;
17657+}
17658+
7eafdf33
AM
17659+int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
17660+ dev_t dev)
4a4d8108
AM
17661+{
17662+ struct simple_arg arg = {
17663+ .type = Mknod,
17664+ .u.m = {
17665+ .mode = mode,
17666+ .dev = dev
17667+ }
17668+ };
17669+ return add_simple(dir, dentry, &arg);
17670+}
17671+
17672+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
17673+{
17674+ struct simple_arg arg = {
17675+ .type = Symlink,
17676+ .u.s.symname = symname
17677+ };
17678+ return add_simple(dir, dentry, &arg);
17679+}
17680+
7eafdf33 17681+int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
b4510431 17682+ bool want_excl)
4a4d8108
AM
17683+{
17684+ struct simple_arg arg = {
17685+ .type = Creat,
17686+ .u.c = {
b4510431
AM
17687+ .mode = mode,
17688+ .want_excl = want_excl
4a4d8108
AM
17689+ }
17690+ };
17691+ return add_simple(dir, dentry, &arg);
17692+}
17693+
b912730e
AM
17694+int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
17695+ struct vfsub_aopen_args *aopen_args)
17696+{
17697+ struct simple_arg arg = {
17698+ .type = Creat,
17699+ .u.c = {
17700+ .mode = aopen_args->create_mode,
17701+ .want_excl = aopen_args->open_flag & O_EXCL,
17702+ .try_aopen = true,
17703+ .aopen = aopen_args
17704+ }
17705+ };
17706+ return add_simple(dir, dentry, &arg);
17707+}
17708+
38d290e6
JR
17709+int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
17710+{
17711+ int err;
17712+ aufs_bindex_t bindex;
17713+ struct super_block *sb;
17714+ struct dentry *parent, *h_parent, *h_dentry;
17715+ struct inode *h_dir, *inode;
17716+ struct vfsmount *h_mnt;
17717+ struct au_wr_dir_args wr_dir_args = {
17718+ .force_btgt = -1,
17719+ .flags = AuWrDir_TMPFILE
17720+ };
17721+
17722+ /* copy-up may happen */
17723+ mutex_lock(&dir->i_mutex);
17724+
17725+ sb = dir->i_sb;
17726+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
17727+ if (unlikely(err))
17728+ goto out;
17729+
17730+ err = au_di_init(dentry);
17731+ if (unlikely(err))
17732+ goto out_si;
17733+
17734+ err = -EBUSY;
17735+ parent = d_find_any_alias(dir);
17736+ AuDebugOn(!parent);
17737+ di_write_lock_parent(parent);
5527c038 17738+ if (unlikely(d_inode(parent) != dir))
38d290e6
JR
17739+ goto out_parent;
17740+
17741+ err = au_digen_test(parent, au_sigen(sb));
17742+ if (unlikely(err))
17743+ goto out_parent;
17744+
17745+ bindex = au_dbstart(parent);
17746+ au_set_dbstart(dentry, bindex);
17747+ au_set_dbend(dentry, bindex);
17748+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
17749+ bindex = err;
17750+ if (unlikely(err < 0))
17751+ goto out_parent;
17752+
17753+ err = -EOPNOTSUPP;
17754+ h_dir = au_h_iptr(dir, bindex);
17755+ if (unlikely(!h_dir->i_op->tmpfile))
17756+ goto out_parent;
17757+
17758+ h_mnt = au_sbr_mnt(sb, bindex);
17759+ err = vfsub_mnt_want_write(h_mnt);
17760+ if (unlikely(err))
17761+ goto out_parent;
17762+
17763+ h_parent = au_h_dptr(parent, bindex);
5527c038 17764+ err = inode_permission(d_inode(h_parent), MAY_WRITE | MAY_EXEC);
38d290e6
JR
17765+ if (unlikely(err))
17766+ goto out_mnt;
17767+
17768+ err = -ENOMEM;
17769+ h_dentry = d_alloc(h_parent, &dentry->d_name);
17770+ if (unlikely(!h_dentry))
17771+ goto out_mnt;
17772+
17773+ err = h_dir->i_op->tmpfile(h_dir, h_dentry, mode);
17774+ if (unlikely(err))
17775+ goto out_dentry;
17776+
17777+ au_set_dbstart(dentry, bindex);
17778+ au_set_dbend(dentry, bindex);
17779+ au_set_h_dptr(dentry, bindex, dget(h_dentry));
17780+ inode = au_new_inode(dentry, /*must_new*/1);
17781+ if (IS_ERR(inode)) {
17782+ err = PTR_ERR(inode);
17783+ au_set_h_dptr(dentry, bindex, NULL);
17784+ au_set_dbstart(dentry, -1);
17785+ au_set_dbend(dentry, -1);
17786+ } else {
17787+ if (!inode->i_nlink)
17788+ set_nlink(inode, 1);
17789+ d_tmpfile(dentry, inode);
17790+ au_di(dentry)->di_tmpfile = 1;
17791+
17792+ /* update without i_mutex */
17793+ if (au_ibstart(dir) == au_dbstart(dentry))
17794+ au_cpup_attr_timesizes(dir);
17795+ }
17796+
17797+out_dentry:
17798+ dput(h_dentry);
17799+out_mnt:
17800+ vfsub_mnt_drop_write(h_mnt);
17801+out_parent:
17802+ di_write_unlock(parent);
17803+ dput(parent);
17804+ di_write_unlock(dentry);
17805+ if (!err)
17806+#if 0
17807+ /* verbose coding for lock class name */
17808+ au_rw_class(&au_di(dentry)->di_rwsem,
17809+ au_lc_key + AuLcNonDir_DIINFO);
17810+#else
17811+ ;
17812+#endif
17813+ else {
17814+ au_di_fin(dentry);
17815+ dentry->d_fsdata = NULL;
17816+ }
17817+out_si:
17818+ si_read_unlock(sb);
17819+out:
17820+ mutex_unlock(&dir->i_mutex);
17821+ return err;
17822+}
17823+
4a4d8108
AM
17824+/* ---------------------------------------------------------------------- */
17825+
17826+struct au_link_args {
17827+ aufs_bindex_t bdst, bsrc;
17828+ struct au_pin pin;
17829+ struct path h_path;
17830+ struct dentry *src_parent, *parent;
17831+};
17832+
17833+static int au_cpup_before_link(struct dentry *src_dentry,
17834+ struct au_link_args *a)
17835+{
17836+ int err;
17837+ struct dentry *h_src_dentry;
c2b27bf2
AM
17838+ struct au_cp_generic cpg = {
17839+ .dentry = src_dentry,
17840+ .bdst = a->bdst,
17841+ .bsrc = a->bsrc,
17842+ .len = -1,
17843+ .pin = &a->pin,
17844+ .flags = AuCpup_DTIME | AuCpup_HOPEN /* | AuCpup_KEEPLINO */
17845+ };
4a4d8108
AM
17846+
17847+ di_read_lock_parent(a->src_parent, AuLock_IR);
17848+ err = au_test_and_cpup_dirs(src_dentry, a->bdst);
17849+ if (unlikely(err))
17850+ goto out;
17851+
17852+ h_src_dentry = au_h_dptr(src_dentry, a->bsrc);
4a4d8108
AM
17853+ err = au_pin(&a->pin, src_dentry, a->bdst,
17854+ au_opt_udba(src_dentry->d_sb),
17855+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
17856+ if (unlikely(err))
17857+ goto out;
367653fa 17858+
c2b27bf2 17859+ err = au_sio_cpup_simple(&cpg);
4a4d8108
AM
17860+ au_unpin(&a->pin);
17861+
4f0767ce 17862+out:
4a4d8108
AM
17863+ di_read_unlock(a->src_parent, AuLock_IR);
17864+ return err;
17865+}
17866+
86dc4139
AM
17867+static int au_cpup_or_link(struct dentry *src_dentry, struct dentry *dentry,
17868+ struct au_link_args *a)
4a4d8108
AM
17869+{
17870+ int err;
17871+ unsigned char plink;
86dc4139 17872+ aufs_bindex_t bend;
4a4d8108 17873+ struct dentry *h_src_dentry;
523b37e3 17874+ struct inode *h_inode, *inode, *delegated;
4a4d8108
AM
17875+ struct super_block *sb;
17876+ struct file *h_file;
17877+
17878+ plink = 0;
17879+ h_inode = NULL;
17880+ sb = src_dentry->d_sb;
5527c038 17881+ inode = d_inode(src_dentry);
4a4d8108
AM
17882+ if (au_ibstart(inode) <= a->bdst)
17883+ h_inode = au_h_iptr(inode, a->bdst);
17884+ if (!h_inode || !h_inode->i_nlink) {
17885+ /* copyup src_dentry as the name of dentry. */
86dc4139
AM
17886+ bend = au_dbend(dentry);
17887+ if (bend < a->bsrc)
17888+ au_set_dbend(dentry, a->bsrc);
17889+ au_set_h_dptr(dentry, a->bsrc,
17890+ dget(au_h_dptr(src_dentry, a->bsrc)));
17891+ dget(a->h_path.dentry);
17892+ au_set_h_dptr(dentry, a->bdst, NULL);
c1595e42
JR
17893+ AuDbg("temporary d_inode...\n");
17894+ spin_lock(&dentry->d_lock);
5527c038 17895+ dentry->d_inode = d_inode(src_dentry); /* tmp */
c1595e42 17896+ spin_unlock(&dentry->d_lock);
392086de 17897+ h_file = au_h_open_pre(dentry, a->bsrc, /*force_wr*/0);
86dc4139 17898+ if (IS_ERR(h_file))
4a4d8108 17899+ err = PTR_ERR(h_file);
86dc4139 17900+ else {
c2b27bf2
AM
17901+ struct au_cp_generic cpg = {
17902+ .dentry = dentry,
17903+ .bdst = a->bdst,
17904+ .bsrc = -1,
17905+ .len = -1,
17906+ .pin = &a->pin,
17907+ .flags = AuCpup_KEEPLINO
17908+ };
17909+ err = au_sio_cpup_simple(&cpg);
86dc4139
AM
17910+ au_h_open_post(dentry, a->bsrc, h_file);
17911+ if (!err) {
17912+ dput(a->h_path.dentry);
17913+ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
17914+ } else
17915+ au_set_h_dptr(dentry, a->bdst,
17916+ a->h_path.dentry);
17917+ }
c1595e42 17918+ spin_lock(&dentry->d_lock);
86dc4139 17919+ dentry->d_inode = NULL; /* restore */
c1595e42
JR
17920+ spin_unlock(&dentry->d_lock);
17921+ AuDbg("temporary d_inode...done\n");
86dc4139
AM
17922+ au_set_h_dptr(dentry, a->bsrc, NULL);
17923+ au_set_dbend(dentry, bend);
4a4d8108
AM
17924+ } else {
17925+ /* the inode of src_dentry already exists on a.bdst branch */
17926+ h_src_dentry = d_find_alias(h_inode);
17927+ if (!h_src_dentry && au_plink_test(inode)) {
17928+ plink = 1;
17929+ h_src_dentry = au_plink_lkup(inode, a->bdst);
17930+ err = PTR_ERR(h_src_dentry);
17931+ if (IS_ERR(h_src_dentry))
17932+ goto out;
17933+
5527c038 17934+ if (unlikely(d_is_negative(h_src_dentry))) {
4a4d8108
AM
17935+ dput(h_src_dentry);
17936+ h_src_dentry = NULL;
17937+ }
17938+
17939+ }
17940+ if (h_src_dentry) {
523b37e3 17941+ delegated = NULL;
4a4d8108 17942+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
523b37e3
AM
17943+ &a->h_path, &delegated);
17944+ if (unlikely(err == -EWOULDBLOCK)) {
17945+ pr_warn("cannot retry for NFSv4 delegation"
17946+ " for an internal link\n");
17947+ iput(delegated);
17948+ }
4a4d8108
AM
17949+ dput(h_src_dentry);
17950+ } else {
17951+ AuIOErr("no dentry found for hi%lu on b%d\n",
17952+ h_inode->i_ino, a->bdst);
17953+ err = -EIO;
17954+ }
17955+ }
17956+
17957+ if (!err && !plink)
17958+ au_plink_append(inode, a->bdst, a->h_path.dentry);
17959+
17960+out:
2cbb1c4b 17961+ AuTraceErr(err);
4a4d8108
AM
17962+ return err;
17963+}
17964+
17965+int aufs_link(struct dentry *src_dentry, struct inode *dir,
17966+ struct dentry *dentry)
17967+{
17968+ int err, rerr;
17969+ struct au_dtime dt;
17970+ struct au_link_args *a;
17971+ struct dentry *wh_dentry, *h_src_dentry;
523b37e3 17972+ struct inode *inode, *delegated;
4a4d8108
AM
17973+ struct super_block *sb;
17974+ struct au_wr_dir_args wr_dir_args = {
17975+ /* .force_btgt = -1, */
17976+ .flags = AuWrDir_ADD_ENTRY
17977+ };
17978+
17979+ IMustLock(dir);
5527c038 17980+ inode = d_inode(src_dentry);
4a4d8108
AM
17981+ IMustLock(inode);
17982+
4a4d8108
AM
17983+ err = -ENOMEM;
17984+ a = kzalloc(sizeof(*a), GFP_NOFS);
17985+ if (unlikely(!a))
17986+ goto out;
17987+
17988+ a->parent = dentry->d_parent; /* dir inode is locked */
027c5e7a
AM
17989+ err = aufs_read_and_write_lock2(dentry, src_dentry,
17990+ AuLock_NOPLM | AuLock_GEN);
e49829fe
JR
17991+ if (unlikely(err))
17992+ goto out_kfree;
38d290e6 17993+ err = au_d_linkable(src_dentry);
027c5e7a
AM
17994+ if (unlikely(err))
17995+ goto out_unlock;
17996+ err = au_d_may_add(dentry);
17997+ if (unlikely(err))
17998+ goto out_unlock;
e49829fe 17999+
4a4d8108 18000+ a->src_parent = dget_parent(src_dentry);
2cbb1c4b 18001+ wr_dir_args.force_btgt = au_ibstart(inode);
4a4d8108
AM
18002+
18003+ di_write_lock_parent(a->parent);
18004+ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt);
18005+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin,
18006+ &wr_dir_args);
18007+ err = PTR_ERR(wh_dentry);
18008+ if (IS_ERR(wh_dentry))
027c5e7a 18009+ goto out_parent;
4a4d8108
AM
18010+
18011+ err = 0;
18012+ sb = dentry->d_sb;
18013+ a->bdst = au_dbstart(dentry);
18014+ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
18015+ a->h_path.mnt = au_sbr_mnt(sb, a->bdst);
2cbb1c4b
JR
18016+ a->bsrc = au_ibstart(inode);
18017+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
38d290e6
JR
18018+ if (!h_src_dentry && au_di(src_dentry)->di_tmpfile)
18019+ h_src_dentry = dget(au_hi_wh(inode, a->bsrc));
2cbb1c4b
JR
18020+ if (!h_src_dentry) {
18021+ a->bsrc = au_dbstart(src_dentry);
18022+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
18023+ AuDebugOn(!h_src_dentry);
38d290e6
JR
18024+ } else if (IS_ERR(h_src_dentry)) {
18025+ err = PTR_ERR(h_src_dentry);
2cbb1c4b 18026+ goto out_parent;
38d290e6 18027+ }
2cbb1c4b 18028+
4a4d8108
AM
18029+ if (au_opt_test(au_mntflags(sb), PLINK)) {
18030+ if (a->bdst < a->bsrc
18031+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */)
86dc4139 18032+ err = au_cpup_or_link(src_dentry, dentry, a);
523b37e3
AM
18033+ else {
18034+ delegated = NULL;
4a4d8108 18035+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
523b37e3
AM
18036+ &a->h_path, &delegated);
18037+ if (unlikely(err == -EWOULDBLOCK)) {
18038+ pr_warn("cannot retry for NFSv4 delegation"
18039+ " for an internal link\n");
18040+ iput(delegated);
18041+ }
18042+ }
2cbb1c4b 18043+ dput(h_src_dentry);
4a4d8108
AM
18044+ } else {
18045+ /*
18046+ * copyup src_dentry to the branch we process,
18047+ * and then link(2) to it.
18048+ */
2cbb1c4b 18049+ dput(h_src_dentry);
4a4d8108
AM
18050+ if (a->bdst < a->bsrc
18051+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) {
18052+ au_unpin(&a->pin);
18053+ di_write_unlock(a->parent);
18054+ err = au_cpup_before_link(src_dentry, a);
18055+ di_write_lock_parent(a->parent);
18056+ if (!err)
18057+ err = au_pin(&a->pin, dentry, a->bdst,
18058+ au_opt_udba(sb),
18059+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
18060+ if (unlikely(err))
18061+ goto out_wh;
18062+ }
18063+ if (!err) {
18064+ h_src_dentry = au_h_dptr(src_dentry, a->bdst);
18065+ err = -ENOENT;
5527c038 18066+ if (h_src_dentry && d_is_positive(h_src_dentry)) {
523b37e3 18067+ delegated = NULL;
4a4d8108
AM
18068+ err = vfsub_link(h_src_dentry,
18069+ au_pinned_h_dir(&a->pin),
523b37e3
AM
18070+ &a->h_path, &delegated);
18071+ if (unlikely(err == -EWOULDBLOCK)) {
18072+ pr_warn("cannot retry"
18073+ " for NFSv4 delegation"
18074+ " for an internal link\n");
18075+ iput(delegated);
18076+ }
18077+ }
4a4d8108
AM
18078+ }
18079+ }
18080+ if (unlikely(err))
18081+ goto out_unpin;
18082+
18083+ if (wh_dentry) {
18084+ a->h_path.dentry = wh_dentry;
18085+ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path,
18086+ dentry);
18087+ if (unlikely(err))
18088+ goto out_revert;
18089+ }
18090+
b912730e 18091+ au_dir_ts(dir, a->bdst);
4a4d8108 18092+ dir->i_version++;
4a4d8108
AM
18093+ inc_nlink(inode);
18094+ inode->i_ctime = dir->i_ctime;
027c5e7a
AM
18095+ d_instantiate(dentry, au_igrab(inode));
18096+ if (d_unhashed(a->h_path.dentry))
4a4d8108
AM
18097+ /* some filesystems call d_drop() */
18098+ d_drop(dentry);
076b876e
AM
18099+ /* some filesystems consume an inode even for a hardlink */
18100+ au_fhsm_wrote(sb, a->bdst, /*force*/0);
4a4d8108
AM
18101+ goto out_unpin; /* success */
18102+
4f0767ce 18103+out_revert:
523b37e3
AM
18104+ /* no delegation since it is just created */
18105+ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path,
18106+ /*delegated*/NULL, /*force*/0);
027c5e7a 18107+ if (unlikely(rerr)) {
523b37e3 18108+ AuIOErr("%pd reverting failed(%d, %d)\n", dentry, err, rerr);
027c5e7a
AM
18109+ err = -EIO;
18110+ }
4a4d8108 18111+ au_dtime_revert(&dt);
4f0767ce 18112+out_unpin:
4a4d8108 18113+ au_unpin(&a->pin);
4f0767ce 18114+out_wh:
4a4d8108 18115+ dput(wh_dentry);
027c5e7a
AM
18116+out_parent:
18117+ di_write_unlock(a->parent);
18118+ dput(a->src_parent);
4f0767ce 18119+out_unlock:
4a4d8108
AM
18120+ if (unlikely(err)) {
18121+ au_update_dbstart(dentry);
18122+ d_drop(dentry);
18123+ }
4a4d8108 18124+ aufs_read_and_write_unlock2(dentry, src_dentry);
e49829fe 18125+out_kfree:
4a4d8108 18126+ kfree(a);
4f0767ce 18127+out:
86dc4139 18128+ AuTraceErr(err);
4a4d8108
AM
18129+ return err;
18130+}
18131+
7eafdf33 18132+int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
4a4d8108
AM
18133+{
18134+ int err, rerr;
18135+ aufs_bindex_t bindex;
18136+ unsigned char diropq;
18137+ struct path h_path;
18138+ struct dentry *wh_dentry, *parent, *opq_dentry;
18139+ struct mutex *h_mtx;
18140+ struct super_block *sb;
18141+ struct {
18142+ struct au_pin pin;
18143+ struct au_dtime dt;
18144+ } *a; /* reduce the stack usage */
18145+ struct au_wr_dir_args wr_dir_args = {
18146+ .force_btgt = -1,
18147+ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR
18148+ };
18149+
18150+ IMustLock(dir);
18151+
18152+ err = -ENOMEM;
18153+ a = kmalloc(sizeof(*a), GFP_NOFS);
18154+ if (unlikely(!a))
18155+ goto out;
18156+
027c5e7a
AM
18157+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
18158+ if (unlikely(err))
18159+ goto out_free;
18160+ err = au_d_may_add(dentry);
18161+ if (unlikely(err))
18162+ goto out_unlock;
18163+
4a4d8108
AM
18164+ parent = dentry->d_parent; /* dir inode is locked */
18165+ di_write_lock_parent(parent);
18166+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
18167+ &a->pin, &wr_dir_args);
18168+ err = PTR_ERR(wh_dentry);
18169+ if (IS_ERR(wh_dentry))
027c5e7a 18170+ goto out_parent;
4a4d8108
AM
18171+
18172+ sb = dentry->d_sb;
18173+ bindex = au_dbstart(dentry);
18174+ h_path.dentry = au_h_dptr(dentry, bindex);
18175+ h_path.mnt = au_sbr_mnt(sb, bindex);
18176+ err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode);
18177+ if (unlikely(err))
027c5e7a 18178+ goto out_unpin;
4a4d8108
AM
18179+
18180+ /* make the dir opaque */
18181+ diropq = 0;
5527c038 18182+ h_mtx = &d_inode(h_path.dentry)->i_mutex;
4a4d8108
AM
18183+ if (wh_dentry
18184+ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) {
18185+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
18186+ opq_dentry = au_diropq_create(dentry, bindex);
18187+ mutex_unlock(h_mtx);
18188+ err = PTR_ERR(opq_dentry);
18189+ if (IS_ERR(opq_dentry))
18190+ goto out_dir;
18191+ dput(opq_dentry);
18192+ diropq = 1;
18193+ }
18194+
18195+ err = epilog(dir, bindex, wh_dentry, dentry);
18196+ if (!err) {
18197+ inc_nlink(dir);
027c5e7a 18198+ goto out_unpin; /* success */
4a4d8108
AM
18199+ }
18200+
18201+ /* revert */
18202+ if (diropq) {
18203+ AuLabel(revert opq);
18204+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
18205+ rerr = au_diropq_remove(dentry, bindex);
18206+ mutex_unlock(h_mtx);
18207+ if (rerr) {
523b37e3
AM
18208+ AuIOErr("%pd reverting diropq failed(%d, %d)\n",
18209+ dentry, err, rerr);
4a4d8108
AM
18210+ err = -EIO;
18211+ }
18212+ }
18213+
4f0767ce 18214+out_dir:
4a4d8108
AM
18215+ AuLabel(revert dir);
18216+ rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path);
18217+ if (rerr) {
523b37e3
AM
18218+ AuIOErr("%pd reverting dir failed(%d, %d)\n",
18219+ dentry, err, rerr);
4a4d8108
AM
18220+ err = -EIO;
18221+ }
4a4d8108 18222+ au_dtime_revert(&a->dt);
027c5e7a 18223+out_unpin:
4a4d8108
AM
18224+ au_unpin(&a->pin);
18225+ dput(wh_dentry);
027c5e7a
AM
18226+out_parent:
18227+ di_write_unlock(parent);
18228+out_unlock:
4a4d8108
AM
18229+ if (unlikely(err)) {
18230+ au_update_dbstart(dentry);
18231+ d_drop(dentry);
18232+ }
4a4d8108 18233+ aufs_read_unlock(dentry, AuLock_DW);
027c5e7a 18234+out_free:
4a4d8108 18235+ kfree(a);
4f0767ce 18236+out:
4a4d8108
AM
18237+ return err;
18238+}
7f207e10
AM
18239diff -urN /usr/share/empty/fs/aufs/i_op.c linux/fs/aufs/i_op.c
18240--- /usr/share/empty/fs/aufs/i_op.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066
JR
18241+++ linux/fs/aufs/i_op.c 2016-02-28 11:26:32.573304539 +0100
18242@@ -0,0 +1,1490 @@
4a4d8108 18243+/*
8cdd5066 18244+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
18245+ *
18246+ * This program, aufs is free software; you can redistribute it and/or modify
18247+ * it under the terms of the GNU General Public License as published by
18248+ * the Free Software Foundation; either version 2 of the License, or
18249+ * (at your option) any later version.
18250+ *
18251+ * This program is distributed in the hope that it will be useful,
18252+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18253+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18254+ * GNU General Public License for more details.
18255+ *
18256+ * You should have received a copy of the GNU General Public License
523b37e3 18257+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 18258+ */
1facf9fc 18259+
1308ab2a 18260+/*
4a4d8108 18261+ * inode operations (except add/del/rename)
1308ab2a 18262+ */
4a4d8108
AM
18263+
18264+#include <linux/device_cgroup.h>
18265+#include <linux/fs_stack.h>
4a4d8108
AM
18266+#include <linux/namei.h>
18267+#include <linux/security.h>
4a4d8108
AM
18268+#include "aufs.h"
18269+
1e00d052 18270+static int h_permission(struct inode *h_inode, int mask,
79b8bda9 18271+ struct path *h_path, int brperm)
1facf9fc 18272+{
1308ab2a 18273+ int err;
4a4d8108 18274+ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
1facf9fc 18275+
4a4d8108
AM
18276+ err = -EACCES;
18277+ if ((write_mask && IS_IMMUTABLE(h_inode))
18278+ || ((mask & MAY_EXEC)
18279+ && S_ISREG(h_inode->i_mode)
79b8bda9 18280+ && (path_noexec(h_path)
4a4d8108
AM
18281+ || !(h_inode->i_mode & S_IXUGO))))
18282+ goto out;
18283+
18284+ /*
18285+ * - skip the lower fs test in the case of write to ro branch.
18286+ * - nfs dir permission write check is optimized, but a policy for
18287+ * link/rename requires a real check.
b912730e
AM
18288+ * - nfs always sets MS_POSIXACL regardless of its mount option 'noacl'.
18289+ * in this case, generic_permission() returns -EOPNOTSUPP.
4a4d8108
AM
18290+ */
18291+ if ((write_mask && !au_br_writable(brperm))
18292+ || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode)
18293+ && write_mask && !(mask & MAY_READ))
18294+ || !h_inode->i_op->permission) {
18295+ /* AuLabel(generic_permission); */
b912730e 18296+ /* AuDbg("get_acl %pf\n", h_inode->i_op->get_acl); */
1e00d052 18297+ err = generic_permission(h_inode, mask);
b912730e
AM
18298+ if (err == -EOPNOTSUPP && au_test_nfs_noacl(h_inode))
18299+ err = h_inode->i_op->permission(h_inode, mask);
18300+ AuTraceErr(err);
1308ab2a 18301+ } else {
4a4d8108 18302+ /* AuLabel(h_inode->permission); */
1e00d052 18303+ err = h_inode->i_op->permission(h_inode, mask);
4a4d8108
AM
18304+ AuTraceErr(err);
18305+ }
1facf9fc 18306+
4a4d8108
AM
18307+ if (!err)
18308+ err = devcgroup_inode_permission(h_inode, mask);
7f207e10 18309+ if (!err)
4a4d8108 18310+ err = security_inode_permission(h_inode, mask);
4a4d8108
AM
18311+
18312+#if 0
18313+ if (!err) {
18314+ /* todo: do we need to call ima_path_check()? */
18315+ struct path h_path = {
18316+ .dentry =
18317+ .mnt = h_mnt
18318+ };
18319+ err = ima_path_check(&h_path,
18320+ mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
18321+ IMA_COUNT_LEAVE);
1308ab2a 18322+ }
4a4d8108 18323+#endif
dece6358 18324+
4f0767ce 18325+out:
1308ab2a 18326+ return err;
18327+}
dece6358 18328+
1e00d052 18329+static int aufs_permission(struct inode *inode, int mask)
1308ab2a 18330+{
18331+ int err;
4a4d8108
AM
18332+ aufs_bindex_t bindex, bend;
18333+ const unsigned char isdir = !!S_ISDIR(inode->i_mode),
18334+ write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
18335+ struct inode *h_inode;
18336+ struct super_block *sb;
18337+ struct au_branch *br;
1facf9fc 18338+
027c5e7a 18339+ /* todo: support rcu-walk? */
1e00d052 18340+ if (mask & MAY_NOT_BLOCK)
027c5e7a
AM
18341+ return -ECHILD;
18342+
4a4d8108
AM
18343+ sb = inode->i_sb;
18344+ si_read_lock(sb, AuLock_FLUSH);
18345+ ii_read_lock_child(inode);
027c5e7a
AM
18346+#if 0
18347+ err = au_iigen_test(inode, au_sigen(sb));
18348+ if (unlikely(err))
18349+ goto out;
18350+#endif
dece6358 18351+
076b876e
AM
18352+ if (!isdir
18353+ || write_mask
18354+ || au_opt_test(au_mntflags(sb), DIRPERM1)) {
4a4d8108
AM
18355+ err = au_busy_or_stale();
18356+ h_inode = au_h_iptr(inode, au_ibstart(inode));
18357+ if (unlikely(!h_inode
18358+ || (h_inode->i_mode & S_IFMT)
18359+ != (inode->i_mode & S_IFMT)))
18360+ goto out;
1facf9fc 18361+
4a4d8108
AM
18362+ err = 0;
18363+ bindex = au_ibstart(inode);
18364+ br = au_sbr(sb, bindex);
79b8bda9 18365+ err = h_permission(h_inode, mask, &br->br_path, br->br_perm);
4a4d8108
AM
18366+ if (write_mask
18367+ && !err
18368+ && !special_file(h_inode->i_mode)) {
18369+ /* test whether the upper writable branch exists */
18370+ err = -EROFS;
18371+ for (; bindex >= 0; bindex--)
18372+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
18373+ err = 0;
18374+ break;
18375+ }
18376+ }
18377+ goto out;
18378+ }
dece6358 18379+
4a4d8108 18380+ /* non-write to dir */
1308ab2a 18381+ err = 0;
4a4d8108
AM
18382+ bend = au_ibend(inode);
18383+ for (bindex = au_ibstart(inode); !err && bindex <= bend; bindex++) {
18384+ h_inode = au_h_iptr(inode, bindex);
18385+ if (h_inode) {
18386+ err = au_busy_or_stale();
18387+ if (unlikely(!S_ISDIR(h_inode->i_mode)))
18388+ break;
18389+
18390+ br = au_sbr(sb, bindex);
79b8bda9 18391+ err = h_permission(h_inode, mask, &br->br_path,
4a4d8108
AM
18392+ br->br_perm);
18393+ }
18394+ }
1308ab2a 18395+
4f0767ce 18396+out:
4a4d8108
AM
18397+ ii_read_unlock(inode);
18398+ si_read_unlock(sb);
1308ab2a 18399+ return err;
18400+}
18401+
4a4d8108 18402+/* ---------------------------------------------------------------------- */
1facf9fc 18403+
4a4d8108 18404+static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
b4510431 18405+ unsigned int flags)
4a4d8108
AM
18406+{
18407+ struct dentry *ret, *parent;
b752ccd1 18408+ struct inode *inode;
4a4d8108 18409+ struct super_block *sb;
1716fcea 18410+ int err, npositive;
dece6358 18411+
4a4d8108 18412+ IMustLock(dir);
1308ab2a 18413+
537831f9
AM
18414+ /* todo: support rcu-walk? */
18415+ ret = ERR_PTR(-ECHILD);
18416+ if (flags & LOOKUP_RCU)
18417+ goto out;
18418+
18419+ ret = ERR_PTR(-ENAMETOOLONG);
18420+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
18421+ goto out;
18422+
4a4d8108 18423+ sb = dir->i_sb;
7f207e10
AM
18424+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
18425+ ret = ERR_PTR(err);
18426+ if (unlikely(err))
18427+ goto out;
18428+
4a4d8108
AM
18429+ err = au_di_init(dentry);
18430+ ret = ERR_PTR(err);
18431+ if (unlikely(err))
7f207e10 18432+ goto out_si;
1308ab2a 18433+
9dbd164d 18434+ inode = NULL;
027c5e7a 18435+ npositive = 0; /* suppress a warning */
4a4d8108
AM
18436+ parent = dentry->d_parent; /* dir inode is locked */
18437+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a
AM
18438+ err = au_alive_dir(parent);
18439+ if (!err)
18440+ err = au_digen_test(parent, au_sigen(sb));
18441+ if (!err) {
18442+ npositive = au_lkup_dentry(dentry, au_dbstart(parent),
537831f9 18443+ /*type*/0);
027c5e7a
AM
18444+ err = npositive;
18445+ }
4a4d8108 18446+ di_read_unlock(parent, AuLock_IR);
4a4d8108
AM
18447+ ret = ERR_PTR(err);
18448+ if (unlikely(err < 0))
18449+ goto out_unlock;
1308ab2a 18450+
4a4d8108 18451+ if (npositive) {
b752ccd1 18452+ inode = au_new_inode(dentry, /*must_new*/0);
c1595e42
JR
18453+ if (IS_ERR(inode)) {
18454+ ret = (void *)inode;
18455+ inode = NULL;
18456+ goto out_unlock;
18457+ }
9dbd164d 18458+ }
4a4d8108 18459+
c1595e42
JR
18460+ if (inode)
18461+ atomic_inc(&inode->i_count);
4a4d8108 18462+ ret = d_splice_alias(inode, dentry);
537831f9
AM
18463+#if 0
18464+ if (unlikely(d_need_lookup(dentry))) {
18465+ spin_lock(&dentry->d_lock);
18466+ dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
18467+ spin_unlock(&dentry->d_lock);
18468+ } else
18469+#endif
c1595e42 18470+ if (inode) {
2000de60 18471+ if (!IS_ERR(ret)) {
c1595e42 18472+ iput(inode);
2000de60
JR
18473+ if (ret && ret != dentry)
18474+ ii_write_unlock(inode);
18475+ } else {
c1595e42
JR
18476+ ii_write_unlock(inode);
18477+ iput(inode);
18478+ inode = NULL;
18479+ }
7f207e10 18480+ }
1facf9fc 18481+
4f0767ce 18482+out_unlock:
4a4d8108 18483+ di_write_unlock(dentry);
2dfbb274 18484+ if (inode) {
1716fcea
AM
18485+ /* verbose coding for lock class name */
18486+ if (unlikely(S_ISLNK(inode->i_mode)))
18487+ au_rw_class(&au_di(dentry)->di_rwsem,
18488+ au_lc_key + AuLcSymlink_DIINFO);
18489+ else if (unlikely(S_ISDIR(inode->i_mode)))
18490+ au_rw_class(&au_di(dentry)->di_rwsem,
18491+ au_lc_key + AuLcDir_DIINFO);
18492+ else /* likely */
18493+ au_rw_class(&au_di(dentry)->di_rwsem,
18494+ au_lc_key + AuLcNonDir_DIINFO);
9dbd164d 18495+ }
7f207e10 18496+out_si:
4a4d8108 18497+ si_read_unlock(sb);
7f207e10 18498+out:
4a4d8108
AM
18499+ return ret;
18500+}
1facf9fc 18501+
4a4d8108 18502+/* ---------------------------------------------------------------------- */
1facf9fc 18503+
b912730e
AM
18504+struct aopen_node {
18505+ struct hlist_node hlist;
18506+ struct file *file, *h_file;
18507+};
18508+
18509+static int au_do_aopen(struct inode *inode, struct file *file)
18510+{
18511+ struct au_sphlhead *aopen;
18512+ struct aopen_node *node;
18513+ struct au_do_open_args args = {
18514+ .no_lock = 1,
18515+ .open = au_do_open_nondir
18516+ };
18517+
18518+ aopen = &au_sbi(inode->i_sb)->si_aopen;
18519+ spin_lock(&aopen->spin);
18520+ hlist_for_each_entry(node, &aopen->head, hlist)
18521+ if (node->file == file) {
18522+ args.h_file = node->h_file;
18523+ break;
18524+ }
18525+ spin_unlock(&aopen->spin);
18526+ /* AuDebugOn(!args.h_file); */
18527+
18528+ return au_do_open(file, &args);
18529+}
18530+
18531+static int aufs_atomic_open(struct inode *dir, struct dentry *dentry,
18532+ struct file *file, unsigned int open_flag,
18533+ umode_t create_mode, int *opened)
18534+{
18535+ int err, h_opened = *opened;
18536+ struct dentry *parent;
18537+ struct dentry *d;
18538+ struct au_sphlhead *aopen;
18539+ struct vfsub_aopen_args args = {
18540+ .open_flag = open_flag,
18541+ .create_mode = create_mode,
18542+ .opened = &h_opened
18543+ };
18544+ struct aopen_node aopen_node = {
18545+ .file = file
18546+ };
18547+
18548+ IMustLock(dir);
18549+ AuDbg("open_flag 0x%x\n", open_flag);
18550+ AuDbgDentry(dentry);
18551+
18552+ err = 0;
18553+ if (!au_di(dentry)) {
18554+ d = aufs_lookup(dir, dentry, /*flags*/0);
18555+ if (IS_ERR(d)) {
18556+ err = PTR_ERR(d);
18557+ goto out;
18558+ } else if (d) {
18559+ /*
18560+ * obsoleted dentry found.
18561+ * another error will be returned later.
18562+ */
18563+ d_drop(d);
18564+ dput(d);
18565+ AuDbgDentry(d);
18566+ }
18567+ AuDbgDentry(dentry);
18568+ }
18569+
18570+ if (d_is_positive(dentry)
18571+ || d_unhashed(dentry)
18572+ || d_unlinked(dentry)
18573+ || !(open_flag & O_CREAT))
18574+ goto out_no_open;
18575+
18576+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
18577+ if (unlikely(err))
18578+ goto out;
18579+
18580+ parent = dentry->d_parent; /* dir is locked */
18581+ di_write_lock_parent(parent);
18582+ err = au_lkup_dentry(dentry, /*bstart*/0, /*type*/0);
18583+ if (unlikely(err))
18584+ goto out_unlock;
18585+
18586+ AuDbgDentry(dentry);
18587+ if (d_is_positive(dentry))
18588+ goto out_unlock;
18589+
18590+ args.file = get_empty_filp();
18591+ err = PTR_ERR(args.file);
18592+ if (IS_ERR(args.file))
18593+ goto out_unlock;
18594+
18595+ args.file->f_flags = file->f_flags;
18596+ err = au_aopen_or_create(dir, dentry, &args);
18597+ AuTraceErr(err);
18598+ AuDbgFile(args.file);
18599+ if (unlikely(err < 0)) {
18600+ if (h_opened & FILE_OPENED)
18601+ fput(args.file);
18602+ else
18603+ put_filp(args.file);
18604+ goto out_unlock;
18605+ }
18606+
18607+ /* some filesystems don't set FILE_CREATED while succeeded? */
18608+ *opened |= FILE_CREATED;
18609+ if (h_opened & FILE_OPENED)
18610+ aopen_node.h_file = args.file;
18611+ else {
18612+ put_filp(args.file);
18613+ args.file = NULL;
18614+ }
18615+ aopen = &au_sbi(dir->i_sb)->si_aopen;
18616+ au_sphl_add(&aopen_node.hlist, aopen);
18617+ err = finish_open(file, dentry, au_do_aopen, opened);
18618+ au_sphl_del(&aopen_node.hlist, aopen);
18619+ AuTraceErr(err);
18620+ AuDbgFile(file);
18621+ if (aopen_node.h_file)
18622+ fput(aopen_node.h_file);
18623+
18624+out_unlock:
18625+ di_write_unlock(parent);
18626+ aufs_read_unlock(dentry, AuLock_DW);
18627+ AuDbgDentry(dentry);
18628+ if (unlikely(err))
18629+ goto out;
18630+out_no_open:
18631+ if (!err && !(*opened & FILE_CREATED)) {
18632+ AuLabel(out_no_open);
18633+ dget(dentry);
18634+ err = finish_no_open(file, dentry);
18635+ }
18636+out:
18637+ AuDbg("%pd%s%s\n", dentry,
18638+ (*opened & FILE_CREATED) ? " created" : "",
18639+ (*opened & FILE_OPENED) ? " opened" : "");
18640+ AuTraceErr(err);
18641+ return err;
18642+}
18643+
18644+
18645+/* ---------------------------------------------------------------------- */
18646+
4a4d8108
AM
18647+static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
18648+ const unsigned char add_entry, aufs_bindex_t bcpup,
18649+ aufs_bindex_t bstart)
18650+{
18651+ int err;
18652+ struct dentry *h_parent;
18653+ struct inode *h_dir;
1facf9fc 18654+
027c5e7a 18655+ if (add_entry)
5527c038 18656+ IMustLock(d_inode(parent));
027c5e7a 18657+ else
4a4d8108
AM
18658+ di_write_lock_parent(parent);
18659+
18660+ err = 0;
18661+ if (!au_h_dptr(parent, bcpup)) {
c2b27bf2
AM
18662+ if (bstart > bcpup)
18663+ err = au_cpup_dirs(dentry, bcpup);
18664+ else if (bstart < bcpup)
4a4d8108
AM
18665+ err = au_cpdown_dirs(dentry, bcpup);
18666+ else
c2b27bf2 18667+ BUG();
4a4d8108 18668+ }
38d290e6 18669+ if (!err && add_entry && !au_ftest_wrdir(add_entry, TMPFILE)) {
4a4d8108 18670+ h_parent = au_h_dptr(parent, bcpup);
5527c038 18671+ h_dir = d_inode(h_parent);
4a4d8108 18672+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
7e9cd9fe 18673+ err = au_lkup_neg(dentry, bcpup, /*wh*/0);
4a4d8108
AM
18674+ /* todo: no unlock here */
18675+ mutex_unlock(&h_dir->i_mutex);
027c5e7a
AM
18676+
18677+ AuDbg("bcpup %d\n", bcpup);
18678+ if (!err) {
5527c038 18679+ if (d_really_is_negative(dentry))
027c5e7a 18680+ au_set_h_dptr(dentry, bstart, NULL);
4a4d8108
AM
18681+ au_update_dbrange(dentry, /*do_put_zero*/0);
18682+ }
1308ab2a 18683+ }
1facf9fc 18684+
4a4d8108
AM
18685+ if (!add_entry)
18686+ di_write_unlock(parent);
18687+ if (!err)
18688+ err = bcpup; /* success */
1308ab2a 18689+
027c5e7a 18690+ AuTraceErr(err);
4a4d8108
AM
18691+ return err;
18692+}
1facf9fc 18693+
4a4d8108
AM
18694+/*
18695+ * decide the branch and the parent dir where we will create a new entry.
18696+ * returns new bindex or an error.
18697+ * copyup the parent dir if needed.
18698+ */
18699+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
18700+ struct au_wr_dir_args *args)
18701+{
18702+ int err;
392086de 18703+ unsigned int flags;
4a4d8108 18704+ aufs_bindex_t bcpup, bstart, src_bstart;
86dc4139
AM
18705+ const unsigned char add_entry
18706+ = au_ftest_wrdir(args->flags, ADD_ENTRY)
38d290e6 18707+ | au_ftest_wrdir(args->flags, TMPFILE);
4a4d8108
AM
18708+ struct super_block *sb;
18709+ struct dentry *parent;
18710+ struct au_sbinfo *sbinfo;
1facf9fc 18711+
4a4d8108
AM
18712+ sb = dentry->d_sb;
18713+ sbinfo = au_sbi(sb);
18714+ parent = dget_parent(dentry);
18715+ bstart = au_dbstart(dentry);
18716+ bcpup = bstart;
18717+ if (args->force_btgt < 0) {
18718+ if (src_dentry) {
18719+ src_bstart = au_dbstart(src_dentry);
18720+ if (src_bstart < bstart)
18721+ bcpup = src_bstart;
18722+ } else if (add_entry) {
392086de
AM
18723+ flags = 0;
18724+ if (au_ftest_wrdir(args->flags, ISDIR))
18725+ au_fset_wbr(flags, DIR);
18726+ err = AuWbrCreate(sbinfo, dentry, flags);
4a4d8108
AM
18727+ bcpup = err;
18728+ }
1facf9fc 18729+
5527c038 18730+ if (bcpup < 0 || au_test_ro(sb, bcpup, d_inode(dentry))) {
4a4d8108
AM
18731+ if (add_entry)
18732+ err = AuWbrCopyup(sbinfo, dentry);
18733+ else {
18734+ if (!IS_ROOT(dentry)) {
18735+ di_read_lock_parent(parent, !AuLock_IR);
18736+ err = AuWbrCopyup(sbinfo, dentry);
18737+ di_read_unlock(parent, !AuLock_IR);
18738+ } else
18739+ err = AuWbrCopyup(sbinfo, dentry);
18740+ }
18741+ bcpup = err;
18742+ if (unlikely(err < 0))
18743+ goto out;
18744+ }
18745+ } else {
18746+ bcpup = args->force_btgt;
5527c038 18747+ AuDebugOn(au_test_ro(sb, bcpup, d_inode(dentry)));
1308ab2a 18748+ }
027c5e7a 18749+
4a4d8108
AM
18750+ AuDbg("bstart %d, bcpup %d\n", bstart, bcpup);
18751+ err = bcpup;
18752+ if (bcpup == bstart)
18753+ goto out; /* success */
4a4d8108
AM
18754+
18755+ /* copyup the new parent into the branch we process */
18756+ err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, bstart);
027c5e7a 18757+ if (err >= 0) {
5527c038 18758+ if (d_really_is_negative(dentry)) {
027c5e7a
AM
18759+ au_set_h_dptr(dentry, bstart, NULL);
18760+ au_set_dbstart(dentry, bcpup);
18761+ au_set_dbend(dentry, bcpup);
18762+ }
38d290e6
JR
18763+ AuDebugOn(add_entry
18764+ && !au_ftest_wrdir(args->flags, TMPFILE)
18765+ && !au_h_dptr(dentry, bcpup));
027c5e7a 18766+ }
86dc4139
AM
18767+
18768+out:
18769+ dput(parent);
18770+ return err;
18771+}
18772+
18773+/* ---------------------------------------------------------------------- */
18774+
18775+void au_pin_hdir_unlock(struct au_pin *p)
18776+{
18777+ if (p->hdir)
18778+ au_hn_imtx_unlock(p->hdir);
18779+}
18780+
c1595e42 18781+int au_pin_hdir_lock(struct au_pin *p)
86dc4139
AM
18782+{
18783+ int err;
18784+
18785+ err = 0;
18786+ if (!p->hdir)
18787+ goto out;
18788+
18789+ /* even if an error happens later, keep this lock */
18790+ au_hn_imtx_lock_nested(p->hdir, p->lsc_hi);
18791+
18792+ err = -EBUSY;
5527c038 18793+ if (unlikely(p->hdir->hi_inode != d_inode(p->h_parent)))
86dc4139
AM
18794+ goto out;
18795+
18796+ err = 0;
18797+ if (p->h_dentry)
18798+ err = au_h_verify(p->h_dentry, p->udba, p->hdir->hi_inode,
18799+ p->h_parent, p->br);
18800+
18801+out:
18802+ return err;
18803+}
18804+
18805+int au_pin_hdir_relock(struct au_pin *p)
18806+{
18807+ int err, i;
18808+ struct inode *h_i;
18809+ struct dentry *h_d[] = {
18810+ p->h_dentry,
18811+ p->h_parent
18812+ };
18813+
18814+ err = au_pin_hdir_lock(p);
18815+ if (unlikely(err))
18816+ goto out;
18817+
18818+ for (i = 0; !err && i < sizeof(h_d)/sizeof(*h_d); i++) {
18819+ if (!h_d[i])
18820+ continue;
5527c038
JR
18821+ if (d_is_positive(h_d[i])) {
18822+ h_i = d_inode(h_d[i]);
86dc4139 18823+ err = !h_i->i_nlink;
5527c038 18824+ }
86dc4139
AM
18825+ }
18826+
18827+out:
18828+ return err;
18829+}
18830+
/*
 * overwrite the owner field of the i_mutex of the pinned branch dir with
 * @task. does nothing unless CONFIG_DEBUG_MUTEXES or CONFIG_SMP is defined.
 * p->hdir is not tested here.
 */
void au_pin_hdir_set_owner(struct au_pin *p, struct task_struct *task)
{
#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
	struct mutex *h_mtx;

	h_mtx = &p->hdir->hi_inode->i_mutex;
	h_mtx->owner = task;
#endif
}
18837+
18838+void au_pin_hdir_acquire_nest(struct au_pin *p)
18839+{
18840+ if (p->hdir) {
18841+ mutex_acquire_nest(&p->hdir->hi_inode->i_mutex.dep_map,
18842+ p->lsc_hi, 0, NULL, _RET_IP_);
18843+ au_pin_hdir_set_owner(p, current);
18844+ }
dece6358 18845+}
1facf9fc 18846+
86dc4139
AM
18847+void au_pin_hdir_release(struct au_pin *p)
18848+{
18849+ if (p->hdir) {
18850+ au_pin_hdir_set_owner(p, p->task);
18851+ mutex_release(&p->hdir->hi_inode->i_mutex.dep_map, 1, _RET_IP_);
18852+ }
18853+}
1308ab2a 18854+
4a4d8108 18855+struct dentry *au_pinned_h_parent(struct au_pin *pin)
1308ab2a 18856+{
4a4d8108
AM
18857+ if (pin && pin->parent)
18858+ return au_h_dptr(pin->parent, pin->bindex);
18859+ return NULL;
dece6358 18860+}
1facf9fc 18861+
4a4d8108 18862+void au_unpin(struct au_pin *p)
dece6358 18863+{
86dc4139
AM
18864+ if (p->hdir)
18865+ au_pin_hdir_unlock(p);
e49829fe 18866+ if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE))
b4510431 18867+ vfsub_mnt_drop_write(p->h_mnt);
4a4d8108
AM
18868+ if (!p->hdir)
18869+ return;
1facf9fc 18870+
4a4d8108
AM
18871+ if (!au_ftest_pin(p->flags, DI_LOCKED))
18872+ di_read_unlock(p->parent, AuLock_IR);
18873+ iput(p->hdir->hi_inode);
18874+ dput(p->parent);
18875+ p->parent = NULL;
18876+ p->hdir = NULL;
18877+ p->h_mnt = NULL;
86dc4139 18878+ /* do not clear p->task */
4a4d8108 18879+}
1308ab2a 18880+
4a4d8108
AM
18881+int au_do_pin(struct au_pin *p)
18882+{
18883+ int err;
18884+ struct super_block *sb;
4a4d8108
AM
18885+ struct inode *h_dir;
18886+
18887+ err = 0;
18888+ sb = p->dentry->d_sb;
86dc4139 18889+ p->br = au_sbr(sb, p->bindex);
4a4d8108
AM
18890+ if (IS_ROOT(p->dentry)) {
18891+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
86dc4139 18892+ p->h_mnt = au_br_mnt(p->br);
b4510431 18893+ err = vfsub_mnt_want_write(p->h_mnt);
4a4d8108
AM
18894+ if (unlikely(err)) {
18895+ au_fclr_pin(p->flags, MNT_WRITE);
18896+ goto out_err;
18897+ }
18898+ }
dece6358 18899+ goto out;
1facf9fc 18900+ }
18901+
86dc4139 18902+ p->h_dentry = NULL;
4a4d8108 18903+ if (p->bindex <= au_dbend(p->dentry))
86dc4139 18904+ p->h_dentry = au_h_dptr(p->dentry, p->bindex);
dece6358 18905+
4a4d8108
AM
18906+ p->parent = dget_parent(p->dentry);
18907+ if (!au_ftest_pin(p->flags, DI_LOCKED))
18908+ di_read_lock(p->parent, AuLock_IR, p->lsc_di);
dece6358 18909+
4a4d8108 18910+ h_dir = NULL;
86dc4139 18911+ p->h_parent = au_h_dptr(p->parent, p->bindex);
5527c038 18912+ p->hdir = au_hi(d_inode(p->parent), p->bindex);
4a4d8108
AM
18913+ if (p->hdir)
18914+ h_dir = p->hdir->hi_inode;
dece6358 18915+
b752ccd1
AM
18916+ /*
18917+ * udba case, or
18918+ * if DI_LOCKED is not set, then p->parent may be different
18919+ * and h_parent can be NULL.
18920+ */
86dc4139 18921+ if (unlikely(!p->hdir || !h_dir || !p->h_parent)) {
e49829fe 18922+ err = -EBUSY;
4a4d8108
AM
18923+ if (!au_ftest_pin(p->flags, DI_LOCKED))
18924+ di_read_unlock(p->parent, AuLock_IR);
18925+ dput(p->parent);
18926+ p->parent = NULL;
18927+ goto out_err;
18928+ }
1308ab2a 18929+
4a4d8108 18930+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
86dc4139 18931+ p->h_mnt = au_br_mnt(p->br);
b4510431 18932+ err = vfsub_mnt_want_write(p->h_mnt);
dece6358 18933+ if (unlikely(err)) {
4a4d8108 18934+ au_fclr_pin(p->flags, MNT_WRITE);
86dc4139
AM
18935+ if (!au_ftest_pin(p->flags, DI_LOCKED))
18936+ di_read_unlock(p->parent, AuLock_IR);
18937+ dput(p->parent);
18938+ p->parent = NULL;
18939+ goto out_err;
dece6358
AM
18940+ }
18941+ }
4a4d8108 18942+
86dc4139
AM
18943+ au_igrab(h_dir);
18944+ err = au_pin_hdir_lock(p);
18945+ if (!err)
18946+ goto out; /* success */
18947+
076b876e
AM
18948+ au_unpin(p);
18949+
4f0767ce 18950+out_err:
4a4d8108
AM
18951+ pr_err("err %d\n", err);
18952+ err = au_busy_or_stale();
4f0767ce 18953+out:
1facf9fc 18954+ return err;
18955+}
18956+
4a4d8108
AM
18957+void au_pin_init(struct au_pin *p, struct dentry *dentry,
18958+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
18959+ unsigned int udba, unsigned char flags)
18960+{
18961+ p->dentry = dentry;
18962+ p->udba = udba;
18963+ p->lsc_di = lsc_di;
18964+ p->lsc_hi = lsc_hi;
18965+ p->flags = flags;
18966+ p->bindex = bindex;
18967+
18968+ p->parent = NULL;
18969+ p->hdir = NULL;
18970+ p->h_mnt = NULL;
86dc4139
AM
18971+
18972+ p->h_dentry = NULL;
18973+ p->h_parent = NULL;
18974+ p->br = NULL;
18975+ p->task = current;
4a4d8108
AM
18976+}
18977+
18978+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
18979+ unsigned int udba, unsigned char flags)
18980+{
18981+ au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2,
18982+ udba, flags);
18983+ return au_do_pin(pin);
18984+}
18985+
dece6358
AM
18986+/* ---------------------------------------------------------------------- */
18987+
1308ab2a 18988+/*
4a4d8108
AM
18989+ * ->setattr() and ->getattr() are called in various cases.
18990+ * chmod, stat: dentry is revalidated.
18991+ * fchmod, fstat: file and dentry are not revalidated, additionally they may be
18992+ * unhashed.
18993+ * for ->setattr(), ia->ia_file is passed from ftruncate only.
1308ab2a 18994+ */
027c5e7a 18995+/* todo: consolidate with do_refresh() and simple_reval_dpath() */
c1595e42 18996+int au_reval_for_attr(struct dentry *dentry, unsigned int sigen)
1facf9fc 18997+{
4a4d8108 18998+ int err;
4a4d8108 18999+ struct dentry *parent;
1facf9fc 19000+
1308ab2a 19001+ err = 0;
027c5e7a 19002+ if (au_digen_test(dentry, sigen)) {
4a4d8108
AM
19003+ parent = dget_parent(dentry);
19004+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 19005+ err = au_refresh_dentry(dentry, parent);
4a4d8108
AM
19006+ di_read_unlock(parent, AuLock_IR);
19007+ dput(parent);
dece6358 19008+ }
1facf9fc 19009+
4a4d8108 19010+ AuTraceErr(err);
1308ab2a 19011+ return err;
19012+}
dece6358 19013+
c1595e42
JR
19014+int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
19015+ struct au_icpup_args *a)
1308ab2a 19016+{
19017+ int err;
4a4d8108 19018+ loff_t sz;
e49829fe 19019+ aufs_bindex_t bstart, ibstart;
4a4d8108
AM
19020+ struct dentry *hi_wh, *parent;
19021+ struct inode *inode;
4a4d8108
AM
19022+ struct au_wr_dir_args wr_dir_args = {
19023+ .force_btgt = -1,
19024+ .flags = 0
19025+ };
19026+
2000de60 19027+ if (d_is_dir(dentry))
4a4d8108
AM
19028+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
19029+ /* plink or hi_wh() case */
2000de60 19030+ bstart = au_dbstart(dentry);
5527c038 19031+ inode = d_inode(dentry);
e49829fe 19032+ ibstart = au_ibstart(inode);
027c5e7a 19033+ if (bstart != ibstart && !au_test_ro(inode->i_sb, ibstart, inode))
e49829fe 19034+ wr_dir_args.force_btgt = ibstart;
4a4d8108
AM
19035+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
19036+ if (unlikely(err < 0))
19037+ goto out;
19038+ a->btgt = err;
19039+ if (err != bstart)
19040+ au_fset_icpup(a->flags, DID_CPUP);
19041+
19042+ err = 0;
19043+ a->pin_flags = AuPin_MNT_WRITE;
19044+ parent = NULL;
19045+ if (!IS_ROOT(dentry)) {
19046+ au_fset_pin(a->pin_flags, DI_LOCKED);
19047+ parent = dget_parent(dentry);
19048+ di_write_lock_parent(parent);
19049+ }
19050+
19051+ err = au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags);
19052+ if (unlikely(err))
19053+ goto out_parent;
19054+
19055+ a->h_path.dentry = au_h_dptr(dentry, bstart);
4a4d8108 19056+ sz = -1;
5527c038 19057+ a->h_inode = d_inode(a->h_path.dentry);
c1595e42
JR
19058+ if (ia && (ia->ia_valid & ATTR_SIZE)) {
19059+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
19060+ if (ia->ia_size < i_size_read(a->h_inode))
19061+ sz = ia->ia_size;
19062+ mutex_unlock(&a->h_inode->i_mutex);
19063+ }
4a4d8108 19064+
4a4d8108 19065+ hi_wh = NULL;
027c5e7a 19066+ if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) {
4a4d8108
AM
19067+ hi_wh = au_hi_wh(inode, a->btgt);
19068+ if (!hi_wh) {
c2b27bf2
AM
19069+ struct au_cp_generic cpg = {
19070+ .dentry = dentry,
19071+ .bdst = a->btgt,
19072+ .bsrc = -1,
19073+ .len = sz,
19074+ .pin = &a->pin
19075+ };
19076+ err = au_sio_cpup_wh(&cpg, /*file*/NULL);
4a4d8108
AM
19077+ if (unlikely(err))
19078+ goto out_unlock;
19079+ hi_wh = au_hi_wh(inode, a->btgt);
19080+ /* todo: revalidate hi_wh? */
19081+ }
19082+ }
19083+
19084+ if (parent) {
19085+ au_pin_set_parent_lflag(&a->pin, /*lflag*/0);
19086+ di_downgrade_lock(parent, AuLock_IR);
19087+ dput(parent);
19088+ parent = NULL;
19089+ }
19090+ if (!au_ftest_icpup(a->flags, DID_CPUP))
19091+ goto out; /* success */
19092+
19093+ if (!d_unhashed(dentry)) {
c2b27bf2
AM
19094+ struct au_cp_generic cpg = {
19095+ .dentry = dentry,
19096+ .bdst = a->btgt,
19097+ .bsrc = bstart,
19098+ .len = sz,
19099+ .pin = &a->pin,
19100+ .flags = AuCpup_DTIME | AuCpup_HOPEN
19101+ };
19102+ err = au_sio_cpup_simple(&cpg);
4a4d8108
AM
19103+ if (!err)
19104+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
19105+ } else if (!hi_wh)
19106+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
19107+ else
19108+ a->h_path.dentry = hi_wh; /* do not dget here */
1308ab2a 19109+
4f0767ce 19110+out_unlock:
5527c038 19111+ a->h_inode = d_inode(a->h_path.dentry);
86dc4139 19112+ if (!err)
dece6358 19113+ goto out; /* success */
4a4d8108 19114+ au_unpin(&a->pin);
4f0767ce 19115+out_parent:
4a4d8108
AM
19116+ if (parent) {
19117+ di_write_unlock(parent);
19118+ dput(parent);
19119+ }
4f0767ce 19120+out:
86dc4139
AM
19121+ if (!err)
19122+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
1facf9fc 19123+ return err;
19124+}
19125+
4a4d8108 19126+static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
1facf9fc 19127+{
4a4d8108 19128+ int err;
523b37e3 19129+ struct inode *inode, *delegated;
4a4d8108
AM
19130+ struct super_block *sb;
19131+ struct file *file;
19132+ struct au_icpup_args *a;
1facf9fc 19133+
5527c038 19134+ inode = d_inode(dentry);
4a4d8108 19135+ IMustLock(inode);
dece6358 19136+
4a4d8108
AM
19137+ err = -ENOMEM;
19138+ a = kzalloc(sizeof(*a), GFP_NOFS);
19139+ if (unlikely(!a))
19140+ goto out;
1facf9fc 19141+
4a4d8108
AM
19142+ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
19143+ ia->ia_valid &= ~ATTR_MODE;
dece6358 19144+
4a4d8108
AM
19145+ file = NULL;
19146+ sb = dentry->d_sb;
e49829fe
JR
19147+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
19148+ if (unlikely(err))
19149+ goto out_kfree;
19150+
4a4d8108
AM
19151+ if (ia->ia_valid & ATTR_FILE) {
19152+ /* currently ftruncate(2) only */
7e9cd9fe 19153+ AuDebugOn(!d_is_reg(dentry));
4a4d8108
AM
19154+ file = ia->ia_file;
19155+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
19156+ if (unlikely(err))
19157+ goto out_si;
19158+ ia->ia_file = au_hf_top(file);
19159+ a->udba = AuOpt_UDBA_NONE;
19160+ } else {
19161+ /* fchmod() doesn't pass ia_file */
19162+ a->udba = au_opt_udba(sb);
027c5e7a
AM
19163+ di_write_lock_child(dentry);
19164+ /* no d_unlinked(), to set UDBA_NONE for root */
4a4d8108
AM
19165+ if (d_unhashed(dentry))
19166+ a->udba = AuOpt_UDBA_NONE;
4a4d8108
AM
19167+ if (a->udba != AuOpt_UDBA_NONE) {
19168+ AuDebugOn(IS_ROOT(dentry));
19169+ err = au_reval_for_attr(dentry, au_sigen(sb));
19170+ if (unlikely(err))
19171+ goto out_dentry;
19172+ }
dece6358 19173+ }
dece6358 19174+
4a4d8108
AM
19175+ err = au_pin_and_icpup(dentry, ia, a);
19176+ if (unlikely(err < 0))
19177+ goto out_dentry;
19178+ if (au_ftest_icpup(a->flags, DID_CPUP)) {
19179+ ia->ia_file = NULL;
19180+ ia->ia_valid &= ~ATTR_FILE;
1308ab2a 19181+ }
dece6358 19182+
4a4d8108
AM
19183+ a->h_path.mnt = au_sbr_mnt(sb, a->btgt);
19184+ if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME))
19185+ == (ATTR_MODE | ATTR_CTIME)) {
7eafdf33 19186+ err = security_path_chmod(&a->h_path, ia->ia_mode);
4a4d8108
AM
19187+ if (unlikely(err))
19188+ goto out_unlock;
19189+ } else if ((ia->ia_valid & (ATTR_UID | ATTR_GID))
19190+ && (ia->ia_valid & ATTR_CTIME)) {
86dc4139 19191+ err = security_path_chown(&a->h_path, ia->ia_uid, ia->ia_gid);
4a4d8108
AM
19192+ if (unlikely(err))
19193+ goto out_unlock;
19194+ }
dece6358 19195+
4a4d8108
AM
19196+ if (ia->ia_valid & ATTR_SIZE) {
19197+ struct file *f;
1308ab2a 19198+
953406b4 19199+ if (ia->ia_size < i_size_read(inode))
4a4d8108 19200+ /* unmap only */
953406b4 19201+ truncate_setsize(inode, ia->ia_size);
1308ab2a 19202+
4a4d8108
AM
19203+ f = NULL;
19204+ if (ia->ia_valid & ATTR_FILE)
19205+ f = ia->ia_file;
19206+ mutex_unlock(&a->h_inode->i_mutex);
19207+ err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
19208+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
523b37e3
AM
19209+ } else {
19210+ delegated = NULL;
19211+ while (1) {
19212+ err = vfsub_notify_change(&a->h_path, ia, &delegated);
19213+ if (delegated) {
19214+ err = break_deleg_wait(&delegated);
19215+ if (!err)
19216+ continue;
19217+ }
19218+ break;
19219+ }
19220+ }
8cdd5066
JR
19221+ /*
19222+ * regardless aufs 'acl' option setting.
19223+ * why don't all acl-aware fs call this func from their ->setattr()?
19224+ */
19225+ if (!err && (ia->ia_valid & ATTR_MODE))
19226+ err = vfsub_acl_chmod(a->h_inode, ia->ia_mode);
4a4d8108
AM
19227+ if (!err)
19228+ au_cpup_attr_changeable(inode);
1308ab2a 19229+
4f0767ce 19230+out_unlock:
4a4d8108
AM
19231+ mutex_unlock(&a->h_inode->i_mutex);
19232+ au_unpin(&a->pin);
027c5e7a
AM
19233+ if (unlikely(err))
19234+ au_update_dbstart(dentry);
4f0767ce 19235+out_dentry:
4a4d8108
AM
19236+ di_write_unlock(dentry);
19237+ if (file) {
19238+ fi_write_unlock(file);
19239+ ia->ia_file = file;
19240+ ia->ia_valid |= ATTR_FILE;
19241+ }
4f0767ce 19242+out_si:
4a4d8108 19243+ si_read_unlock(sb);
e49829fe 19244+out_kfree:
4a4d8108 19245+ kfree(a);
4f0767ce 19246+out:
4a4d8108
AM
19247+ AuTraceErr(err);
19248+ return err;
1facf9fc 19249+}
19250+
c1595e42
JR
19251+#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
19252+static int au_h_path_to_set_attr(struct dentry *dentry,
19253+ struct au_icpup_args *a, struct path *h_path)
19254+{
19255+ int err;
19256+ struct super_block *sb;
19257+
19258+ sb = dentry->d_sb;
19259+ a->udba = au_opt_udba(sb);
19260+ /* no d_unlinked(), to set UDBA_NONE for root */
19261+ if (d_unhashed(dentry))
19262+ a->udba = AuOpt_UDBA_NONE;
19263+ if (a->udba != AuOpt_UDBA_NONE) {
19264+ AuDebugOn(IS_ROOT(dentry));
19265+ err = au_reval_for_attr(dentry, au_sigen(sb));
19266+ if (unlikely(err))
19267+ goto out;
19268+ }
19269+ err = au_pin_and_icpup(dentry, /*ia*/NULL, a);
19270+ if (unlikely(err < 0))
19271+ goto out;
19272+
19273+ h_path->dentry = a->h_path.dentry;
19274+ h_path->mnt = au_sbr_mnt(sb, a->btgt);
19275+
19276+out:
19277+ return err;
19278+}
19279+
19280+ssize_t au_srxattr(struct dentry *dentry, struct au_srxattr *arg)
19281+{
19282+ int err;
19283+ struct path h_path;
19284+ struct super_block *sb;
19285+ struct au_icpup_args *a;
19286+ struct inode *inode, *h_inode;
19287+
5527c038 19288+ inode = d_inode(dentry);
c1595e42
JR
19289+ IMustLock(inode);
19290+
19291+ err = -ENOMEM;
19292+ a = kzalloc(sizeof(*a), GFP_NOFS);
19293+ if (unlikely(!a))
19294+ goto out;
19295+
19296+ sb = dentry->d_sb;
19297+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
19298+ if (unlikely(err))
19299+ goto out_kfree;
19300+
19301+ h_path.dentry = NULL; /* silence gcc */
19302+ di_write_lock_child(dentry);
19303+ err = au_h_path_to_set_attr(dentry, a, &h_path);
19304+ if (unlikely(err))
19305+ goto out_di;
19306+
19307+ mutex_unlock(&a->h_inode->i_mutex);
19308+ switch (arg->type) {
19309+ case AU_XATTR_SET:
19310+ err = vfsub_setxattr(h_path.dentry,
19311+ arg->u.set.name, arg->u.set.value,
19312+ arg->u.set.size, arg->u.set.flags);
19313+ break;
19314+ case AU_XATTR_REMOVE:
19315+ err = vfsub_removexattr(h_path.dentry, arg->u.remove.name);
19316+ break;
19317+ case AU_ACL_SET:
19318+ err = -EOPNOTSUPP;
5527c038 19319+ h_inode = d_inode(h_path.dentry);
c1595e42
JR
19320+ if (h_inode->i_op->set_acl)
19321+ err = h_inode->i_op->set_acl(h_inode,
19322+ arg->u.acl_set.acl,
19323+ arg->u.acl_set.type);
19324+ break;
19325+ }
19326+ if (!err)
19327+ au_cpup_attr_timesizes(inode);
19328+
19329+ au_unpin(&a->pin);
19330+ if (unlikely(err))
19331+ au_update_dbstart(dentry);
19332+
19333+out_di:
19334+ di_write_unlock(dentry);
19335+ si_read_unlock(sb);
19336+out_kfree:
19337+ kfree(a);
19338+out:
19339+ AuTraceErr(err);
19340+ return err;
19341+}
19342+#endif
19343+
4a4d8108
AM
19344+static void au_refresh_iattr(struct inode *inode, struct kstat *st,
19345+ unsigned int nlink)
1facf9fc 19346+{
9dbd164d
AM
19347+ unsigned int n;
19348+
4a4d8108 19349+ inode->i_mode = st->mode;
86dc4139
AM
19350+ /* don't i_[ug]id_write() here */
19351+ inode->i_uid = st->uid;
19352+ inode->i_gid = st->gid;
4a4d8108
AM
19353+ inode->i_atime = st->atime;
19354+ inode->i_mtime = st->mtime;
19355+ inode->i_ctime = st->ctime;
1facf9fc 19356+
4a4d8108
AM
19357+ au_cpup_attr_nlink(inode, /*force*/0);
19358+ if (S_ISDIR(inode->i_mode)) {
9dbd164d
AM
19359+ n = inode->i_nlink;
19360+ n -= nlink;
19361+ n += st->nlink;
f6b6e03d 19362+ smp_mb(); /* for i_nlink */
7eafdf33 19363+ /* 0 can happen */
92d182d2 19364+ set_nlink(inode, n);
4a4d8108 19365+ }
1facf9fc 19366+
4a4d8108
AM
19367+ spin_lock(&inode->i_lock);
19368+ inode->i_blocks = st->blocks;
19369+ i_size_write(inode, st->size);
19370+ spin_unlock(&inode->i_lock);
1facf9fc 19371+}
19372+
c1595e42
JR
19373+/*
19374+ * common routine for aufs_getattr() and aufs_getxattr().
19375+ * returns zero or negative (an error).
19376+ * @dentry will be read-locked in success.
19377+ */
19378+int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path)
1facf9fc 19379+{
4a4d8108 19380+ int err;
076b876e 19381+ unsigned int mnt_flags, sigen;
c1595e42 19382+ unsigned char udba_none;
4a4d8108 19383+ aufs_bindex_t bindex;
4a4d8108
AM
19384+ struct super_block *sb, *h_sb;
19385+ struct inode *inode;
1facf9fc 19386+
c1595e42
JR
19387+ h_path->mnt = NULL;
19388+ h_path->dentry = NULL;
19389+
19390+ err = 0;
4a4d8108 19391+ sb = dentry->d_sb;
4a4d8108
AM
19392+ mnt_flags = au_mntflags(sb);
19393+ udba_none = !!au_opt_test(mnt_flags, UDBA_NONE);
1facf9fc 19394+
4a4d8108 19395+ /* support fstat(2) */
027c5e7a 19396+ if (!d_unlinked(dentry) && !udba_none) {
076b876e 19397+ sigen = au_sigen(sb);
027c5e7a
AM
19398+ err = au_digen_test(dentry, sigen);
19399+ if (!err) {
4a4d8108 19400+ di_read_lock_child(dentry, AuLock_IR);
027c5e7a 19401+ err = au_dbrange_test(dentry);
c1595e42
JR
19402+ if (unlikely(err)) {
19403+ di_read_unlock(dentry, AuLock_IR);
19404+ goto out;
19405+ }
027c5e7a 19406+ } else {
4a4d8108
AM
19407+ AuDebugOn(IS_ROOT(dentry));
19408+ di_write_lock_child(dentry);
027c5e7a
AM
19409+ err = au_dbrange_test(dentry);
19410+ if (!err)
19411+ err = au_reval_for_attr(dentry, sigen);
c1595e42
JR
19412+ if (!err)
19413+ di_downgrade_lock(dentry, AuLock_IR);
19414+ else {
19415+ di_write_unlock(dentry);
19416+ goto out;
19417+ }
4a4d8108
AM
19418+ }
19419+ } else
19420+ di_read_lock_child(dentry, AuLock_IR);
1facf9fc 19421+
5527c038 19422+ inode = d_inode(dentry);
4a4d8108 19423+ bindex = au_ibstart(inode);
c1595e42
JR
19424+ h_path->mnt = au_sbr_mnt(sb, bindex);
19425+ h_sb = h_path->mnt->mnt_sb;
19426+ if (!force
19427+ && !au_test_fs_bad_iattr(h_sb)
19428+ && udba_none)
19429+ goto out; /* success */
1facf9fc 19430+
4a4d8108 19431+ if (au_dbstart(dentry) == bindex)
c1595e42 19432+ h_path->dentry = au_h_dptr(dentry, bindex);
4a4d8108 19433+ else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
c1595e42
JR
19434+ h_path->dentry = au_plink_lkup(inode, bindex);
19435+ if (IS_ERR(h_path->dentry))
19436+ /* pretending success */
19437+ h_path->dentry = NULL;
19438+ else
19439+ dput(h_path->dentry);
4a4d8108 19440+ }
c1595e42
JR
19441+
19442+out:
19443+ return err;
19444+}
19445+
19446+static int aufs_getattr(struct vfsmount *mnt __maybe_unused,
19447+ struct dentry *dentry, struct kstat *st)
19448+{
19449+ int err;
19450+ unsigned char positive;
19451+ struct path h_path;
19452+ struct inode *inode;
19453+ struct super_block *sb;
19454+
5527c038 19455+ inode = d_inode(dentry);
c1595e42
JR
19456+ sb = dentry->d_sb;
19457+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
19458+ if (unlikely(err))
19459+ goto out;
19460+ err = au_h_path_getattr(dentry, /*force*/0, &h_path);
19461+ if (unlikely(err))
19462+ goto out_si;
c06a8ce3 19463+ if (unlikely(!h_path.dentry))
c1595e42 19464+ /* illegally overlapped or something */
4a4d8108
AM
19465+ goto out_fill; /* pretending success */
19466+
5527c038 19467+ positive = d_is_positive(h_path.dentry);
4a4d8108 19468+ if (positive)
c06a8ce3 19469+ err = vfs_getattr(&h_path, st);
4a4d8108
AM
19470+ if (!err) {
19471+ if (positive)
c06a8ce3 19472+ au_refresh_iattr(inode, st,
5527c038 19473+ d_inode(h_path.dentry)->i_nlink);
4a4d8108 19474+ goto out_fill; /* success */
1facf9fc 19475+ }
7f207e10 19476+ AuTraceErr(err);
c1595e42 19477+ goto out_di;
4a4d8108 19478+
4f0767ce 19479+out_fill:
4a4d8108 19480+ generic_fillattr(inode, st);
c1595e42 19481+out_di:
4a4d8108 19482+ di_read_unlock(dentry, AuLock_IR);
c1595e42 19483+out_si:
4a4d8108 19484+ si_read_unlock(sb);
7f207e10
AM
19485+out:
19486+ AuTraceErr(err);
4a4d8108 19487+ return err;
1facf9fc 19488+}
19489+
19490+/* ---------------------------------------------------------------------- */
19491+
c2c0f25c
AM
/*
 * Assumption:
 * - there are not so many symlinks.
 *
 * Structure:
 * - sbinfo (instead of iinfo) contains an hlist of struct au_symlink.
 *   If iinfo contained the hlist, I am afraid it would waste a rather large
 *   amount of memory.
 * - struct au_symlink contains the info necessary for follow_link() and
 *   put_link() of h_inode.
 */
1facf9fc 19503+
c2c0f25c
AM
19504+struct au_symlink {
19505+ union {
19506+ struct hlist_node hlist;
19507+ struct rcu_head rcu;
19508+ };
1facf9fc 19509+
c2c0f25c
AM
19510+ struct inode *h_inode;
19511+ void *h_cookie;
19512+};
1facf9fc 19513+
c2c0f25c
AM
19514+static void au_symlink_add(struct super_block *sb, struct au_symlink *slink,
19515+ struct inode *h_inode, void *cookie)
19516+{
19517+ struct au_sbinfo *sbinfo;
1facf9fc 19518+
c2c0f25c
AM
19519+ ihold(h_inode);
19520+ slink->h_inode = h_inode;
19521+ slink->h_cookie = cookie;
19522+ sbinfo = au_sbi(sb);
19523+ au_sphl_add(&slink->hlist, &sbinfo->si_symlink);
4a4d8108 19524+}
1facf9fc 19525+
c2c0f25c 19526+static void au_symlink_del(struct super_block *sb, struct au_symlink *slink)
4a4d8108 19527+{
c2c0f25c 19528+ struct au_sbinfo *sbinfo;
1facf9fc 19529+
c2c0f25c
AM
19530+ /* do not iput() within rcu */
19531+ iput(slink->h_inode);
19532+ slink->h_inode = NULL;
19533+ sbinfo = au_sbi(sb);
19534+ au_sphl_del_rcu(&slink->hlist, &sbinfo->si_symlink);
19535+ kfree_rcu(slink, rcu);
4a4d8108 19536+}
1facf9fc 19537+
c2c0f25c 19538+static const char *aufs_follow_link(struct dentry *dentry, void **cookie)
4a4d8108 19539+{
c2c0f25c
AM
19540+ const char *ret;
19541+ struct inode *inode, *h_inode;
19542+ struct dentry *h_dentry;
19543+ struct au_symlink *slink;
4a4d8108 19544+ int err;
c2c0f25c 19545+ aufs_bindex_t bindex;
1facf9fc 19546+
79b8bda9 19547+ ret = NULL; /* suppress a warning */
027c5e7a
AM
19548+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
19549+ if (unlikely(err))
c2c0f25c 19550+ goto out;
027c5e7a
AM
19551+
19552+ err = au_d_hashed_positive(dentry);
c2c0f25c
AM
19553+ if (unlikely(err))
19554+ goto out_unlock;
19555+
19556+ err = -EINVAL;
19557+ inode = d_inode(dentry);
19558+ bindex = au_ibstart(inode);
19559+ h_inode = au_h_iptr(inode, bindex);
19560+ if (unlikely(!h_inode->i_op->follow_link))
19561+ goto out_unlock;
19562+
19563+ err = -ENOMEM;
19564+ slink = kmalloc(sizeof(*slink), GFP_NOFS);
19565+ if (unlikely(!slink))
19566+ goto out_unlock;
19567+
19568+ err = -EBUSY;
19569+ h_dentry = NULL;
19570+ if (au_dbstart(dentry) <= bindex) {
19571+ h_dentry = au_h_dptr(dentry, bindex);
19572+ if (h_dentry)
19573+ dget(h_dentry);
027c5e7a 19574+ }
c2c0f25c
AM
19575+ if (!h_dentry) {
19576+ h_dentry = d_find_any_alias(h_inode);
19577+ if (IS_ERR(h_dentry)) {
19578+ err = PTR_ERR(h_dentry);
19579+ goto out_free;
19580+ }
19581+ }
19582+ if (unlikely(!h_dentry))
19583+ goto out_free;
1facf9fc 19584+
c2c0f25c
AM
19585+ err = 0;
19586+ AuDbg("%pf\n", h_inode->i_op->follow_link);
19587+ AuDbgDentry(h_dentry);
19588+ ret = h_inode->i_op->follow_link(h_dentry, cookie);
19589+ dput(h_dentry);
19590+
19591+ if (!IS_ERR_OR_NULL(ret)) {
19592+ au_symlink_add(inode->i_sb, slink, h_inode, *cookie);
19593+ *cookie = slink;
19594+ AuDbg("slink %p\n", slink);
19595+ goto out_unlock; /* success */
1308ab2a 19596+ }
1facf9fc 19597+
c2c0f25c
AM
19598+out_free:
19599+ slink->h_inode = NULL;
19600+ kfree_rcu(slink, rcu);
19601+out_unlock:
19602+ aufs_read_unlock(dentry, AuLock_IR);
4f0767ce 19603+out:
c2c0f25c
AM
19604+ if (unlikely(err))
19605+ ret = ERR_PTR(err);
19606+ AuTraceErrPtr(ret);
19607+ return ret;
4a4d8108 19608+}
1facf9fc 19609+
c2c0f25c 19610+static void aufs_put_link(struct inode *inode, void *cookie)
4a4d8108 19611+{
c2c0f25c
AM
19612+ struct au_symlink *slink;
19613+ struct inode *h_inode;
537831f9 19614+
c2c0f25c
AM
19615+ slink = cookie;
19616+ AuDbg("slink %p\n", slink);
19617+ h_inode = slink->h_inode;
19618+ AuDbg("%pf\n", h_inode->i_op->put_link);
19619+ AuDbgInode(h_inode);
19620+ if (h_inode->i_op->put_link)
19621+ h_inode->i_op->put_link(h_inode, slink->h_cookie);
19622+ au_symlink_del(inode->i_sb, slink);
4a4d8108 19623+}
1facf9fc 19624+
4a4d8108 19625+/* ---------------------------------------------------------------------- */
1facf9fc 19626+
0c3ec466 19627+static int aufs_update_time(struct inode *inode, struct timespec *ts, int flags)
4a4d8108 19628+{
0c3ec466
AM
19629+ int err;
19630+ struct super_block *sb;
19631+ struct inode *h_inode;
19632+
19633+ sb = inode->i_sb;
19634+ /* mmap_sem might be acquired already, cf. aufs_mmap() */
19635+ lockdep_off();
19636+ si_read_lock(sb, AuLock_FLUSH);
19637+ ii_write_lock_child(inode);
19638+ lockdep_on();
19639+ h_inode = au_h_iptr(inode, au_ibstart(inode));
19640+ err = vfsub_update_time(h_inode, ts, flags);
19641+ lockdep_off();
38d290e6
JR
19642+ if (!err)
19643+ au_cpup_attr_timesizes(inode);
0c3ec466
AM
19644+ ii_write_unlock(inode);
19645+ si_read_unlock(sb);
19646+ lockdep_on();
38d290e6
JR
19647+
19648+ if (!err && (flags & S_VERSION))
19649+ inode_inc_iversion(inode);
19650+
0c3ec466 19651+ return err;
4a4d8108 19652+}
1facf9fc 19653+
4a4d8108 19654+/* ---------------------------------------------------------------------- */
1308ab2a 19655+
b95c5147
AM
19656+/* no getattr version will be set by module.c:aufs_init() */
19657+struct inode_operations aufs_iop_nogetattr[AuIop_Last],
19658+ aufs_iop[] = {
19659+ [AuIop_SYMLINK] = {
19660+ .permission = aufs_permission,
c1595e42 19661+#ifdef CONFIG_FS_POSIX_ACL
b95c5147
AM
19662+ .get_acl = aufs_get_acl,
19663+ .set_acl = aufs_set_acl, /* unsupport for symlink? */
c1595e42
JR
19664+#endif
19665+
b95c5147
AM
19666+ .setattr = aufs_setattr,
19667+ .getattr = aufs_getattr,
0c3ec466 19668+
c1595e42 19669+#ifdef CONFIG_AUFS_XATTR
b95c5147
AM
19670+ .setxattr = aufs_setxattr,
19671+ .getxattr = aufs_getxattr,
19672+ .listxattr = aufs_listxattr,
19673+ .removexattr = aufs_removexattr,
c1595e42
JR
19674+#endif
19675+
b95c5147
AM
19676+ .readlink = generic_readlink,
19677+ .follow_link = aufs_follow_link,
19678+ .put_link = aufs_put_link,
0c3ec466 19679+
b95c5147
AM
19680+ /* .update_time = aufs_update_time */
19681+ },
19682+ [AuIop_DIR] = {
19683+ .create = aufs_create,
19684+ .lookup = aufs_lookup,
19685+ .link = aufs_link,
19686+ .unlink = aufs_unlink,
19687+ .symlink = aufs_symlink,
19688+ .mkdir = aufs_mkdir,
19689+ .rmdir = aufs_rmdir,
19690+ .mknod = aufs_mknod,
19691+ .rename = aufs_rename,
19692+
19693+ .permission = aufs_permission,
c1595e42 19694+#ifdef CONFIG_FS_POSIX_ACL
b95c5147
AM
19695+ .get_acl = aufs_get_acl,
19696+ .set_acl = aufs_set_acl,
c1595e42
JR
19697+#endif
19698+
b95c5147
AM
19699+ .setattr = aufs_setattr,
19700+ .getattr = aufs_getattr,
0c3ec466 19701+
c1595e42 19702+#ifdef CONFIG_AUFS_XATTR
b95c5147
AM
19703+ .setxattr = aufs_setxattr,
19704+ .getxattr = aufs_getxattr,
19705+ .listxattr = aufs_listxattr,
19706+ .removexattr = aufs_removexattr,
c1595e42
JR
19707+#endif
19708+
b95c5147
AM
19709+ .update_time = aufs_update_time,
19710+ .atomic_open = aufs_atomic_open,
19711+ .tmpfile = aufs_tmpfile
19712+ },
19713+ [AuIop_OTHER] = {
19714+ .permission = aufs_permission,
c1595e42 19715+#ifdef CONFIG_FS_POSIX_ACL
b95c5147
AM
19716+ .get_acl = aufs_get_acl,
19717+ .set_acl = aufs_set_acl,
c1595e42
JR
19718+#endif
19719+
b95c5147
AM
19720+ .setattr = aufs_setattr,
19721+ .getattr = aufs_getattr,
0c3ec466 19722+
c1595e42 19723+#ifdef CONFIG_AUFS_XATTR
b95c5147
AM
19724+ .setxattr = aufs_setxattr,
19725+ .getxattr = aufs_getxattr,
19726+ .listxattr = aufs_listxattr,
19727+ .removexattr = aufs_removexattr,
c1595e42
JR
19728+#endif
19729+
b95c5147
AM
19730+ .update_time = aufs_update_time
19731+ }
4a4d8108 19732+};
7f207e10
AM
19733diff -urN /usr/share/empty/fs/aufs/i_op_del.c linux/fs/aufs/i_op_del.c
19734--- /usr/share/empty/fs/aufs/i_op_del.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 19735+++ linux/fs/aufs/i_op_del.c 2016-02-28 11:26:32.573304539 +0100
5527c038 19736@@ -0,0 +1,510 @@
1facf9fc 19737+/*
8cdd5066 19738+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 19739+ *
19740+ * This program, aufs is free software; you can redistribute it and/or modify
19741+ * it under the terms of the GNU General Public License as published by
19742+ * the Free Software Foundation; either version 2 of the License, or
19743+ * (at your option) any later version.
dece6358
AM
19744+ *
19745+ * This program is distributed in the hope that it will be useful,
19746+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
19747+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19748+ * GNU General Public License for more details.
19749+ *
19750+ * You should have received a copy of the GNU General Public License
523b37e3 19751+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 19752+ */
19753+
19754+/*
4a4d8108 19755+ * inode operations (del entry)
1308ab2a 19756+ */
dece6358 19757+
1308ab2a 19758+#include "aufs.h"
dece6358 19759+
4a4d8108
AM
19760+/*
19761+ * decide if a new whiteout for @dentry is necessary or not.
19762+ * when it is necessary, prepare the parent dir for the upper branch whose
19763+ * branch index is @bcpup for creation. the actual creation of the whiteout will
19764+ * be done by caller.
19765+ * return value:
19766+ * 0: wh is unnecessary
19767+ * plus: wh is necessary
19768+ * minus: error
19769+ */
19770+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
1308ab2a 19771+{
4a4d8108
AM
19772+ int need_wh, err;
19773+ aufs_bindex_t bstart;
19774+ struct super_block *sb;
dece6358 19775+
4a4d8108
AM
19776+ sb = dentry->d_sb;
19777+ bstart = au_dbstart(dentry);
19778+ if (*bcpup < 0) {
19779+ *bcpup = bstart;
5527c038 19780+ if (au_test_ro(sb, bstart, d_inode(dentry))) {
4a4d8108
AM
19781+ err = AuWbrCopyup(au_sbi(sb), dentry);
19782+ *bcpup = err;
19783+ if (unlikely(err < 0))
19784+ goto out;
19785+ }
19786+ } else
19787+ AuDebugOn(bstart < *bcpup
5527c038 19788+ || au_test_ro(sb, *bcpup, d_inode(dentry)));
4a4d8108 19789+ AuDbg("bcpup %d, bstart %d\n", *bcpup, bstart);
1308ab2a 19790+
4a4d8108
AM
19791+ if (*bcpup != bstart) {
19792+ err = au_cpup_dirs(dentry, *bcpup);
19793+ if (unlikely(err))
19794+ goto out;
19795+ need_wh = 1;
19796+ } else {
027c5e7a 19797+ struct au_dinfo *dinfo, *tmp;
4a4d8108 19798+
027c5e7a
AM
19799+ need_wh = -ENOMEM;
19800+ dinfo = au_di(dentry);
19801+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
19802+ if (tmp) {
19803+ au_di_cp(tmp, dinfo);
19804+ au_di_swap(tmp, dinfo);
19805+ /* returns the number of positive dentries */
537831f9 19806+ need_wh = au_lkup_dentry(dentry, bstart + 1, /*type*/0);
027c5e7a
AM
19807+ au_di_swap(tmp, dinfo);
19808+ au_rw_write_unlock(&tmp->di_rwsem);
19809+ au_di_free(tmp);
4a4d8108
AM
19810+ }
19811+ }
19812+ AuDbg("need_wh %d\n", need_wh);
19813+ err = need_wh;
19814+
4f0767ce 19815+out:
4a4d8108 19816+ return err;
1facf9fc 19817+}
19818+
4a4d8108
AM
19819+/*
19820+ * simple tests for the del-entry operations.
19821+ * following the checks in vfs, plus the parent-child relationship.
19822+ */
19823+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
19824+ struct dentry *h_parent, int isdir)
1facf9fc 19825+{
4a4d8108
AM
19826+ int err;
19827+ umode_t h_mode;
19828+ struct dentry *h_dentry, *h_latest;
1308ab2a 19829+ struct inode *h_inode;
1facf9fc 19830+
4a4d8108 19831+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 19832+ if (d_really_is_positive(dentry)) {
4a4d8108 19833+ err = -ENOENT;
5527c038
JR
19834+ if (unlikely(d_is_negative(h_dentry)))
19835+ goto out;
19836+ h_inode = d_inode(h_dentry);
19837+ if (unlikely(!h_inode->i_nlink))
4a4d8108 19838+ goto out;
1facf9fc 19839+
4a4d8108
AM
19840+ h_mode = h_inode->i_mode;
19841+ if (!isdir) {
19842+ err = -EISDIR;
19843+ if (unlikely(S_ISDIR(h_mode)))
19844+ goto out;
19845+ } else if (unlikely(!S_ISDIR(h_mode))) {
19846+ err = -ENOTDIR;
19847+ goto out;
19848+ }
19849+ } else {
19850+ /* rename(2) case */
19851+ err = -EIO;
5527c038 19852+ if (unlikely(d_is_positive(h_dentry)))
4a4d8108
AM
19853+ goto out;
19854+ }
1facf9fc 19855+
4a4d8108
AM
19856+ err = -ENOENT;
19857+ /* expected parent dir is locked */
19858+ if (unlikely(h_parent != h_dentry->d_parent))
19859+ goto out;
19860+ err = 0;
19861+
19862+ /*
19863+ * rmdir a dir may break the consistency on some filesystem.
19864+ * let's try heavy test.
19865+ */
19866+ err = -EACCES;
076b876e 19867+ if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1)
5527c038 19868+ && au_test_h_perm(d_inode(h_parent),
076b876e 19869+ MAY_EXEC | MAY_WRITE)))
4a4d8108
AM
19870+ goto out;
19871+
076b876e 19872+ h_latest = au_sio_lkup_one(&dentry->d_name, h_parent);
4a4d8108
AM
19873+ err = -EIO;
19874+ if (IS_ERR(h_latest))
19875+ goto out;
19876+ if (h_latest == h_dentry)
19877+ err = 0;
19878+ dput(h_latest);
19879+
4f0767ce 19880+out:
4a4d8108 19881+ return err;
1308ab2a 19882+}
1facf9fc 19883+
4a4d8108
AM
19884+/*
19885+ * decide the branch where we operate for @dentry. the branch index will be set
19886+ * @rbcpup. after diciding it, 'pin' it and store the timestamps of the parent
19887+ * dir for reverting.
19888+ * when a new whiteout is necessary, create it.
19889+ */
19890+static struct dentry*
19891+lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup,
19892+ struct au_dtime *dt, struct au_pin *pin)
1308ab2a 19893+{
4a4d8108
AM
19894+ struct dentry *wh_dentry;
19895+ struct super_block *sb;
19896+ struct path h_path;
19897+ int err, need_wh;
19898+ unsigned int udba;
19899+ aufs_bindex_t bcpup;
dece6358 19900+
4a4d8108
AM
19901+ need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup);
19902+ wh_dentry = ERR_PTR(need_wh);
19903+ if (unlikely(need_wh < 0))
19904+ goto out;
19905+
19906+ sb = dentry->d_sb;
19907+ udba = au_opt_udba(sb);
19908+ bcpup = *rbcpup;
19909+ err = au_pin(pin, dentry, bcpup, udba,
19910+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
19911+ wh_dentry = ERR_PTR(err);
19912+ if (unlikely(err))
19913+ goto out;
19914+
19915+ h_path.dentry = au_pinned_h_parent(pin);
19916+ if (udba != AuOpt_UDBA_NONE
19917+ && au_dbstart(dentry) == bcpup) {
19918+ err = au_may_del(dentry, bcpup, h_path.dentry, isdir);
19919+ wh_dentry = ERR_PTR(err);
19920+ if (unlikely(err))
19921+ goto out_unpin;
19922+ }
19923+
19924+ h_path.mnt = au_sbr_mnt(sb, bcpup);
19925+ au_dtime_store(dt, au_pinned_parent(pin), &h_path);
19926+ wh_dentry = NULL;
19927+ if (!need_wh)
19928+ goto out; /* success, no need to create whiteout */
19929+
19930+ wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry);
19931+ if (IS_ERR(wh_dentry))
19932+ goto out_unpin;
19933+
19934+ /* returns with the parent is locked and wh_dentry is dget-ed */
19935+ goto out; /* success */
19936+
4f0767ce 19937+out_unpin:
4a4d8108 19938+ au_unpin(pin);
4f0767ce 19939+out:
4a4d8108 19940+ return wh_dentry;
1facf9fc 19941+}
19942+
4a4d8108
AM
19943+/*
19944+ * when removing a dir, rename it to a unique temporary whiteout-ed name first
19945+ * in order to be revertible and save time for removing many child whiteouts
19946+ * under the dir.
19947+ * returns 1 when there are too many child whiteout and caller should remove
19948+ * them asynchronously. returns 0 when the number of children is enough small to
19949+ * remove now or the branch fs is a remote fs.
19950+ * otherwise return an error.
19951+ */
19952+static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex,
19953+ struct au_nhash *whlist, struct inode *dir)
1facf9fc 19954+{
4a4d8108
AM
19955+ int rmdir_later, err, dirwh;
19956+ struct dentry *h_dentry;
19957+ struct super_block *sb;
5527c038 19958+ struct inode *inode;
4a4d8108
AM
19959+
19960+ sb = dentry->d_sb;
19961+ SiMustAnyLock(sb);
19962+ h_dentry = au_h_dptr(dentry, bindex);
19963+ err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex));
19964+ if (unlikely(err))
19965+ goto out;
19966+
19967+ /* stop monitoring */
5527c038
JR
19968+ inode = d_inode(dentry);
19969+ au_hn_free(au_hi(inode, bindex));
4a4d8108
AM
19970+
19971+ if (!au_test_fs_remote(h_dentry->d_sb)) {
19972+ dirwh = au_sbi(sb)->si_dirwh;
19973+ rmdir_later = (dirwh <= 1);
19974+ if (!rmdir_later)
19975+ rmdir_later = au_nhash_test_longer_wh(whlist, bindex,
19976+ dirwh);
19977+ if (rmdir_later)
19978+ return rmdir_later;
19979+ }
1facf9fc 19980+
4a4d8108
AM
19981+ err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist);
19982+ if (unlikely(err)) {
523b37e3
AM
19983+ AuIOErr("rmdir %pd, b%d failed, %d. ignored\n",
19984+ h_dentry, bindex, err);
4a4d8108
AM
19985+ err = 0;
19986+ }
dece6358 19987+
4f0767ce 19988+out:
4a4d8108
AM
19989+ AuTraceErr(err);
19990+ return err;
19991+}
1308ab2a 19992+
4a4d8108
AM
19993+/*
19994+ * final procedure for deleting a entry.
19995+ * maintain dentry and iattr.
19996+ */
19997+static void epilog(struct inode *dir, struct dentry *dentry,
19998+ aufs_bindex_t bindex)
19999+{
20000+ struct inode *inode;
1308ab2a 20001+
5527c038 20002+ inode = d_inode(dentry);
4a4d8108
AM
20003+ d_drop(dentry);
20004+ inode->i_ctime = dir->i_ctime;
1308ab2a 20005+
b912730e 20006+ au_dir_ts(dir, bindex);
4a4d8108 20007+ dir->i_version++;
1facf9fc 20008+}
20009+
4a4d8108
AM
20010+/*
20011+ * when an error happened, remove the created whiteout and revert everything.
20012+ */
7f207e10
AM
20013+static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex,
20014+ aufs_bindex_t bwh, struct dentry *wh_dentry,
20015+ struct dentry *dentry, struct au_dtime *dt)
1facf9fc 20016+{
4a4d8108
AM
20017+ int rerr;
20018+ struct path h_path = {
20019+ .dentry = wh_dentry,
7f207e10 20020+ .mnt = au_sbr_mnt(dir->i_sb, bindex)
4a4d8108 20021+ };
dece6358 20022+
7f207e10 20023+ rerr = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, dentry);
4a4d8108
AM
20024+ if (!rerr) {
20025+ au_set_dbwh(dentry, bwh);
20026+ au_dtime_revert(dt);
20027+ return 0;
20028+ }
dece6358 20029+
523b37e3 20030+ AuIOErr("%pd reverting whiteout failed(%d, %d)\n", dentry, err, rerr);
4a4d8108 20031+ return -EIO;
1facf9fc 20032+}
20033+
4a4d8108 20034+/* ---------------------------------------------------------------------- */
1facf9fc 20035+
4a4d8108 20036+int aufs_unlink(struct inode *dir, struct dentry *dentry)
1308ab2a 20037+{
4a4d8108
AM
20038+ int err;
20039+ aufs_bindex_t bwh, bindex, bstart;
523b37e3 20040+ struct inode *inode, *h_dir, *delegated;
4a4d8108 20041+ struct dentry *parent, *wh_dentry;
c2b27bf2
AM
20042+ /* to reuduce stack size */
20043+ struct {
20044+ struct au_dtime dt;
20045+ struct au_pin pin;
20046+ struct path h_path;
20047+ } *a;
1facf9fc 20048+
4a4d8108 20049+ IMustLock(dir);
027c5e7a 20050+
c2b27bf2
AM
20051+ err = -ENOMEM;
20052+ a = kmalloc(sizeof(*a), GFP_NOFS);
20053+ if (unlikely(!a))
20054+ goto out;
20055+
027c5e7a
AM
20056+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
20057+ if (unlikely(err))
c2b27bf2 20058+ goto out_free;
027c5e7a
AM
20059+ err = au_d_hashed_positive(dentry);
20060+ if (unlikely(err))
20061+ goto out_unlock;
5527c038 20062+ inode = d_inode(dentry);
4a4d8108 20063+ IMustLock(inode);
027c5e7a 20064+ err = -EISDIR;
2000de60 20065+ if (unlikely(d_is_dir(dentry)))
027c5e7a 20066+ goto out_unlock; /* possible? */
1facf9fc 20067+
4a4d8108
AM
20068+ bstart = au_dbstart(dentry);
20069+ bwh = au_dbwh(dentry);
20070+ bindex = -1;
027c5e7a
AM
20071+ parent = dentry->d_parent; /* dir inode is locked */
20072+ di_write_lock_parent(parent);
c2b27bf2
AM
20073+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &a->dt,
20074+ &a->pin);
4a4d8108
AM
20075+ err = PTR_ERR(wh_dentry);
20076+ if (IS_ERR(wh_dentry))
027c5e7a 20077+ goto out_parent;
1facf9fc 20078+
c2b27bf2
AM
20079+ a->h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
20080+ a->h_path.dentry = au_h_dptr(dentry, bstart);
20081+ dget(a->h_path.dentry);
4a4d8108 20082+ if (bindex == bstart) {
c2b27bf2 20083+ h_dir = au_pinned_h_dir(&a->pin);
523b37e3
AM
20084+ delegated = NULL;
20085+ err = vfsub_unlink(h_dir, &a->h_path, &delegated, /*force*/0);
20086+ if (unlikely(err == -EWOULDBLOCK)) {
20087+ pr_warn("cannot retry for NFSv4 delegation"
20088+ " for an internal unlink\n");
20089+ iput(delegated);
20090+ }
4a4d8108
AM
20091+ } else {
20092+ /* dir inode is locked */
5527c038 20093+ h_dir = d_inode(wh_dentry->d_parent);
4a4d8108
AM
20094+ IMustLock(h_dir);
20095+ err = 0;
20096+ }
dece6358 20097+
4a4d8108 20098+ if (!err) {
7f207e10 20099+ vfsub_drop_nlink(inode);
4a4d8108
AM
20100+ epilog(dir, dentry, bindex);
20101+
20102+ /* update target timestamps */
20103+ if (bindex == bstart) {
c2b27bf2
AM
20104+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL);
20105+ /*ignore*/
5527c038 20106+ inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
4a4d8108
AM
20107+ } else
20108+ /* todo: this timestamp may be reverted later */
20109+ inode->i_ctime = h_dir->i_ctime;
027c5e7a 20110+ goto out_unpin; /* success */
1facf9fc 20111+ }
20112+
4a4d8108
AM
20113+ /* revert */
20114+ if (wh_dentry) {
20115+ int rerr;
20116+
c2b27bf2
AM
20117+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
20118+ &a->dt);
4a4d8108
AM
20119+ if (rerr)
20120+ err = rerr;
dece6358 20121+ }
1facf9fc 20122+
027c5e7a 20123+out_unpin:
c2b27bf2 20124+ au_unpin(&a->pin);
4a4d8108 20125+ dput(wh_dentry);
c2b27bf2 20126+ dput(a->h_path.dentry);
027c5e7a 20127+out_parent:
4a4d8108 20128+ di_write_unlock(parent);
027c5e7a 20129+out_unlock:
4a4d8108 20130+ aufs_read_unlock(dentry, AuLock_DW);
c2b27bf2
AM
20131+out_free:
20132+ kfree(a);
027c5e7a 20133+out:
4a4d8108 20134+ return err;
dece6358
AM
20135+}
20136+
4a4d8108 20137+int aufs_rmdir(struct inode *dir, struct dentry *dentry)
1308ab2a 20138+{
4a4d8108
AM
20139+ int err, rmdir_later;
20140+ aufs_bindex_t bwh, bindex, bstart;
4a4d8108
AM
20141+ struct inode *inode;
20142+ struct dentry *parent, *wh_dentry, *h_dentry;
20143+ struct au_whtmp_rmdir *args;
c2b27bf2
AM
20144+ /* to reuduce stack size */
20145+ struct {
20146+ struct au_dtime dt;
20147+ struct au_pin pin;
20148+ } *a;
1facf9fc 20149+
4a4d8108 20150+ IMustLock(dir);
027c5e7a 20151+
c2b27bf2
AM
20152+ err = -ENOMEM;
20153+ a = kmalloc(sizeof(*a), GFP_NOFS);
20154+ if (unlikely(!a))
20155+ goto out;
20156+
027c5e7a
AM
20157+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
20158+ if (unlikely(err))
c2b27bf2 20159+ goto out_free;
53392da6
AM
20160+ err = au_alive_dir(dentry);
20161+ if (unlikely(err))
027c5e7a 20162+ goto out_unlock;
5527c038 20163+ inode = d_inode(dentry);
4a4d8108 20164+ IMustLock(inode);
027c5e7a 20165+ err = -ENOTDIR;
2000de60 20166+ if (unlikely(!d_is_dir(dentry)))
027c5e7a 20167+ goto out_unlock; /* possible? */
dece6358 20168+
4a4d8108
AM
20169+ err = -ENOMEM;
20170+ args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS);
20171+ if (unlikely(!args))
20172+ goto out_unlock;
dece6358 20173+
4a4d8108
AM
20174+ parent = dentry->d_parent; /* dir inode is locked */
20175+ di_write_lock_parent(parent);
20176+ err = au_test_empty(dentry, &args->whlist);
20177+ if (unlikely(err))
027c5e7a 20178+ goto out_parent;
1facf9fc 20179+
4a4d8108
AM
20180+ bstart = au_dbstart(dentry);
20181+ bwh = au_dbwh(dentry);
20182+ bindex = -1;
c2b27bf2
AM
20183+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &a->dt,
20184+ &a->pin);
4a4d8108
AM
20185+ err = PTR_ERR(wh_dentry);
20186+ if (IS_ERR(wh_dentry))
027c5e7a 20187+ goto out_parent;
1facf9fc 20188+
4a4d8108
AM
20189+ h_dentry = au_h_dptr(dentry, bstart);
20190+ dget(h_dentry);
20191+ rmdir_later = 0;
20192+ if (bindex == bstart) {
20193+ err = renwh_and_rmdir(dentry, bstart, &args->whlist, dir);
20194+ if (err > 0) {
20195+ rmdir_later = err;
20196+ err = 0;
20197+ }
20198+ } else {
20199+ /* stop monitoring */
20200+ au_hn_free(au_hi(inode, bstart));
20201+
20202+ /* dir inode is locked */
5527c038 20203+ IMustLock(d_inode(wh_dentry->d_parent));
1facf9fc 20204+ err = 0;
20205+ }
20206+
4a4d8108 20207+ if (!err) {
027c5e7a 20208+ vfsub_dead_dir(inode);
4a4d8108
AM
20209+ au_set_dbdiropq(dentry, -1);
20210+ epilog(dir, dentry, bindex);
1308ab2a 20211+
4a4d8108
AM
20212+ if (rmdir_later) {
20213+ au_whtmp_kick_rmdir(dir, bstart, h_dentry, args);
20214+ args = NULL;
20215+ }
1308ab2a 20216+
4a4d8108 20217+ goto out_unpin; /* success */
1facf9fc 20218+ }
20219+
4a4d8108
AM
20220+ /* revert */
20221+ AuLabel(revert);
20222+ if (wh_dentry) {
20223+ int rerr;
1308ab2a 20224+
c2b27bf2
AM
20225+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
20226+ &a->dt);
4a4d8108
AM
20227+ if (rerr)
20228+ err = rerr;
1facf9fc 20229+ }
20230+
4f0767ce 20231+out_unpin:
c2b27bf2 20232+ au_unpin(&a->pin);
4a4d8108
AM
20233+ dput(wh_dentry);
20234+ dput(h_dentry);
027c5e7a 20235+out_parent:
4a4d8108
AM
20236+ di_write_unlock(parent);
20237+ if (args)
20238+ au_whtmp_rmdir_free(args);
4f0767ce 20239+out_unlock:
4a4d8108 20240+ aufs_read_unlock(dentry, AuLock_DW);
c2b27bf2
AM
20241+out_free:
20242+ kfree(a);
4f0767ce 20243+out:
4a4d8108
AM
20244+ AuTraceErr(err);
20245+ return err;
dece6358 20246+}
7f207e10
AM
20247diff -urN /usr/share/empty/fs/aufs/i_op_ren.c linux/fs/aufs/i_op_ren.c
20248--- /usr/share/empty/fs/aufs/i_op_ren.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 20249+++ linux/fs/aufs/i_op_ren.c 2016-02-28 11:26:32.573304539 +0100
b95c5147 20250@@ -0,0 +1,1015 @@
1facf9fc 20251+/*
8cdd5066 20252+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 20253+ *
20254+ * This program, aufs is free software; you can redistribute it and/or modify
20255+ * it under the terms of the GNU General Public License as published by
20256+ * the Free Software Foundation; either version 2 of the License, or
20257+ * (at your option) any later version.
dece6358
AM
20258+ *
20259+ * This program is distributed in the hope that it will be useful,
20260+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20261+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20262+ * GNU General Public License for more details.
20263+ *
20264+ * You should have received a copy of the GNU General Public License
523b37e3 20265+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 20266+ */
20267+
20268+/*
4a4d8108
AM
20269+ * inode operation (rename entry)
20270+ * todo: this is crazy monster
1facf9fc 20271+ */
20272+
20273+#include "aufs.h"
20274+
4a4d8108
AM
20275+enum { AuSRC, AuDST, AuSrcDst };
20276+enum { AuPARENT, AuCHILD, AuParentChild };
1facf9fc 20277+
4a4d8108
AM
20278+#define AuRen_ISDIR 1
20279+#define AuRen_ISSAMEDIR (1 << 1)
20280+#define AuRen_WHSRC (1 << 2)
20281+#define AuRen_WHDST (1 << 3)
20282+#define AuRen_MNT_WRITE (1 << 4)
20283+#define AuRen_DT_DSTDIR (1 << 5)
20284+#define AuRen_DIROPQ (1 << 6)
4a4d8108 20285+#define au_ftest_ren(flags, name) ((flags) & AuRen_##name)
7f207e10
AM
20286+#define au_fset_ren(flags, name) \
20287+ do { (flags) |= AuRen_##name; } while (0)
20288+#define au_fclr_ren(flags, name) \
20289+ do { (flags) &= ~AuRen_##name; } while (0)
1facf9fc 20290+
4a4d8108
AM
20291+struct au_ren_args {
20292+ struct {
20293+ struct dentry *dentry, *h_dentry, *parent, *h_parent,
20294+ *wh_dentry;
20295+ struct inode *dir, *inode;
20296+ struct au_hinode *hdir;
20297+ struct au_dtime dt[AuParentChild];
20298+ aufs_bindex_t bstart;
20299+ } sd[AuSrcDst];
1facf9fc 20300+
4a4d8108
AM
20301+#define src_dentry sd[AuSRC].dentry
20302+#define src_dir sd[AuSRC].dir
20303+#define src_inode sd[AuSRC].inode
20304+#define src_h_dentry sd[AuSRC].h_dentry
20305+#define src_parent sd[AuSRC].parent
20306+#define src_h_parent sd[AuSRC].h_parent
20307+#define src_wh_dentry sd[AuSRC].wh_dentry
20308+#define src_hdir sd[AuSRC].hdir
20309+#define src_h_dir sd[AuSRC].hdir->hi_inode
20310+#define src_dt sd[AuSRC].dt
20311+#define src_bstart sd[AuSRC].bstart
1facf9fc 20312+
4a4d8108
AM
20313+#define dst_dentry sd[AuDST].dentry
20314+#define dst_dir sd[AuDST].dir
20315+#define dst_inode sd[AuDST].inode
20316+#define dst_h_dentry sd[AuDST].h_dentry
20317+#define dst_parent sd[AuDST].parent
20318+#define dst_h_parent sd[AuDST].h_parent
20319+#define dst_wh_dentry sd[AuDST].wh_dentry
20320+#define dst_hdir sd[AuDST].hdir
20321+#define dst_h_dir sd[AuDST].hdir->hi_inode
20322+#define dst_dt sd[AuDST].dt
20323+#define dst_bstart sd[AuDST].bstart
20324+
20325+ struct dentry *h_trap;
20326+ struct au_branch *br;
20327+ struct au_hinode *src_hinode;
20328+ struct path h_path;
20329+ struct au_nhash whlist;
027c5e7a 20330+ aufs_bindex_t btgt, src_bwh, src_bdiropq;
1facf9fc 20331+
1308ab2a 20332+ unsigned int flags;
1facf9fc 20333+
4a4d8108
AM
20334+ struct au_whtmp_rmdir *thargs;
20335+ struct dentry *h_dst;
20336+};
1308ab2a 20337+
4a4d8108 20338+/* ---------------------------------------------------------------------- */
1308ab2a 20339+
4a4d8108
AM
20340+/*
20341+ * functions for reverting.
20342+ * when an error happened in a single rename systemcall, we should revert
79b8bda9 20343+ * everything as if nothing happened.
4a4d8108
AM
20344+ * we don't need to revert the copied-up/down the parent dir since they are
20345+ * harmless.
20346+ */
1facf9fc 20347+
4a4d8108
AM
20348+#define RevertFailure(fmt, ...) do { \
20349+ AuIOErr("revert failure: " fmt " (%d, %d)\n", \
20350+ ##__VA_ARGS__, err, rerr); \
20351+ err = -EIO; \
20352+} while (0)
1facf9fc 20353+
4a4d8108 20354+static void au_ren_rev_diropq(int err, struct au_ren_args *a)
1facf9fc 20355+{
4a4d8108 20356+ int rerr;
1facf9fc 20357+
4a4d8108
AM
20358+ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
20359+ rerr = au_diropq_remove(a->src_dentry, a->btgt);
20360+ au_hn_imtx_unlock(a->src_hinode);
027c5e7a 20361+ au_set_dbdiropq(a->src_dentry, a->src_bdiropq);
4a4d8108 20362+ if (rerr)
523b37e3 20363+ RevertFailure("remove diropq %pd", a->src_dentry);
4a4d8108 20364+}
1facf9fc 20365+
4a4d8108
AM
20366+static void au_ren_rev_rename(int err, struct au_ren_args *a)
20367+{
20368+ int rerr;
523b37e3 20369+ struct inode *delegated;
1facf9fc 20370+
b4510431
AM
20371+ a->h_path.dentry = vfsub_lkup_one(&a->src_dentry->d_name,
20372+ a->src_h_parent);
4a4d8108
AM
20373+ rerr = PTR_ERR(a->h_path.dentry);
20374+ if (IS_ERR(a->h_path.dentry)) {
523b37e3 20375+ RevertFailure("lkup one %pd", a->src_dentry);
4a4d8108 20376+ return;
1facf9fc 20377+ }
20378+
523b37e3 20379+ delegated = NULL;
4a4d8108
AM
20380+ rerr = vfsub_rename(a->dst_h_dir,
20381+ au_h_dptr(a->src_dentry, a->btgt),
523b37e3
AM
20382+ a->src_h_dir, &a->h_path, &delegated);
20383+ if (unlikely(rerr == -EWOULDBLOCK)) {
20384+ pr_warn("cannot retry for NFSv4 delegation"
20385+ " for an internal rename\n");
20386+ iput(delegated);
20387+ }
4a4d8108
AM
20388+ d_drop(a->h_path.dentry);
20389+ dput(a->h_path.dentry);
20390+ /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */
20391+ if (rerr)
523b37e3 20392+ RevertFailure("rename %pd", a->src_dentry);
1facf9fc 20393+}
20394+
4a4d8108 20395+static void au_ren_rev_whtmp(int err, struct au_ren_args *a)
1facf9fc 20396+{
4a4d8108 20397+ int rerr;
523b37e3 20398+ struct inode *delegated;
dece6358 20399+
b4510431
AM
20400+ a->h_path.dentry = vfsub_lkup_one(&a->dst_dentry->d_name,
20401+ a->dst_h_parent);
4a4d8108
AM
20402+ rerr = PTR_ERR(a->h_path.dentry);
20403+ if (IS_ERR(a->h_path.dentry)) {
523b37e3 20404+ RevertFailure("lkup one %pd", a->dst_dentry);
4a4d8108
AM
20405+ return;
20406+ }
5527c038 20407+ if (d_is_positive(a->h_path.dentry)) {
4a4d8108
AM
20408+ d_drop(a->h_path.dentry);
20409+ dput(a->h_path.dentry);
20410+ return;
dece6358
AM
20411+ }
20412+
523b37e3
AM
20413+ delegated = NULL;
20414+ rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path,
20415+ &delegated);
20416+ if (unlikely(rerr == -EWOULDBLOCK)) {
20417+ pr_warn("cannot retry for NFSv4 delegation"
20418+ " for an internal rename\n");
20419+ iput(delegated);
20420+ }
4a4d8108
AM
20421+ d_drop(a->h_path.dentry);
20422+ dput(a->h_path.dentry);
20423+ if (!rerr)
20424+ au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst));
20425+ else
523b37e3 20426+ RevertFailure("rename %pd", a->h_dst);
4a4d8108 20427+}
1308ab2a 20428+
4a4d8108
AM
20429+static void au_ren_rev_whsrc(int err, struct au_ren_args *a)
20430+{
20431+ int rerr;
1308ab2a 20432+
4a4d8108
AM
20433+ a->h_path.dentry = a->src_wh_dentry;
20434+ rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry);
027c5e7a 20435+ au_set_dbwh(a->src_dentry, a->src_bwh);
4a4d8108 20436+ if (rerr)
523b37e3 20437+ RevertFailure("unlink %pd", a->src_wh_dentry);
4a4d8108 20438+}
4a4d8108 20439+#undef RevertFailure
1facf9fc 20440+
1308ab2a 20441+/* ---------------------------------------------------------------------- */
20442+
4a4d8108
AM
20443+/*
20444+ * when we have to copyup the renaming entry, do it with the rename-target name
20445+ * in order to minimize the cost (the later actual rename is unnecessary).
20446+ * otherwise rename it on the target branch.
20447+ */
20448+static int au_ren_or_cpup(struct au_ren_args *a)
1facf9fc 20449+{
dece6358 20450+ int err;
4a4d8108 20451+ struct dentry *d;
523b37e3 20452+ struct inode *delegated;
1facf9fc 20453+
4a4d8108
AM
20454+ d = a->src_dentry;
20455+ if (au_dbstart(d) == a->btgt) {
20456+ a->h_path.dentry = a->dst_h_dentry;
20457+ if (au_ftest_ren(a->flags, DIROPQ)
20458+ && au_dbdiropq(d) == a->btgt)
20459+ au_fclr_ren(a->flags, DIROPQ);
20460+ AuDebugOn(au_dbstart(d) != a->btgt);
523b37e3 20461+ delegated = NULL;
4a4d8108 20462+ err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt),
523b37e3
AM
20463+ a->dst_h_dir, &a->h_path, &delegated);
20464+ if (unlikely(err == -EWOULDBLOCK)) {
20465+ pr_warn("cannot retry for NFSv4 delegation"
20466+ " for an internal rename\n");
20467+ iput(delegated);
20468+ }
c2b27bf2 20469+ } else
86dc4139 20470+ BUG();
1308ab2a 20471+
027c5e7a
AM
20472+ if (!err && a->h_dst)
20473+ /* it will be set to dinfo later */
20474+ dget(a->h_dst);
1facf9fc 20475+
dece6358
AM
20476+ return err;
20477+}
1facf9fc 20478+
4a4d8108
AM
20479+/* cf. aufs_rmdir() */
20480+static int au_ren_del_whtmp(struct au_ren_args *a)
dece6358 20481+{
4a4d8108
AM
20482+ int err;
20483+ struct inode *dir;
1facf9fc 20484+
4a4d8108
AM
20485+ dir = a->dst_dir;
20486+ SiMustAnyLock(dir->i_sb);
20487+ if (!au_nhash_test_longer_wh(&a->whlist, a->btgt,
20488+ au_sbi(dir->i_sb)->si_dirwh)
20489+ || au_test_fs_remote(a->h_dst->d_sb)) {
20490+ err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist);
20491+ if (unlikely(err))
523b37e3
AM
20492+ pr_warn("failed removing whtmp dir %pd (%d), "
20493+ "ignored.\n", a->h_dst, err);
4a4d8108
AM
20494+ } else {
20495+ au_nhash_wh_free(&a->thargs->whlist);
20496+ a->thargs->whlist = a->whlist;
20497+ a->whlist.nh_num = 0;
20498+ au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs);
20499+ dput(a->h_dst);
20500+ a->thargs = NULL;
20501+ }
20502+
20503+ return 0;
1308ab2a 20504+}
1facf9fc 20505+
4a4d8108
AM
20506+/* make it 'opaque' dir. */
20507+static int au_ren_diropq(struct au_ren_args *a)
20508+{
20509+ int err;
20510+ struct dentry *diropq;
1facf9fc 20511+
4a4d8108 20512+ err = 0;
027c5e7a 20513+ a->src_bdiropq = au_dbdiropq(a->src_dentry);
4a4d8108
AM
20514+ a->src_hinode = au_hi(a->src_inode, a->btgt);
20515+ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
20516+ diropq = au_diropq_create(a->src_dentry, a->btgt);
20517+ au_hn_imtx_unlock(a->src_hinode);
20518+ if (IS_ERR(diropq))
20519+ err = PTR_ERR(diropq);
076b876e
AM
20520+ else
20521+ dput(diropq);
1facf9fc 20522+
4a4d8108
AM
20523+ return err;
20524+}
1facf9fc 20525+
4a4d8108
AM
20526+static int do_rename(struct au_ren_args *a)
20527+{
20528+ int err;
20529+ struct dentry *d, *h_d;
1facf9fc 20530+
4a4d8108
AM
20531+ /* prepare workqueue args for asynchronous rmdir */
20532+ h_d = a->dst_h_dentry;
5527c038 20533+ if (au_ftest_ren(a->flags, ISDIR) && d_is_positive(h_d)) {
4a4d8108
AM
20534+ err = -ENOMEM;
20535+ a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb, GFP_NOFS);
20536+ if (unlikely(!a->thargs))
20537+ goto out;
20538+ a->h_dst = dget(h_d);
20539+ }
1facf9fc 20540+
4a4d8108
AM
20541+ /* create whiteout for src_dentry */
20542+ if (au_ftest_ren(a->flags, WHSRC)) {
027c5e7a
AM
20543+ a->src_bwh = au_dbwh(a->src_dentry);
20544+ AuDebugOn(a->src_bwh >= 0);
4a4d8108
AM
20545+ a->src_wh_dentry
20546+ = au_wh_create(a->src_dentry, a->btgt, a->src_h_parent);
20547+ err = PTR_ERR(a->src_wh_dentry);
20548+ if (IS_ERR(a->src_wh_dentry))
20549+ goto out_thargs;
20550+ }
1facf9fc 20551+
4a4d8108
AM
20552+ /* lookup whiteout for dentry */
20553+ if (au_ftest_ren(a->flags, WHDST)) {
20554+ h_d = au_wh_lkup(a->dst_h_parent, &a->dst_dentry->d_name,
20555+ a->br);
20556+ err = PTR_ERR(h_d);
20557+ if (IS_ERR(h_d))
20558+ goto out_whsrc;
5527c038 20559+ if (d_is_negative(h_d))
4a4d8108
AM
20560+ dput(h_d);
20561+ else
20562+ a->dst_wh_dentry = h_d;
20563+ }
1facf9fc 20564+
4a4d8108
AM
20565+ /* rename dentry to tmpwh */
20566+ if (a->thargs) {
20567+ err = au_whtmp_ren(a->dst_h_dentry, a->br);
20568+ if (unlikely(err))
20569+ goto out_whdst;
dece6358 20570+
4a4d8108
AM
20571+ d = a->dst_dentry;
20572+ au_set_h_dptr(d, a->btgt, NULL);
86dc4139 20573+ err = au_lkup_neg(d, a->btgt, /*wh*/0);
4a4d8108
AM
20574+ if (unlikely(err))
20575+ goto out_whtmp;
20576+ a->dst_h_dentry = au_h_dptr(d, a->btgt);
20577+ }
1facf9fc 20578+
5527c038 20579+ BUG_ON(d_is_positive(a->dst_h_dentry) && a->src_bstart != a->btgt);
1facf9fc 20580+
4a4d8108
AM
20581+ /* rename by vfs_rename or cpup */
20582+ d = a->dst_dentry;
20583+ if (au_ftest_ren(a->flags, ISDIR)
20584+ && (a->dst_wh_dentry
20585+ || au_dbdiropq(d) == a->btgt
20586+ /* hide the lower to keep xino */
20587+ || a->btgt < au_dbend(d)
20588+ || au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ)))
20589+ au_fset_ren(a->flags, DIROPQ);
20590+ err = au_ren_or_cpup(a);
20591+ if (unlikely(err))
20592+ /* leave the copied-up one */
20593+ goto out_whtmp;
1308ab2a 20594+
4a4d8108
AM
20595+ /* make dir opaque */
20596+ if (au_ftest_ren(a->flags, DIROPQ)) {
20597+ err = au_ren_diropq(a);
20598+ if (unlikely(err))
20599+ goto out_rename;
20600+ }
1308ab2a 20601+
4a4d8108
AM
20602+ /* update target timestamps */
20603+ AuDebugOn(au_dbstart(a->src_dentry) != a->btgt);
20604+ a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt);
20605+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
5527c038 20606+ a->src_inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
1facf9fc 20607+
4a4d8108
AM
20608+ /* remove whiteout for dentry */
20609+ if (a->dst_wh_dentry) {
20610+ a->h_path.dentry = a->dst_wh_dentry;
20611+ err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path,
20612+ a->dst_dentry);
20613+ if (unlikely(err))
20614+ goto out_diropq;
20615+ }
1facf9fc 20616+
4a4d8108
AM
20617+ /* remove whtmp */
20618+ if (a->thargs)
20619+ au_ren_del_whtmp(a); /* ignore this error */
1308ab2a 20620+
076b876e 20621+ au_fhsm_wrote(a->src_dentry->d_sb, a->btgt, /*force*/0);
4a4d8108
AM
20622+ err = 0;
20623+ goto out_success;
20624+
4f0767ce 20625+out_diropq:
4a4d8108
AM
20626+ if (au_ftest_ren(a->flags, DIROPQ))
20627+ au_ren_rev_diropq(err, a);
4f0767ce 20628+out_rename:
7e9cd9fe 20629+ au_ren_rev_rename(err, a);
027c5e7a 20630+ dput(a->h_dst);
4f0767ce 20631+out_whtmp:
4a4d8108
AM
20632+ if (a->thargs)
20633+ au_ren_rev_whtmp(err, a);
4f0767ce 20634+out_whdst:
4a4d8108
AM
20635+ dput(a->dst_wh_dentry);
20636+ a->dst_wh_dentry = NULL;
4f0767ce 20637+out_whsrc:
4a4d8108
AM
20638+ if (a->src_wh_dentry)
20639+ au_ren_rev_whsrc(err, a);
4f0767ce 20640+out_success:
4a4d8108
AM
20641+ dput(a->src_wh_dentry);
20642+ dput(a->dst_wh_dentry);
4f0767ce 20643+out_thargs:
4a4d8108
AM
20644+ if (a->thargs) {
20645+ dput(a->h_dst);
20646+ au_whtmp_rmdir_free(a->thargs);
20647+ a->thargs = NULL;
20648+ }
4f0767ce 20649+out:
4a4d8108 20650+ return err;
dece6358 20651+}
1facf9fc 20652+
1308ab2a 20653+/* ---------------------------------------------------------------------- */
1facf9fc 20654+
4a4d8108
AM
20655+/*
20656+ * test if @dentry dir can be rename destination or not.
20657+ * success means, it is a logically empty dir.
20658+ */
20659+static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist)
1308ab2a 20660+{
4a4d8108 20661+ return au_test_empty(dentry, whlist);
1308ab2a 20662+}
1facf9fc 20663+
4a4d8108
AM
20664+/*
20665+ * test if @dentry dir can be rename source or not.
20666+ * if it can, return 0 and @children is filled.
20667+ * success means,
20668+ * - it is a logically empty dir.
20669+ * - or, it exists on writable branch and has no children including whiteouts
20670+ * on the lower branch.
20671+ */
20672+static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt)
20673+{
20674+ int err;
20675+ unsigned int rdhash;
20676+ aufs_bindex_t bstart;
1facf9fc 20677+
4a4d8108
AM
20678+ bstart = au_dbstart(dentry);
20679+ if (bstart != btgt) {
20680+ struct au_nhash whlist;
dece6358 20681+
4a4d8108
AM
20682+ SiMustAnyLock(dentry->d_sb);
20683+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
20684+ if (!rdhash)
20685+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL,
20686+ dentry));
20687+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
20688+ if (unlikely(err))
20689+ goto out;
20690+ err = au_test_empty(dentry, &whlist);
20691+ au_nhash_wh_free(&whlist);
20692+ goto out;
20693+ }
dece6358 20694+
4a4d8108
AM
20695+ if (bstart == au_dbtaildir(dentry))
20696+ return 0; /* success */
dece6358 20697+
4a4d8108 20698+ err = au_test_empty_lower(dentry);
1facf9fc 20699+
4f0767ce 20700+out:
4a4d8108
AM
20701+ if (err == -ENOTEMPTY) {
20702+ AuWarn1("renaming dir who has child(ren) on multiple branches,"
20703+ " is not supported\n");
20704+ err = -EXDEV;
20705+ }
20706+ return err;
20707+}
1308ab2a 20708+
4a4d8108
AM
20709+/* side effect: sets whlist and h_dentry */
20710+static int au_ren_may_dir(struct au_ren_args *a)
1308ab2a 20711+{
4a4d8108
AM
20712+ int err;
20713+ unsigned int rdhash;
20714+ struct dentry *d;
1facf9fc 20715+
4a4d8108
AM
20716+ d = a->dst_dentry;
20717+ SiMustAnyLock(d->d_sb);
1facf9fc 20718+
4a4d8108
AM
20719+ err = 0;
20720+ if (au_ftest_ren(a->flags, ISDIR) && a->dst_inode) {
20721+ rdhash = au_sbi(d->d_sb)->si_rdhash;
20722+ if (!rdhash)
20723+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d));
20724+ err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS);
20725+ if (unlikely(err))
20726+ goto out;
1308ab2a 20727+
4a4d8108
AM
20728+ au_set_dbstart(d, a->dst_bstart);
20729+ err = may_rename_dstdir(d, &a->whlist);
20730+ au_set_dbstart(d, a->btgt);
20731+ }
20732+ a->dst_h_dentry = au_h_dptr(d, au_dbstart(d));
20733+ if (unlikely(err))
20734+ goto out;
20735+
20736+ d = a->src_dentry;
20737+ a->src_h_dentry = au_h_dptr(d, au_dbstart(d));
20738+ if (au_ftest_ren(a->flags, ISDIR)) {
20739+ err = may_rename_srcdir(d, a->btgt);
20740+ if (unlikely(err)) {
20741+ au_nhash_wh_free(&a->whlist);
20742+ a->whlist.nh_num = 0;
20743+ }
20744+ }
4f0767ce 20745+out:
4a4d8108 20746+ return err;
1facf9fc 20747+}
20748+
4a4d8108 20749+/* ---------------------------------------------------------------------- */
1facf9fc 20750+
4a4d8108
AM
20751+/*
20752+ * simple tests for rename.
20753+ * following the checks in vfs, plus the parent-child relationship.
20754+ */
20755+static int au_may_ren(struct au_ren_args *a)
20756+{
20757+ int err, isdir;
20758+ struct inode *h_inode;
1facf9fc 20759+
4a4d8108
AM
20760+ if (a->src_bstart == a->btgt) {
20761+ err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent,
20762+ au_ftest_ren(a->flags, ISDIR));
20763+ if (unlikely(err))
20764+ goto out;
20765+ err = -EINVAL;
20766+ if (unlikely(a->src_h_dentry == a->h_trap))
20767+ goto out;
20768+ }
1facf9fc 20769+
4a4d8108
AM
20770+ err = 0;
20771+ if (a->dst_bstart != a->btgt)
20772+ goto out;
1facf9fc 20773+
027c5e7a
AM
20774+ err = -ENOTEMPTY;
20775+ if (unlikely(a->dst_h_dentry == a->h_trap))
20776+ goto out;
20777+
4a4d8108 20778+ err = -EIO;
4a4d8108 20779+ isdir = !!au_ftest_ren(a->flags, ISDIR);
5527c038
JR
20780+ if (d_really_is_negative(a->dst_dentry)) {
20781+ if (d_is_negative(a->dst_h_dentry))
20782+ err = au_may_add(a->dst_dentry, a->btgt,
20783+ a->dst_h_parent, isdir);
4a4d8108 20784+ } else {
5527c038 20785+ if (unlikely(d_is_negative(a->dst_h_dentry)))
4a4d8108 20786+ goto out;
5527c038
JR
20787+ h_inode = d_inode(a->dst_h_dentry);
20788+ if (h_inode->i_nlink)
20789+ err = au_may_del(a->dst_dentry, a->btgt,
20790+ a->dst_h_parent, isdir);
4a4d8108 20791+ }
1facf9fc 20792+
4f0767ce 20793+out:
4a4d8108
AM
20794+ if (unlikely(err == -ENOENT || err == -EEXIST))
20795+ err = -EIO;
20796+ AuTraceErr(err);
20797+ return err;
20798+}
1facf9fc 20799+
1308ab2a 20800+/* ---------------------------------------------------------------------- */
1facf9fc 20801+
4a4d8108
AM
20802+/*
20803+ * locking order
20804+ * (VFS)
20805+ * - src_dir and dir by lock_rename()
20806+ * - inode if exists
20807+ * (aufs)
20808+ * - lock all
20809+ * + src_dentry and dentry by aufs_read_and_write_lock2() which calls,
20810+ * + si_read_lock
20811+ * + di_write_lock2_child()
20812+ * + di_write_lock_child()
20813+ * + ii_write_lock_child()
20814+ * + di_write_lock_child2()
20815+ * + ii_write_lock_child2()
20816+ * + src_parent and parent
20817+ * + di_write_lock_parent()
20818+ * + ii_write_lock_parent()
20819+ * + di_write_lock_parent2()
20820+ * + ii_write_lock_parent2()
20821+ * + lower src_dir and dir by vfsub_lock_rename()
20822+ * + verify every relationship between child and parent. if any
20823+ * of them failed, unlock all and return -EBUSY.
20824+ */
20825+static void au_ren_unlock(struct au_ren_args *a)
1308ab2a 20826+{
4a4d8108
AM
20827+ vfsub_unlock_rename(a->src_h_parent, a->src_hdir,
20828+ a->dst_h_parent, a->dst_hdir);
86dc4139
AM
20829+ if (au_ftest_ren(a->flags, MNT_WRITE))
20830+ vfsub_mnt_drop_write(au_br_mnt(a->br));
1308ab2a 20831+}
20832+
4a4d8108 20833+static int au_ren_lock(struct au_ren_args *a)
1308ab2a 20834+{
4a4d8108
AM
20835+ int err;
20836+ unsigned int udba;
1308ab2a 20837+
4a4d8108
AM
20838+ err = 0;
20839+ a->src_h_parent = au_h_dptr(a->src_parent, a->btgt);
20840+ a->src_hdir = au_hi(a->src_dir, a->btgt);
20841+ a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt);
20842+ a->dst_hdir = au_hi(a->dst_dir, a->btgt);
86dc4139
AM
20843+
20844+ err = vfsub_mnt_want_write(au_br_mnt(a->br));
20845+ if (unlikely(err))
20846+ goto out;
20847+ au_fset_ren(a->flags, MNT_WRITE);
4a4d8108
AM
20848+ a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir,
20849+ a->dst_h_parent, a->dst_hdir);
20850+ udba = au_opt_udba(a->src_dentry->d_sb);
5527c038
JR
20851+ if (unlikely(a->src_hdir->hi_inode != d_inode(a->src_h_parent)
20852+ || a->dst_hdir->hi_inode != d_inode(a->dst_h_parent)))
4a4d8108
AM
20853+ err = au_busy_or_stale();
20854+ if (!err && au_dbstart(a->src_dentry) == a->btgt)
20855+ err = au_h_verify(a->src_h_dentry, udba,
5527c038 20856+ d_inode(a->src_h_parent), a->src_h_parent,
4a4d8108
AM
20857+ a->br);
20858+ if (!err && au_dbstart(a->dst_dentry) == a->btgt)
20859+ err = au_h_verify(a->dst_h_dentry, udba,
5527c038 20860+ d_inode(a->dst_h_parent), a->dst_h_parent,
4a4d8108 20861+ a->br);
86dc4139 20862+ if (!err)
4a4d8108 20863+ goto out; /* success */
4a4d8108
AM
20864+
20865+ err = au_busy_or_stale();
4a4d8108 20866+ au_ren_unlock(a);
86dc4139 20867+
4f0767ce 20868+out:
4a4d8108 20869+ return err;
1facf9fc 20870+}
20871+
20872+/* ---------------------------------------------------------------------- */
20873+
4a4d8108 20874+static void au_ren_refresh_dir(struct au_ren_args *a)
1facf9fc 20875+{
4a4d8108 20876+ struct inode *dir;
dece6358 20877+
4a4d8108
AM
20878+ dir = a->dst_dir;
20879+ dir->i_version++;
20880+ if (au_ftest_ren(a->flags, ISDIR)) {
20881+ /* is this updating defined in POSIX? */
20882+ au_cpup_attr_timesizes(a->src_inode);
20883+ au_cpup_attr_nlink(dir, /*force*/1);
4a4d8108 20884+ }
027c5e7a 20885+
b912730e 20886+ au_dir_ts(dir, a->btgt);
dece6358 20887+
4a4d8108
AM
20888+ if (au_ftest_ren(a->flags, ISSAMEDIR))
20889+ return;
dece6358 20890+
4a4d8108
AM
20891+ dir = a->src_dir;
20892+ dir->i_version++;
20893+ if (au_ftest_ren(a->flags, ISDIR))
20894+ au_cpup_attr_nlink(dir, /*force*/1);
b912730e 20895+ au_dir_ts(dir, a->btgt);
1facf9fc 20896+}
20897+
4a4d8108 20898+static void au_ren_refresh(struct au_ren_args *a)
1facf9fc 20899+{
4a4d8108
AM
20900+ aufs_bindex_t bend, bindex;
20901+ struct dentry *d, *h_d;
20902+ struct inode *i, *h_i;
20903+ struct super_block *sb;
dece6358 20904+
027c5e7a
AM
20905+ d = a->dst_dentry;
20906+ d_drop(d);
20907+ if (a->h_dst)
20908+ /* already dget-ed by au_ren_or_cpup() */
20909+ au_set_h_dptr(d, a->btgt, a->h_dst);
20910+
20911+ i = a->dst_inode;
20912+ if (i) {
20913+ if (!au_ftest_ren(a->flags, ISDIR))
20914+ vfsub_drop_nlink(i);
20915+ else {
20916+ vfsub_dead_dir(i);
20917+ au_cpup_attr_timesizes(i);
20918+ }
20919+ au_update_dbrange(d, /*do_put_zero*/1);
20920+ } else {
20921+ bend = a->btgt;
20922+ for (bindex = au_dbstart(d); bindex < bend; bindex++)
20923+ au_set_h_dptr(d, bindex, NULL);
20924+ bend = au_dbend(d);
20925+ for (bindex = a->btgt + 1; bindex <= bend; bindex++)
20926+ au_set_h_dptr(d, bindex, NULL);
20927+ au_update_dbrange(d, /*do_put_zero*/0);
20928+ }
20929+
4a4d8108
AM
20930+ d = a->src_dentry;
20931+ au_set_dbwh(d, -1);
20932+ bend = au_dbend(d);
20933+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
20934+ h_d = au_h_dptr(d, bindex);
20935+ if (h_d)
20936+ au_set_h_dptr(d, bindex, NULL);
20937+ }
20938+ au_set_dbend(d, a->btgt);
20939+
20940+ sb = d->d_sb;
20941+ i = a->src_inode;
20942+ if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i))
20943+ return; /* success */
20944+
20945+ bend = au_ibend(i);
20946+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
20947+ h_i = au_h_iptr(i, bindex);
20948+ if (h_i) {
20949+ au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0);
20950+ /* ignore this error */
20951+ au_set_h_iptr(i, bindex, NULL, 0);
20952+ }
20953+ }
20954+ au_set_ibend(i, a->btgt);
1308ab2a 20955+}
dece6358 20956+
4a4d8108
AM
20957+/* ---------------------------------------------------------------------- */
20958+
20959+/* mainly for link(2) and rename(2) */
20960+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt)
1308ab2a 20961+{
4a4d8108
AM
20962+ aufs_bindex_t bdiropq, bwh;
20963+ struct dentry *parent;
20964+ struct au_branch *br;
20965+
20966+ parent = dentry->d_parent;
5527c038 20967+ IMustLock(d_inode(parent)); /* dir is locked */
4a4d8108
AM
20968+
20969+ bdiropq = au_dbdiropq(parent);
20970+ bwh = au_dbwh(dentry);
20971+ br = au_sbr(dentry->d_sb, btgt);
20972+ if (au_br_rdonly(br)
20973+ || (0 <= bdiropq && bdiropq < btgt)
20974+ || (0 <= bwh && bwh < btgt))
20975+ btgt = -1;
20976+
20977+ AuDbg("btgt %d\n", btgt);
20978+ return btgt;
1facf9fc 20979+}
20980+
4a4d8108
AM
20981+/* sets src_bstart, dst_bstart and btgt */
20982+static int au_ren_wbr(struct au_ren_args *a)
1facf9fc 20983+{
4a4d8108
AM
20984+ int err;
20985+ struct au_wr_dir_args wr_dir_args = {
20986+ /* .force_btgt = -1, */
20987+ .flags = AuWrDir_ADD_ENTRY
20988+ };
dece6358 20989+
4a4d8108
AM
20990+ a->src_bstart = au_dbstart(a->src_dentry);
20991+ a->dst_bstart = au_dbstart(a->dst_dentry);
20992+ if (au_ftest_ren(a->flags, ISDIR))
20993+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
20994+ wr_dir_args.force_btgt = a->src_bstart;
20995+ if (a->dst_inode && a->dst_bstart < a->src_bstart)
20996+ wr_dir_args.force_btgt = a->dst_bstart;
20997+ wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt);
20998+ err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args);
20999+ a->btgt = err;
dece6358 21000+
4a4d8108 21001+ return err;
1facf9fc 21002+}
21003+
4a4d8108 21004+static void au_ren_dt(struct au_ren_args *a)
1facf9fc 21005+{
4a4d8108
AM
21006+ a->h_path.dentry = a->src_h_parent;
21007+ au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path);
21008+ if (!au_ftest_ren(a->flags, ISSAMEDIR)) {
21009+ a->h_path.dentry = a->dst_h_parent;
21010+ au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path);
21011+ }
1facf9fc 21012+
4a4d8108
AM
21013+ au_fclr_ren(a->flags, DT_DSTDIR);
21014+ if (!au_ftest_ren(a->flags, ISDIR))
21015+ return;
dece6358 21016+
4a4d8108
AM
21017+ a->h_path.dentry = a->src_h_dentry;
21018+ au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path);
5527c038 21019+ if (d_is_positive(a->dst_h_dentry)) {
4a4d8108
AM
21020+ au_fset_ren(a->flags, DT_DSTDIR);
21021+ a->h_path.dentry = a->dst_h_dentry;
21022+ au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path);
21023+ }
1308ab2a 21024+}
dece6358 21025+
4a4d8108 21026+static void au_ren_rev_dt(int err, struct au_ren_args *a)
1308ab2a 21027+{
4a4d8108
AM
21028+ struct dentry *h_d;
21029+ struct mutex *h_mtx;
21030+
21031+ au_dtime_revert(a->src_dt + AuPARENT);
21032+ if (!au_ftest_ren(a->flags, ISSAMEDIR))
21033+ au_dtime_revert(a->dst_dt + AuPARENT);
21034+
21035+ if (au_ftest_ren(a->flags, ISDIR) && err != -EIO) {
21036+ h_d = a->src_dt[AuCHILD].dt_h_path.dentry;
5527c038 21037+ h_mtx = &d_inode(h_d)->i_mutex;
4a4d8108
AM
21038+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
21039+ au_dtime_revert(a->src_dt + AuCHILD);
21040+ mutex_unlock(h_mtx);
21041+
21042+ if (au_ftest_ren(a->flags, DT_DSTDIR)) {
21043+ h_d = a->dst_dt[AuCHILD].dt_h_path.dentry;
5527c038 21044+ h_mtx = &d_inode(h_d)->i_mutex;
4a4d8108
AM
21045+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
21046+ au_dtime_revert(a->dst_dt + AuCHILD);
21047+ mutex_unlock(h_mtx);
1facf9fc 21048+ }
21049+ }
21050+}
21051+
4a4d8108
AM
21052+/* ---------------------------------------------------------------------- */
21053+
21054+int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry,
21055+ struct inode *_dst_dir, struct dentry *_dst_dentry)
1facf9fc 21056+{
e49829fe 21057+ int err, flags;
4a4d8108
AM
21058+ /* reduce stack space */
21059+ struct au_ren_args *a;
21060+
523b37e3 21061+ AuDbg("%pd, %pd\n", _src_dentry, _dst_dentry);
4a4d8108
AM
21062+ IMustLock(_src_dir);
21063+ IMustLock(_dst_dir);
21064+
21065+ err = -ENOMEM;
21066+ BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE);
21067+ a = kzalloc(sizeof(*a), GFP_NOFS);
21068+ if (unlikely(!a))
21069+ goto out;
21070+
21071+ a->src_dir = _src_dir;
21072+ a->src_dentry = _src_dentry;
5527c038
JR
21073+ a->src_inode = NULL;
21074+ if (d_really_is_positive(a->src_dentry))
21075+ a->src_inode = d_inode(a->src_dentry);
4a4d8108
AM
21076+ a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */
21077+ a->dst_dir = _dst_dir;
21078+ a->dst_dentry = _dst_dentry;
5527c038
JR
21079+ a->dst_inode = NULL;
21080+ if (d_really_is_positive(a->dst_dentry))
21081+ a->dst_inode = d_inode(a->dst_dentry);
4a4d8108
AM
21082+ a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */
21083+ if (a->dst_inode) {
21084+ IMustLock(a->dst_inode);
21085+ au_igrab(a->dst_inode);
1facf9fc 21086+ }
1facf9fc 21087+
4a4d8108 21088+ err = -ENOTDIR;
027c5e7a 21089+ flags = AuLock_FLUSH | AuLock_NOPLM | AuLock_GEN;
2000de60 21090+ if (d_is_dir(a->src_dentry)) {
4a4d8108 21091+ au_fset_ren(a->flags, ISDIR);
5527c038 21092+ if (unlikely(d_really_is_positive(a->dst_dentry)
2000de60 21093+ && !d_is_dir(a->dst_dentry)))
4a4d8108 21094+ goto out_free;
b95c5147
AM
21095+ flags |= AuLock_DIRS;
21096+ }
21097+ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry, flags);
e49829fe
JR
21098+ if (unlikely(err))
21099+ goto out_free;
1facf9fc 21100+
027c5e7a
AM
21101+ err = au_d_hashed_positive(a->src_dentry);
21102+ if (unlikely(err))
21103+ goto out_unlock;
21104+ err = -ENOENT;
21105+ if (a->dst_inode) {
21106+ /*
21107+ * If it is a dir, VFS unhash dst_dentry before this
21108+ * function. It means we cannot rely upon d_unhashed().
21109+ */
21110+ if (unlikely(!a->dst_inode->i_nlink))
21111+ goto out_unlock;
21112+ if (!S_ISDIR(a->dst_inode->i_mode)) {
21113+ err = au_d_hashed_positive(a->dst_dentry);
21114+ if (unlikely(err))
21115+ goto out_unlock;
21116+ } else if (unlikely(IS_DEADDIR(a->dst_inode)))
21117+ goto out_unlock;
21118+ } else if (unlikely(d_unhashed(a->dst_dentry)))
21119+ goto out_unlock;
21120+
7eafdf33
AM
21121+ /*
21122+ * is it possible?
79b8bda9 21123+ * yes, it happened (in linux-3.3-rcN) but I don't know why.
7eafdf33
AM
21124+ * there may exist a problem somewhere else.
21125+ */
21126+ err = -EINVAL;
5527c038 21127+ if (unlikely(d_inode(a->dst_parent) == d_inode(a->src_dentry)))
7eafdf33
AM
21128+ goto out_unlock;
21129+
4a4d8108
AM
21130+ au_fset_ren(a->flags, ISSAMEDIR); /* temporary */
21131+ di_write_lock_parent(a->dst_parent);
1facf9fc 21132+
4a4d8108
AM
21133+ /* which branch we process */
21134+ err = au_ren_wbr(a);
21135+ if (unlikely(err < 0))
027c5e7a 21136+ goto out_parent;
4a4d8108 21137+ a->br = au_sbr(a->dst_dentry->d_sb, a->btgt);
86dc4139 21138+ a->h_path.mnt = au_br_mnt(a->br);
1facf9fc 21139+
4a4d8108
AM
21140+ /* are they available to be renamed */
21141+ err = au_ren_may_dir(a);
21142+ if (unlikely(err))
21143+ goto out_children;
1facf9fc 21144+
4a4d8108
AM
21145+ /* prepare the writable parent dir on the same branch */
21146+ if (a->dst_bstart == a->btgt) {
21147+ au_fset_ren(a->flags, WHDST);
21148+ } else {
21149+ err = au_cpup_dirs(a->dst_dentry, a->btgt);
21150+ if (unlikely(err))
21151+ goto out_children;
21152+ }
1facf9fc 21153+
4a4d8108
AM
21154+ if (a->src_dir != a->dst_dir) {
21155+ /*
21156+ * this temporary unlock is safe,
21157+ * because both dir->i_mutex are locked.
21158+ */
21159+ di_write_unlock(a->dst_parent);
21160+ di_write_lock_parent(a->src_parent);
21161+ err = au_wr_dir_need_wh(a->src_dentry,
21162+ au_ftest_ren(a->flags, ISDIR),
21163+ &a->btgt);
21164+ di_write_unlock(a->src_parent);
21165+ di_write_lock2_parent(a->src_parent, a->dst_parent, /*isdir*/1);
21166+ au_fclr_ren(a->flags, ISSAMEDIR);
21167+ } else
21168+ err = au_wr_dir_need_wh(a->src_dentry,
21169+ au_ftest_ren(a->flags, ISDIR),
21170+ &a->btgt);
21171+ if (unlikely(err < 0))
21172+ goto out_children;
21173+ if (err)
21174+ au_fset_ren(a->flags, WHSRC);
1facf9fc 21175+
86dc4139
AM
21176+ /* cpup src */
21177+ if (a->src_bstart != a->btgt) {
86dc4139
AM
21178+ struct au_pin pin;
21179+
21180+ err = au_pin(&pin, a->src_dentry, a->btgt,
21181+ au_opt_udba(a->src_dentry->d_sb),
21182+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
367653fa 21183+ if (!err) {
c2b27bf2
AM
21184+ struct au_cp_generic cpg = {
21185+ .dentry = a->src_dentry,
21186+ .bdst = a->btgt,
21187+ .bsrc = a->src_bstart,
21188+ .len = -1,
21189+ .pin = &pin,
21190+ .flags = AuCpup_DTIME | AuCpup_HOPEN
21191+ };
367653fa 21192+ AuDebugOn(au_dbstart(a->src_dentry) != a->src_bstart);
c2b27bf2 21193+ err = au_sio_cpup_simple(&cpg);
367653fa 21194+ au_unpin(&pin);
86dc4139 21195+ }
86dc4139
AM
21196+ if (unlikely(err))
21197+ goto out_children;
21198+ a->src_bstart = a->btgt;
21199+ a->src_h_dentry = au_h_dptr(a->src_dentry, a->btgt);
21200+ au_fset_ren(a->flags, WHSRC);
21201+ }
21202+
4a4d8108
AM
21203+ /* lock them all */
21204+ err = au_ren_lock(a);
21205+ if (unlikely(err))
86dc4139 21206+ /* leave the copied-up one */
4a4d8108 21207+ goto out_children;
1facf9fc 21208+
4a4d8108
AM
21209+ if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE))
21210+ err = au_may_ren(a);
21211+ else if (unlikely(a->dst_dentry->d_name.len > AUFS_MAX_NAMELEN))
21212+ err = -ENAMETOOLONG;
21213+ if (unlikely(err))
21214+ goto out_hdir;
1facf9fc 21215+
4a4d8108
AM
21216+ /* store timestamps to be revertible */
21217+ au_ren_dt(a);
1facf9fc 21218+
4a4d8108
AM
21219+ /* here we go */
21220+ err = do_rename(a);
21221+ if (unlikely(err))
21222+ goto out_dt;
21223+
21224+ /* update dir attributes */
21225+ au_ren_refresh_dir(a);
21226+
21227+ /* dput/iput all lower dentries */
21228+ au_ren_refresh(a);
21229+
21230+ goto out_hdir; /* success */
21231+
4f0767ce 21232+out_dt:
4a4d8108 21233+ au_ren_rev_dt(err, a);
4f0767ce 21234+out_hdir:
4a4d8108 21235+ au_ren_unlock(a);
4f0767ce 21236+out_children:
4a4d8108 21237+ au_nhash_wh_free(&a->whlist);
027c5e7a
AM
21238+ if (err && a->dst_inode && a->dst_bstart != a->btgt) {
21239+ AuDbg("bstart %d, btgt %d\n", a->dst_bstart, a->btgt);
21240+ au_set_h_dptr(a->dst_dentry, a->btgt, NULL);
21241+ au_set_dbstart(a->dst_dentry, a->dst_bstart);
4a4d8108 21242+ }
027c5e7a 21243+out_parent:
4a4d8108
AM
21244+ if (!err)
21245+ d_move(a->src_dentry, a->dst_dentry);
027c5e7a
AM
21246+ else {
21247+ au_update_dbstart(a->dst_dentry);
21248+ if (!a->dst_inode)
21249+ d_drop(a->dst_dentry);
21250+ }
4a4d8108
AM
21251+ if (au_ftest_ren(a->flags, ISSAMEDIR))
21252+ di_write_unlock(a->dst_parent);
21253+ else
21254+ di_write_unlock2(a->src_parent, a->dst_parent);
027c5e7a 21255+out_unlock:
4a4d8108 21256+ aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry);
4f0767ce 21257+out_free:
4a4d8108
AM
21258+ iput(a->dst_inode);
21259+ if (a->thargs)
21260+ au_whtmp_rmdir_free(a->thargs);
21261+ kfree(a);
4f0767ce 21262+out:
4a4d8108
AM
21263+ AuTraceErr(err);
21264+ return err;
1308ab2a 21265+}
7f207e10
AM
21266diff -urN /usr/share/empty/fs/aufs/Kconfig linux/fs/aufs/Kconfig
21267--- /usr/share/empty/fs/aufs/Kconfig 1970-01-01 01:00:00.000000000 +0100
8cdd5066 21268+++ linux/fs/aufs/Kconfig 2016-02-28 11:26:32.569971135 +0100
c1595e42 21269@@ -0,0 +1,185 @@
4a4d8108
AM
21270+config AUFS_FS
21271+ tristate "Aufs (Advanced multi layered unification filesystem) support"
4a4d8108
AM
21272+ help
21273+ Aufs is a stackable unification filesystem such as Unionfs,
21274+ which unifies several directories and provides a merged single
21275+ directory.
21276+ In the early days, aufs was entirely re-designed and
21277+ re-implemented Unionfs Version 1.x series. Introducing many
21278+ original ideas, approaches and improvements, it becomes totally
21279+ different from Unionfs while keeping the basic features.
1facf9fc 21280+
4a4d8108
AM
21281+if AUFS_FS
21282+choice
21283+ prompt "Maximum number of branches"
21284+ default AUFS_BRANCH_MAX_127
21285+ help
21286+ Specifies the maximum number of branches (or member directories)
21287+ in a single aufs. The larger value consumes more system
21288+ resources and has a minor impact to performance.
21289+config AUFS_BRANCH_MAX_127
21290+ bool "127"
21291+ help
21292+ Specifies the maximum number of branches (or member directories)
21293+ in a single aufs. The larger value consumes more system
21294+ resources and has a minor impact to performance.
21295+config AUFS_BRANCH_MAX_511
21296+ bool "511"
21297+ help
21298+ Specifies the maximum number of branches (or member directories)
21299+ in a single aufs. The larger value consumes more system
21300+ resources and has a minor impact to performance.
21301+config AUFS_BRANCH_MAX_1023
21302+ bool "1023"
21303+ help
21304+ Specifies the maximum number of branches (or member directories)
21305+ in a single aufs. The larger value consumes more system
21306+ resources and has a minor impact to performance.
21307+config AUFS_BRANCH_MAX_32767
21308+ bool "32767"
21309+ help
21310+ Specifies the maximum number of branches (or member directories)
21311+ in a single aufs. The larger value consumes more system
21312+ resources and has a minor impact to performance.
21313+endchoice
1facf9fc 21314+
e49829fe
JR
21315+config AUFS_SBILIST
21316+ bool
21317+ depends on AUFS_MAGIC_SYSRQ || PROC_FS
21318+ default y
21319+ help
21320+ Automatic configuration for internal use.
21321+ When aufs supports Magic SysRq or /proc, enabled automatically.
21322+
4a4d8108
AM
21323+config AUFS_HNOTIFY
21324+ bool "Detect direct branch access (bypassing aufs)"
21325+ help
21326+ If you want to modify files on branches directly, eg. bypassing aufs,
21327+ and want aufs to detect the changes of them fully, then enable this
21328+ option and use 'udba=notify' mount option.
7f207e10 21329+ Currently there is only one available configuration, "fsnotify".
4a4d8108
AM
21330+ It will have a negative impact to the performance.
21331+ See detail in aufs.5.
dece6358 21332+
4a4d8108
AM
21333+choice
21334+ prompt "method" if AUFS_HNOTIFY
21335+ default AUFS_HFSNOTIFY
21336+config AUFS_HFSNOTIFY
21337+ bool "fsnotify"
21338+ select FSNOTIFY
4a4d8108 21339+endchoice
1facf9fc 21340+
4a4d8108
AM
21341+config AUFS_EXPORT
21342+ bool "NFS-exportable aufs"
2cbb1c4b 21343+ depends on EXPORTFS
4a4d8108
AM
21344+ help
21345+ If you want to export your mounted aufs via NFS, then enable this
21346+ option. There are several requirements for this configuration.
21347+ See detail in aufs.5.
1facf9fc 21348+
4a4d8108
AM
21349+config AUFS_INO_T_64
21350+ bool
21351+ depends on AUFS_EXPORT
21352+ depends on 64BIT && !(ALPHA || S390)
21353+ default y
21354+ help
21355+ Automatic configuration for internal use.
21356+ /* typedef unsigned long/int __kernel_ino_t */
21357+ /* alpha and s390x are int */
1facf9fc 21358+
c1595e42
JR
21359+config AUFS_XATTR
21360+ bool "support for XATTR/EA (including Security Labels)"
21361+ help
21362+ If your branch fs supports XATTR/EA and you want to make them
21363+ available in aufs too, then enable this option and specify the
21364+ branch attributes for EA.
21365+ See detail in aufs.5.
21366+
076b876e
AM
21367+config AUFS_FHSM
21368+ bool "File-based Hierarchical Storage Management"
21369+ help
21370+ Hierarchical Storage Management (or HSM) is a well-known feature
21371+ in the storage world. Aufs provides this feature as file-based
21372+ with multiple branches.
21373+ These multiple branches are prioritized, ie. the topmost one
21374+ should be the fastest drive and be used heavily.
21375+
4a4d8108
AM
21376+config AUFS_RDU
21377+ bool "Readdir in userspace"
21378+ help
21379+ Aufs has two methods to provide a merged view for a directory,
21380+ by a user-space library and by kernel-space natively. The latter
21381+ is always enabled but sometimes large and slow.
21382+ If you enable this option, install the library in aufs2-util
21383+ package, and set some environment variables for your readdir(3),
21384+ then the work will be handled in user-space which generally
21385+ shows better performance in most cases.
21386+ See detail in aufs.5.
1facf9fc 21387+
4a4d8108
AM
21388+config AUFS_SHWH
21389+ bool "Show whiteouts"
21390+ help
21391+ If you want to make the whiteouts in aufs visible, then enable
21392+ this option and specify 'shwh' mount option. Although it may
21393+ sound like philosophy or something, technically it
21394+ simply shows the name of the whiteout while keeping its behaviour.
1facf9fc 21395+
4a4d8108
AM
21396+config AUFS_BR_RAMFS
21397+ bool "Ramfs (initramfs/rootfs) as an aufs branch"
21398+ help
21399+ If you want to use ramfs as an aufs branch fs, then enable this
21400+ option. Generally tmpfs is recommended.
21401+ Aufs prohibits them from being a branch fs by default, because
21402+ initramfs becomes unusable after switch_root or something
21403+ generally. If you set initramfs as an aufs branch and boot your
21404+ system by switch_root, you will meet a problem easily since the
21405+ files in initramfs may be inaccessible.
21406+ Unless you are going to use ramfs as an aufs branch fs without
21407+ switch_root or something, leave it N.
1facf9fc 21408+
4a4d8108
AM
21409+config AUFS_BR_FUSE
21410+ bool "Fuse fs as an aufs branch"
21411+ depends on FUSE_FS
21412+ select AUFS_POLL
21413+ help
21414+ If you want to use fuse-based userspace filesystem as an aufs
21415+ branch fs, then enable this option.
21416+ It implements the internal poll(2) operation which is
21417+ implemented by fuse only (currently).
1facf9fc 21418+
4a4d8108
AM
21419+config AUFS_POLL
21420+ bool
21421+ help
21422+ Automatic configuration for internal use.
1facf9fc 21423+
4a4d8108
AM
21424+config AUFS_BR_HFSPLUS
21425+ bool "Hfsplus as an aufs branch"
21426+ depends on HFSPLUS_FS
21427+ default y
21428+ help
21429+ If you want to use hfsplus fs as an aufs branch fs, then enable
21430+ this option. This option introduces a small overhead at
21431+ copying-up a file on hfsplus.
1facf9fc 21432+
4a4d8108
AM
21433+config AUFS_BDEV_LOOP
21434+ bool
21435+ depends on BLK_DEV_LOOP
21436+ default y
21437+ help
21438+ Automatic configuration for internal use.
21439+ Convert =[ym] into =y.
1308ab2a 21440+
4a4d8108
AM
21441+config AUFS_DEBUG
21442+ bool "Debug aufs"
21443+ help
21444+ Enable this to compile aufs internal debug code.
21445+ It will have a negative impact on performance.
21446+
21447+config AUFS_MAGIC_SYSRQ
21448+ bool
21449+ depends on AUFS_DEBUG && MAGIC_SYSRQ
21450+ default y
21451+ help
21452+ Automatic configuration for internal use.
21453+ When aufs supports Magic SysRq, enabled automatically.
21454+endif
7f207e10
AM
21455diff -urN /usr/share/empty/fs/aufs/loop.c linux/fs/aufs/loop.c
21456--- /usr/share/empty/fs/aufs/loop.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 21457+++ linux/fs/aufs/loop.c 2016-02-28 11:26:32.573304539 +0100
79b8bda9 21458@@ -0,0 +1,146 @@
1facf9fc 21459+/*
8cdd5066 21460+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 21461+ *
21462+ * This program, aufs is free software; you can redistribute it and/or modify
21463+ * it under the terms of the GNU General Public License as published by
21464+ * the Free Software Foundation; either version 2 of the License, or
21465+ * (at your option) any later version.
dece6358
AM
21466+ *
21467+ * This program is distributed in the hope that it will be useful,
21468+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21469+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21470+ * GNU General Public License for more details.
21471+ *
21472+ * You should have received a copy of the GNU General Public License
523b37e3 21473+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 21474+ */
21475+
21476+/*
21477+ * support for loopback block device as a branch
21478+ */
21479+
1facf9fc 21480+#include "aufs.h"
21481+
392086de
AM
21482+/* added into drivers/block/loop.c */
21483+static struct file *(*backing_file_func)(struct super_block *sb);
21484+
1facf9fc 21485+/*
21486+ * test if two lower dentries have overlapping branches.
21487+ */
b752ccd1 21488+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding)
1facf9fc 21489+{
b752ccd1 21490+ struct super_block *h_sb;
392086de
AM
21491+ struct file *backing_file;
21492+
21493+ if (unlikely(!backing_file_func)) {
21494+ /* don't load "loop" module here */
21495+ backing_file_func = symbol_get(loop_backing_file);
21496+ if (unlikely(!backing_file_func))
21497+ /* "loop" module is not loaded */
21498+ return 0;
21499+ }
1facf9fc 21500+
b752ccd1 21501+ h_sb = h_adding->d_sb;
392086de
AM
21502+ backing_file = backing_file_func(h_sb);
21503+ if (!backing_file)
1facf9fc 21504+ return 0;
21505+
2000de60 21506+ h_adding = backing_file->f_path.dentry;
b752ccd1
AM
21507+ /*
21508+ * h_adding can be local NFS.
21509+ * in this case aufs cannot detect the loop.
21510+ */
21511+ if (unlikely(h_adding->d_sb == sb))
1facf9fc 21512+ return 1;
b752ccd1 21513+ return !!au_test_subdir(h_adding, sb->s_root);
1facf9fc 21514+}
21515+
21516+/* true if a kernel thread named 'loop[0-9].*' accesses a file */
21517+int au_test_loopback_kthread(void)
21518+{
b752ccd1
AM
21519+ int ret;
21520+ struct task_struct *tsk = current;
a2a7ad62 21521+ char c, comm[sizeof(tsk->comm)];
b752ccd1
AM
21522+
21523+ ret = 0;
21524+ if (tsk->flags & PF_KTHREAD) {
a2a7ad62
AM
21525+ get_task_comm(comm, tsk);
21526+ c = comm[4];
b752ccd1 21527+ ret = ('0' <= c && c <= '9'
a2a7ad62 21528+ && !strncmp(comm, "loop", 4));
b752ccd1 21529+ }
1facf9fc 21530+
b752ccd1 21531+ return ret;
1facf9fc 21532+}
87a755f4
AM
21533+
21534+/* ---------------------------------------------------------------------- */
21535+
21536+#define au_warn_loopback_step 16
21537+static int au_warn_loopback_nelem = au_warn_loopback_step;
21538+static unsigned long *au_warn_loopback_array;
21539+
21540+void au_warn_loopback(struct super_block *h_sb)
21541+{
21542+ int i, new_nelem;
21543+ unsigned long *a, magic;
21544+ static DEFINE_SPINLOCK(spin);
21545+
21546+ magic = h_sb->s_magic;
21547+ spin_lock(&spin);
21548+ a = au_warn_loopback_array;
21549+ for (i = 0; i < au_warn_loopback_nelem && a[i]; i++)
21550+ if (a[i] == magic) {
21551+ spin_unlock(&spin);
21552+ return;
21553+ }
21554+
21555+ /* h_sb is new to us: record it in the first free (zero) slot, print it */
21556+ if (i < au_warn_loopback_nelem) {
21557+ a[i] = magic;
21558+ goto pr;
21559+ }
21560+
21561+ /* expand the array */
21562+ new_nelem = au_warn_loopback_nelem + au_warn_loopback_step;
21563+ a = au_kzrealloc(au_warn_loopback_array,
21564+ au_warn_loopback_nelem * sizeof(unsigned long),
21565+ new_nelem * sizeof(unsigned long), GFP_ATOMIC);
21566+ if (a) {
21567+ au_warn_loopback_nelem = new_nelem;
21568+ au_warn_loopback_array = a;
21569+ a[i] = magic;
21570+ goto pr;
21571+ }
21572+
21573+ spin_unlock(&spin);
21574+ AuWarn1("realloc failed, ignored\n");
21575+ return;
21576+
21577+pr:
21578+ spin_unlock(&spin);
0c3ec466
AM
21579+ pr_warn("you may want to try another patch for loopback file "
21580+ "on %s(0x%lx) branch\n", au_sbtype(h_sb), magic);
87a755f4
AM
21581+}
21582+
21583+int au_loopback_init(void)
21584+{
21585+ int err;
21586+ struct super_block *sb __maybe_unused;
21587+
79b8bda9 21588+ BUILD_BUG_ON(sizeof(sb->s_magic) != sizeof(unsigned long));
87a755f4
AM
21589+
21590+ err = 0;
21591+ au_warn_loopback_array = kcalloc(au_warn_loopback_step,
21592+ sizeof(unsigned long), GFP_NOFS);
21593+ if (unlikely(!au_warn_loopback_array))
21594+ err = -ENOMEM;
21595+
21596+ return err;
21597+}
21598+
21599+void au_loopback_fin(void)
21600+{
79b8bda9
AM
21601+ if (backing_file_func)
21602+ symbol_put(loop_backing_file);
87a755f4
AM
21603+ kfree(au_warn_loopback_array);
21604+}
7f207e10
AM
21605diff -urN /usr/share/empty/fs/aufs/loop.h linux/fs/aufs/loop.h
21606--- /usr/share/empty/fs/aufs/loop.h 1970-01-01 01:00:00.000000000 +0100
8cdd5066 21607+++ linux/fs/aufs/loop.h 2016-02-28 11:26:32.573304539 +0100
523b37e3 21608@@ -0,0 +1,52 @@
1facf9fc 21609+/*
8cdd5066 21610+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 21611+ *
21612+ * This program, aufs is free software; you can redistribute it and/or modify
21613+ * it under the terms of the GNU General Public License as published by
21614+ * the Free Software Foundation; either version 2 of the License, or
21615+ * (at your option) any later version.
dece6358
AM
21616+ *
21617+ * This program is distributed in the hope that it will be useful,
21618+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21619+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21620+ * GNU General Public License for more details.
21621+ *
21622+ * You should have received a copy of the GNU General Public License
523b37e3 21623+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 21624+ */
21625+
21626+/*
21627+ * support for loopback mount as a branch
21628+ */
21629+
21630+#ifndef __AUFS_LOOP_H__
21631+#define __AUFS_LOOP_H__
21632+
21633+#ifdef __KERNEL__
21634+
dece6358
AM
21635+struct dentry;
21636+struct super_block;
1facf9fc 21637+
21638+#ifdef CONFIG_AUFS_BDEV_LOOP
392086de
AM
21639+/* drivers/block/loop.c */
21640+struct file *loop_backing_file(struct super_block *sb);
21641+
1facf9fc 21642+/* loop.c */
b752ccd1 21643+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding);
1facf9fc 21644+int au_test_loopback_kthread(void);
87a755f4
AM
21645+void au_warn_loopback(struct super_block *h_sb);
21646+
21647+int au_loopback_init(void);
21648+void au_loopback_fin(void);
1facf9fc 21649+#else
4a4d8108 21650+AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
b752ccd1 21651+ struct dentry *h_adding)
4a4d8108 21652+AuStubInt0(au_test_loopback_kthread, void)
87a755f4
AM
21653+AuStubVoid(au_warn_loopback, struct super_block *h_sb)
21654+
21655+AuStubInt0(au_loopback_init, void)
21656+AuStubVoid(au_loopback_fin, void)
1facf9fc 21657+#endif /* CONFIG_AUFS_BDEV_LOOP */
21658+
21659+#endif /* __KERNEL__ */
21660+#endif /* __AUFS_LOOP_H__ */
7f207e10
AM
21661diff -urN /usr/share/empty/fs/aufs/magic.mk linux/fs/aufs/magic.mk
21662--- /usr/share/empty/fs/aufs/magic.mk 1970-01-01 01:00:00.000000000 +0100
8cdd5066 21663+++ linux/fs/aufs/magic.mk 2016-02-28 11:26:32.573304539 +0100
7e9cd9fe 21664@@ -0,0 +1,30 @@
1facf9fc 21665+
21666+# defined in ${srctree}/fs/fuse/inode.c
21667+# tristate
21668+ifdef CONFIG_FUSE_FS
21669+ccflags-y += -DFUSE_SUPER_MAGIC=0x65735546
21670+endif
21671+
1facf9fc 21672+# defined in ${srctree}/fs/xfs/xfs_sb.h
21673+# tristate
21674+ifdef CONFIG_XFS_FS
21675+ccflags-y += -DXFS_SB_MAGIC=0x58465342
21676+endif
21677+
21678+# defined in ${srctree}/fs/configfs/mount.c
21679+# tristate
21680+ifdef CONFIG_CONFIGFS_FS
21681+ccflags-y += -DCONFIGFS_MAGIC=0x62656570
21682+endif
21683+
1facf9fc 21684+# defined in ${srctree}/fs/ubifs/ubifs.h
21685+# tristate
21686+ifdef CONFIG_UBIFS_FS
21687+ccflags-y += -DUBIFS_SUPER_MAGIC=0x24051905
21688+endif
4a4d8108
AM
21689+
21690+# defined in ${srctree}/fs/hfsplus/hfsplus_raw.h
21691+# tristate
21692+ifdef CONFIG_HFSPLUS_FS
21693+ccflags-y += -DHFSPLUS_SUPER_MAGIC=0x482b
21694+endif
7f207e10
AM
21695diff -urN /usr/share/empty/fs/aufs/Makefile linux/fs/aufs/Makefile
21696--- /usr/share/empty/fs/aufs/Makefile 1970-01-01 01:00:00.000000000 +0100
8cdd5066 21697+++ linux/fs/aufs/Makefile 2016-02-28 11:26:32.569971135 +0100
c1595e42 21698@@ -0,0 +1,44 @@
4a4d8108
AM
21699+
21700+include ${src}/magic.mk
21701+ifeq (${CONFIG_AUFS_FS},m)
21702+include ${src}/conf.mk
21703+endif
21704+-include ${src}/priv_def.mk
21705+
21706+# cf. include/linux/kernel.h
21707+# enable pr_debug
21708+ccflags-y += -DDEBUG
f6c5ef8b
AM
21709+# sparse requires the full pathname
21710+ifdef M
523b37e3 21711+ccflags-y += -include ${M}/../../include/uapi/linux/aufs_type.h
f6c5ef8b 21712+else
523b37e3 21713+ccflags-y += -include ${srctree}/include/uapi/linux/aufs_type.h
f6c5ef8b 21714+endif
4a4d8108
AM
21715+
21716+obj-$(CONFIG_AUFS_FS) += aufs.o
21717+aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \
21718+ wkq.o vfsub.o dcsub.o \
e49829fe 21719+ cpup.o whout.o wbr_policy.o \
4a4d8108
AM
21720+ dinfo.o dentry.o \
21721+ dynop.o \
21722+ finfo.o file.o f_op.o \
21723+ dir.o vdir.o \
21724+ iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \
c2b27bf2 21725+ mvdown.o ioctl.o
4a4d8108
AM
21726+
21727+# all are boolean
e49829fe 21728+aufs-$(CONFIG_PROC_FS) += procfs.o plink.o
4a4d8108
AM
21729+aufs-$(CONFIG_SYSFS) += sysfs.o
21730+aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o
21731+aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o
21732+aufs-$(CONFIG_AUFS_HNOTIFY) += hnotify.o
21733+aufs-$(CONFIG_AUFS_HFSNOTIFY) += hfsnotify.o
4a4d8108 21734+aufs-$(CONFIG_AUFS_EXPORT) += export.o
c1595e42
JR
21735+aufs-$(CONFIG_AUFS_XATTR) += xattr.o
21736+aufs-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
076b876e 21737+aufs-$(CONFIG_AUFS_FHSM) += fhsm.o
4a4d8108
AM
21738+aufs-$(CONFIG_AUFS_POLL) += poll.o
21739+aufs-$(CONFIG_AUFS_RDU) += rdu.o
4a4d8108
AM
21740+aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o
21741+aufs-$(CONFIG_AUFS_DEBUG) += debug.o
21742+aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o
7f207e10
AM
21743diff -urN /usr/share/empty/fs/aufs/module.c linux/fs/aufs/module.c
21744--- /usr/share/empty/fs/aufs/module.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 21745+++ linux/fs/aufs/module.c 2016-02-28 11:26:32.573304539 +0100
b95c5147 21746@@ -0,0 +1,221 @@
1facf9fc 21747+/*
8cdd5066 21748+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 21749+ *
21750+ * This program, aufs is free software; you can redistribute it and/or modify
21751+ * it under the terms of the GNU General Public License as published by
21752+ * the Free Software Foundation; either version 2 of the License, or
21753+ * (at your option) any later version.
dece6358
AM
21754+ *
21755+ * This program is distributed in the hope that it will be useful,
21756+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21757+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21758+ * GNU General Public License for more details.
21759+ *
21760+ * You should have received a copy of the GNU General Public License
523b37e3 21761+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 21762+ */
21763+
21764+/*
21765+ * module global variables and operations
21766+ */
21767+
21768+#include <linux/module.h>
21769+#include <linux/seq_file.h>
21770+#include "aufs.h"
21771+
21772+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp)
21773+{
21774+ if (new_sz <= nused)
21775+ return p;
21776+
21777+ p = krealloc(p, new_sz, gfp);
21778+ if (p)
21779+ memset(p + nused, 0, new_sz - nused);
21780+ return p;
21781+}
21782+
21783+/* ---------------------------------------------------------------------- */
21784+
21785+/*
21786+ * aufs caches
21787+ */
21788+struct kmem_cache *au_cachep[AuCache_Last];
21789+static int __init au_cache_init(void)
21790+{
4a4d8108 21791+ au_cachep[AuCache_DINFO] = AuCacheCtor(au_dinfo, au_di_init_once);
1facf9fc 21792+ if (au_cachep[AuCache_DINFO])
027c5e7a 21793+ /* SLAB_DESTROY_BY_RCU */
4a4d8108
AM
21794+ au_cachep[AuCache_ICNTNR] = AuCacheCtor(au_icntnr,
21795+ au_icntnr_init_once);
1facf9fc 21796+ if (au_cachep[AuCache_ICNTNR])
4a4d8108
AM
21797+ au_cachep[AuCache_FINFO] = AuCacheCtor(au_finfo,
21798+ au_fi_init_once);
1facf9fc 21799+ if (au_cachep[AuCache_FINFO])
21800+ au_cachep[AuCache_VDIR] = AuCache(au_vdir);
21801+ if (au_cachep[AuCache_VDIR])
21802+ au_cachep[AuCache_DEHSTR] = AuCache(au_vdir_dehstr);
21803+ if (au_cachep[AuCache_DEHSTR])
21804+ return 0;
21805+
21806+ return -ENOMEM;
21807+}
21808+
21809+static void au_cache_fin(void)
21810+{
21811+ int i;
4a4d8108 21812+
537831f9
AM
21813+ /*
21814+ * Make sure all delayed rcu free inodes are flushed before we
21815+ * destroy cache.
21816+ */
21817+ rcu_barrier();
21818+
7eafdf33
AM
21819+ /* excluding AuCache_HNOTIFY */
21820+ BUILD_BUG_ON(AuCache_HNOTIFY + 1 != AuCache_Last);
79b8bda9
AM
21821+ for (i = 0; i < AuCache_HNOTIFY; i++) {
21822+ kmem_cache_destroy(au_cachep[i]);
21823+ au_cachep[i] = NULL;
21824+ }
1facf9fc 21825+}
21826+
21827+/* ---------------------------------------------------------------------- */
21828+
21829+int au_dir_roflags;
21830+
e49829fe 21831+#ifdef CONFIG_AUFS_SBILIST
1e00d052
AM
21832+/*
21833+ * iterate_supers_type() doesn't protect us from
21834+ * remounting (branch management)
21835+ */
e49829fe
JR
21836+struct au_splhead au_sbilist;
21837+#endif
21838+
9dbd164d
AM
21839+struct lock_class_key au_lc_key[AuLcKey_Last];
21840+
1facf9fc 21841+/*
21842+ * functions for module interface.
21843+ */
21844+MODULE_LICENSE("GPL");
21845+/* MODULE_LICENSE("GPL v2"); */
dece6358 21846+MODULE_AUTHOR("Junjiro R. Okajima <aufs-users@lists.sourceforge.net>");
1facf9fc 21847+MODULE_DESCRIPTION(AUFS_NAME
21848+ " -- Advanced multi layered unification filesystem");
21849+MODULE_VERSION(AUFS_VERSION);
c06a8ce3 21850+MODULE_ALIAS_FS(AUFS_NAME);
1facf9fc 21851+
1facf9fc 21852+/* this module parameter has no meaning when SYSFS is disabled */
21853+int sysaufs_brs = 1;
21854+MODULE_PARM_DESC(brs, "use <sysfs>/fs/aufs/si_*/brN");
21855+module_param_named(brs, sysaufs_brs, int, S_IRUGO);
21856+
076b876e 21857+/* this module parameter has no meaning when USER_NS is disabled */
8cdd5066 21858+bool au_userns;
076b876e
AM
21859+MODULE_PARM_DESC(allow_userns, "allow unprivileged to mount under userns");
21860+module_param_named(allow_userns, au_userns, bool, S_IRUGO);
21861+
1facf9fc 21862+/* ---------------------------------------------------------------------- */
21863+
21864+static char au_esc_chars[0x20 + 3]; /* 0x01-0x20, backslash, del, and NULL */
21865+
21866+int au_seq_path(struct seq_file *seq, struct path *path)
21867+{
79b8bda9
AM
21868+ int err;
21869+
21870+ err = seq_path(seq, path, au_esc_chars);
21871+ if (err > 0)
21872+ err = 0;
21873+ else if (err < 0)
21874+ err = -ENOMEM;
21875+
21876+ return err;
1facf9fc 21877+}
21878+
21879+/* ---------------------------------------------------------------------- */
21880+
21881+static int __init aufs_init(void)
21882+{
21883+ int err, i;
21884+ char *p;
21885+
21886+ p = au_esc_chars;
21887+ for (i = 1; i <= ' '; i++)
21888+ *p++ = i;
21889+ *p++ = '\\';
21890+ *p++ = '\x7f';
21891+ *p = 0;
21892+
21893+ au_dir_roflags = au_file_roflags(O_DIRECTORY | O_LARGEFILE);
21894+
b95c5147
AM
21895+ memcpy(aufs_iop_nogetattr, aufs_iop, sizeof(aufs_iop));
21896+ for (i = 0; i < AuIop_Last; i++)
21897+ aufs_iop_nogetattr[i].getattr = NULL;
21898+
e49829fe 21899+ au_sbilist_init();
1facf9fc 21900+ sysaufs_brs_init();
21901+ au_debug_init();
4a4d8108 21902+ au_dy_init();
1facf9fc 21903+ err = sysaufs_init();
21904+ if (unlikely(err))
21905+ goto out;
e49829fe 21906+ err = au_procfs_init();
4f0767ce 21907+ if (unlikely(err))
953406b4 21908+ goto out_sysaufs;
e49829fe
JR
21909+ err = au_wkq_init();
21910+ if (unlikely(err))
21911+ goto out_procfs;
87a755f4 21912+ err = au_loopback_init();
1facf9fc 21913+ if (unlikely(err))
21914+ goto out_wkq;
87a755f4
AM
21915+ err = au_hnotify_init();
21916+ if (unlikely(err))
21917+ goto out_loopback;
1facf9fc 21918+ err = au_sysrq_init();
21919+ if (unlikely(err))
21920+ goto out_hin;
21921+ err = au_cache_init();
21922+ if (unlikely(err))
21923+ goto out_sysrq;
076b876e
AM
21924+
21925+ aufs_fs_type.fs_flags |= au_userns ? FS_USERNS_MOUNT : 0;
1facf9fc 21926+ err = register_filesystem(&aufs_fs_type);
21927+ if (unlikely(err))
21928+ goto out_cache;
076b876e 21929+
4a4d8108
AM
21930+ /* since we define pr_fmt, call printk directly */
21931+ printk(KERN_INFO AUFS_NAME " " AUFS_VERSION "\n");
1facf9fc 21932+ goto out; /* success */
21933+
4f0767ce 21934+out_cache:
1facf9fc 21935+ au_cache_fin();
4f0767ce 21936+out_sysrq:
1facf9fc 21937+ au_sysrq_fin();
4f0767ce 21938+out_hin:
4a4d8108 21939+ au_hnotify_fin();
87a755f4
AM
21940+out_loopback:
21941+ au_loopback_fin();
4f0767ce 21942+out_wkq:
1facf9fc 21943+ au_wkq_fin();
e49829fe
JR
21944+out_procfs:
21945+ au_procfs_fin();
4f0767ce 21946+out_sysaufs:
1facf9fc 21947+ sysaufs_fin();
4a4d8108 21948+ au_dy_fin();
4f0767ce 21949+out:
1facf9fc 21950+ return err;
21951+}
21952+
21953+static void __exit aufs_exit(void)
21954+{
21955+ unregister_filesystem(&aufs_fs_type);
21956+ au_cache_fin();
21957+ au_sysrq_fin();
4a4d8108 21958+ au_hnotify_fin();
87a755f4 21959+ au_loopback_fin();
1facf9fc 21960+ au_wkq_fin();
e49829fe 21961+ au_procfs_fin();
1facf9fc 21962+ sysaufs_fin();
4a4d8108 21963+ au_dy_fin();
1facf9fc 21964+}
21965+
21966+module_init(aufs_init);
21967+module_exit(aufs_exit);
7f207e10
AM
21968diff -urN /usr/share/empty/fs/aufs/module.h linux/fs/aufs/module.h
21969--- /usr/share/empty/fs/aufs/module.h 1970-01-01 01:00:00.000000000 +0100
8cdd5066
JR
21970+++ linux/fs/aufs/module.h 2016-02-28 11:26:32.573304539 +0100
21971@@ -0,0 +1,105 @@
1facf9fc 21972+/*
8cdd5066 21973+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 21974+ *
21975+ * This program, aufs is free software; you can redistribute it and/or modify
21976+ * it under the terms of the GNU General Public License as published by
21977+ * the Free Software Foundation; either version 2 of the License, or
21978+ * (at your option) any later version.
dece6358
AM
21979+ *
21980+ * This program is distributed in the hope that it will be useful,
21981+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21982+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21983+ * GNU General Public License for more details.
21984+ *
21985+ * You should have received a copy of the GNU General Public License
523b37e3 21986+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 21987+ */
21988+
21989+/*
21990+ * module initialization and module-global
21991+ */
21992+
21993+#ifndef __AUFS_MODULE_H__
21994+#define __AUFS_MODULE_H__
21995+
21996+#ifdef __KERNEL__
21997+
21998+#include <linux/slab.h>
21999+
dece6358
AM
22000+struct path;
22001+struct seq_file;
22002+
1facf9fc 22003+/* module parameters */
1facf9fc 22004+extern int sysaufs_brs;
8cdd5066 22005+extern bool au_userns;
1facf9fc 22006+
22007+/* ---------------------------------------------------------------------- */
22008+
22009+extern int au_dir_roflags;
22010+
9dbd164d
AM
22011+enum {
22012+ AuLcNonDir_FIINFO,
22013+ AuLcNonDir_DIINFO,
22014+ AuLcNonDir_IIINFO,
22015+
22016+ AuLcDir_FIINFO,
22017+ AuLcDir_DIINFO,
22018+ AuLcDir_IIINFO,
22019+
22020+ AuLcSymlink_DIINFO,
22021+ AuLcSymlink_IIINFO,
22022+
22023+ AuLcKey_Last
22024+};
22025+extern struct lock_class_key au_lc_key[AuLcKey_Last];
22026+
1facf9fc 22027+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp);
22028+int au_seq_path(struct seq_file *seq, struct path *path);
22029+
e49829fe
JR
22030+#ifdef CONFIG_PROC_FS
22031+/* procfs.c */
22032+int __init au_procfs_init(void);
22033+void au_procfs_fin(void);
22034+#else
22035+AuStubInt0(au_procfs_init, void);
22036+AuStubVoid(au_procfs_fin, void);
22037+#endif
22038+
4f0767ce
JR
22039+/* ---------------------------------------------------------------------- */
22040+
22041+/* kmem cache */
1facf9fc 22042+enum {
22043+ AuCache_DINFO,
22044+ AuCache_ICNTNR,
22045+ AuCache_FINFO,
22046+ AuCache_VDIR,
22047+ AuCache_DEHSTR,
7eafdf33 22048+ AuCache_HNOTIFY, /* must be last */
1facf9fc 22049+ AuCache_Last
22050+};
22051+
4a4d8108
AM
22052+#define AuCacheFlags (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD)
22053+#define AuCache(type) KMEM_CACHE(type, AuCacheFlags)
22054+#define AuCacheCtor(type, ctor) \
22055+ kmem_cache_create(#type, sizeof(struct type), \
22056+ __alignof__(struct type), AuCacheFlags, ctor)
1facf9fc 22057+
22058+extern struct kmem_cache *au_cachep[];
22059+
22060+#define AuCacheFuncs(name, index) \
4a4d8108 22061+static inline struct au_##name *au_cache_alloc_##name(void) \
1facf9fc 22062+{ return kmem_cache_alloc(au_cachep[AuCache_##index], GFP_NOFS); } \
4a4d8108 22063+static inline void au_cache_free_##name(struct au_##name *p) \
1facf9fc 22064+{ kmem_cache_free(au_cachep[AuCache_##index], p); }
22065+
22066+AuCacheFuncs(dinfo, DINFO);
22067+AuCacheFuncs(icntnr, ICNTNR);
22068+AuCacheFuncs(finfo, FINFO);
22069+AuCacheFuncs(vdir, VDIR);
4a4d8108
AM
22070+AuCacheFuncs(vdir_dehstr, DEHSTR);
22071+#ifdef CONFIG_AUFS_HNOTIFY
22072+AuCacheFuncs(hnotify, HNOTIFY);
22073+#endif
1facf9fc 22074+
4a4d8108
AM
22075+#endif /* __KERNEL__ */
22076+#endif /* __AUFS_MODULE_H__ */
c2b27bf2
AM
22077diff -urN /usr/share/empty/fs/aufs/mvdown.c linux/fs/aufs/mvdown.c
22078--- /usr/share/empty/fs/aufs/mvdown.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 22079+++ linux/fs/aufs/mvdown.c 2016-02-28 11:26:32.573304539 +0100
79b8bda9 22080@@ -0,0 +1,703 @@
c2b27bf2 22081+/*
8cdd5066 22082+ * Copyright (C) 2011-2016 Junjiro R. Okajima
c2b27bf2
AM
22083+ *
22084+ * This program, aufs is free software; you can redistribute it and/or modify
22085+ * it under the terms of the GNU General Public License as published by
22086+ * the Free Software Foundation; either version 2 of the License, or
22087+ * (at your option) any later version.
22088+ *
22089+ * This program is distributed in the hope that it will be useful,
22090+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22091+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22092+ * GNU General Public License for more details.
22093+ *
22094+ * You should have received a copy of the GNU General Public License
523b37e3
AM
22095+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
22096+ */
22097+
22098+/*
22099+ * move-down, opposite of copy-up
c2b27bf2
AM
22100+ */
22101+
22102+#include "aufs.h"
22103+
c2b27bf2
AM
22104+struct au_mvd_args {
22105+ struct {
c2b27bf2
AM
22106+ struct super_block *h_sb;
22107+ struct dentry *h_parent;
22108+ struct au_hinode *hdir;
392086de 22109+ struct inode *h_dir, *h_inode;
c1595e42 22110+ struct au_pin pin;
c2b27bf2
AM
22111+ } info[AUFS_MVDOWN_NARRAY];
22112+
22113+ struct aufs_mvdown mvdown;
22114+ struct dentry *dentry, *parent;
22115+ struct inode *inode, *dir;
22116+ struct super_block *sb;
22117+ aufs_bindex_t bopq, bwh, bfound;
22118+ unsigned char rename_lock;
c2b27bf2
AM
22119+};
22120+
392086de 22121+#define mvd_errno mvdown.au_errno
076b876e
AM
22122+#define mvd_bsrc mvdown.stbr[AUFS_MVDOWN_UPPER].bindex
22123+#define mvd_src_brid mvdown.stbr[AUFS_MVDOWN_UPPER].brid
22124+#define mvd_bdst mvdown.stbr[AUFS_MVDOWN_LOWER].bindex
22125+#define mvd_dst_brid mvdown.stbr[AUFS_MVDOWN_LOWER].brid
c2b27bf2 22126+
392086de
AM
22127+#define mvd_h_src_sb info[AUFS_MVDOWN_UPPER].h_sb
22128+#define mvd_h_src_parent info[AUFS_MVDOWN_UPPER].h_parent
22129+#define mvd_hdir_src info[AUFS_MVDOWN_UPPER].hdir
22130+#define mvd_h_src_dir info[AUFS_MVDOWN_UPPER].h_dir
22131+#define mvd_h_src_inode info[AUFS_MVDOWN_UPPER].h_inode
c1595e42 22132+#define mvd_pin_src info[AUFS_MVDOWN_UPPER].pin
392086de
AM
22133+
22134+#define mvd_h_dst_sb info[AUFS_MVDOWN_LOWER].h_sb
22135+#define mvd_h_dst_parent info[AUFS_MVDOWN_LOWER].h_parent
22136+#define mvd_hdir_dst info[AUFS_MVDOWN_LOWER].hdir
22137+#define mvd_h_dst_dir info[AUFS_MVDOWN_LOWER].h_dir
22138+#define mvd_h_dst_inode info[AUFS_MVDOWN_LOWER].h_inode
c1595e42 22139+#define mvd_pin_dst info[AUFS_MVDOWN_LOWER].pin
c2b27bf2
AM
22140+
22141+#define AU_MVD_PR(flag, ...) do { \
22142+ if (flag) \
22143+ pr_err(__VA_ARGS__); \
22144+ } while (0)
22145+
076b876e
AM
22146+static int find_lower_writable(struct au_mvd_args *a)
22147+{
22148+ struct super_block *sb;
22149+ aufs_bindex_t bindex, bend;
22150+ struct au_branch *br;
22151+
22152+ sb = a->sb;
22153+ bindex = a->mvd_bsrc;
22154+ bend = au_sbend(sb);
22155+ if (a->mvdown.flags & AUFS_MVDOWN_FHSM_LOWER)
22156+ for (bindex++; bindex <= bend; bindex++) {
22157+ br = au_sbr(sb, bindex);
22158+ if (au_br_fhsm(br->br_perm)
22159+ && (!(au_br_sb(br)->s_flags & MS_RDONLY)))
22160+ return bindex;
22161+ }
22162+ else if (!(a->mvdown.flags & AUFS_MVDOWN_ROLOWER))
22163+ for (bindex++; bindex <= bend; bindex++) {
22164+ br = au_sbr(sb, bindex);
22165+ if (!au_br_rdonly(br))
22166+ return bindex;
22167+ }
22168+ else
22169+ for (bindex++; bindex <= bend; bindex++) {
22170+ br = au_sbr(sb, bindex);
22171+ if (!(au_br_sb(br)->s_flags & MS_RDONLY)) {
22172+ if (au_br_rdonly(br))
22173+ a->mvdown.flags
22174+ |= AUFS_MVDOWN_ROLOWER_R;
22175+ return bindex;
22176+ }
22177+ }
22178+
22179+ return -1;
22180+}
22181+
c2b27bf2 22182+/* make the parent dir on bdst */
392086de 22183+static int au_do_mkdir(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22184+{
22185+ int err;
22186+
22187+ err = 0;
22188+ a->mvd_hdir_src = au_hi(a->dir, a->mvd_bsrc);
22189+ a->mvd_hdir_dst = au_hi(a->dir, a->mvd_bdst);
22190+ a->mvd_h_src_parent = au_h_dptr(a->parent, a->mvd_bsrc);
22191+ a->mvd_h_dst_parent = NULL;
22192+ if (au_dbend(a->parent) >= a->mvd_bdst)
22193+ a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);
22194+ if (!a->mvd_h_dst_parent) {
22195+ err = au_cpdown_dirs(a->dentry, a->mvd_bdst);
22196+ if (unlikely(err)) {
392086de 22197+ AU_MVD_PR(dmsg, "cpdown_dirs failed\n");
c2b27bf2
AM
22198+ goto out;
22199+ }
22200+ a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);
22201+ }
22202+
22203+out:
22204+ AuTraceErr(err);
22205+ return err;
22206+}
22207+
22208+/* lock them all */
392086de 22209+static int au_do_lock(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22210+{
22211+ int err;
22212+ struct dentry *h_trap;
22213+
22214+ a->mvd_h_src_sb = au_sbr_sb(a->sb, a->mvd_bsrc);
22215+ a->mvd_h_dst_sb = au_sbr_sb(a->sb, a->mvd_bdst);
c1595e42
JR
22216+ err = au_pin(&a->mvd_pin_dst, a->dentry, a->mvd_bdst,
22217+ au_opt_udba(a->sb),
22218+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
22219+ AuTraceErr(err);
22220+ if (unlikely(err)) {
22221+ AU_MVD_PR(dmsg, "pin_dst failed\n");
22222+ goto out;
22223+ }
22224+
c2b27bf2
AM
22225+ if (a->mvd_h_src_sb != a->mvd_h_dst_sb) {
22226+ a->rename_lock = 0;
c1595e42
JR
22227+ au_pin_init(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
22228+ AuLsc_DI_PARENT, AuLsc_I_PARENT3,
22229+ au_opt_udba(a->sb),
22230+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
22231+ err = au_do_pin(&a->mvd_pin_src);
22232+ AuTraceErr(err);
5527c038 22233+ a->mvd_h_src_dir = d_inode(a->mvd_h_src_parent);
c1595e42
JR
22234+ if (unlikely(err)) {
22235+ AU_MVD_PR(dmsg, "pin_src failed\n");
22236+ goto out_dst;
22237+ }
22238+ goto out; /* success */
c2b27bf2
AM
22239+ }
22240+
c2b27bf2 22241+ a->rename_lock = 1;
c1595e42
JR
22242+ au_pin_hdir_unlock(&a->mvd_pin_dst);
22243+ err = au_pin(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
22244+ au_opt_udba(a->sb),
22245+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
22246+ AuTraceErr(err);
5527c038 22247+ a->mvd_h_src_dir = d_inode(a->mvd_h_src_parent);
c1595e42
JR
22248+ if (unlikely(err)) {
22249+ AU_MVD_PR(dmsg, "pin_src failed\n");
22250+ au_pin_hdir_lock(&a->mvd_pin_dst);
22251+ goto out_dst;
22252+ }
22253+ au_pin_hdir_unlock(&a->mvd_pin_src);
c2b27bf2
AM
22254+ h_trap = vfsub_lock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
22255+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
22256+ if (h_trap) {
22257+ err = (h_trap != a->mvd_h_src_parent);
22258+ if (err)
22259+ err = (h_trap != a->mvd_h_dst_parent);
22260+ }
22261+ BUG_ON(err); /* it should never happen */
c1595e42
JR
22262+ if (unlikely(a->mvd_h_src_dir != au_pinned_h_dir(&a->mvd_pin_src))) {
22263+ err = -EBUSY;
22264+ AuTraceErr(err);
22265+ vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
22266+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
22267+ au_pin_hdir_lock(&a->mvd_pin_src);
22268+ au_unpin(&a->mvd_pin_src);
22269+ au_pin_hdir_lock(&a->mvd_pin_dst);
22270+ goto out_dst;
22271+ }
22272+ goto out; /* success */
c2b27bf2 22273+
c1595e42
JR
22274+out_dst:
22275+ au_unpin(&a->mvd_pin_dst);
c2b27bf2
AM
22276+out:
22277+ AuTraceErr(err);
22278+ return err;
22279+}
22280+
392086de 22281+static void au_do_unlock(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2 22282+{
c1595e42
JR
22283+ if (!a->rename_lock)
22284+ au_unpin(&a->mvd_pin_src);
22285+ else {
c2b27bf2
AM
22286+ vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
22287+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
c1595e42
JR
22288+ au_pin_hdir_lock(&a->mvd_pin_src);
22289+ au_unpin(&a->mvd_pin_src);
22290+ au_pin_hdir_lock(&a->mvd_pin_dst);
22291+ }
22292+ au_unpin(&a->mvd_pin_dst);
c2b27bf2
AM
22293+}
22294+
22295+/* copy-down the file */
392086de 22296+static int au_do_cpdown(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22297+{
22298+ int err;
22299+ struct au_cp_generic cpg = {
22300+ .dentry = a->dentry,
22301+ .bdst = a->mvd_bdst,
22302+ .bsrc = a->mvd_bsrc,
22303+ .len = -1,
c1595e42 22304+ .pin = &a->mvd_pin_dst,
c2b27bf2
AM
22305+ .flags = AuCpup_DTIME | AuCpup_HOPEN
22306+ };
22307+
22308+ AuDbg("b%d, b%d\n", cpg.bsrc, cpg.bdst);
392086de
AM
22309+ if (a->mvdown.flags & AUFS_MVDOWN_OWLOWER)
22310+ au_fset_cpup(cpg.flags, OVERWRITE);
22311+ if (a->mvdown.flags & AUFS_MVDOWN_ROLOWER)
22312+ au_fset_cpup(cpg.flags, RWDST);
c2b27bf2
AM
22313+ err = au_sio_cpdown_simple(&cpg);
22314+ if (unlikely(err))
392086de 22315+ AU_MVD_PR(dmsg, "cpdown failed\n");
c2b27bf2
AM
22316+
22317+ AuTraceErr(err);
22318+ return err;
22319+}
22320+
22321+/*
22322+ * unlink the whiteout on bdst if exist which may be created by UDBA while we
22323+ * were sleeping
22324+ */
392086de 22325+static int au_do_unlink_wh(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22326+{
22327+ int err;
22328+ struct path h_path;
22329+ struct au_branch *br;
523b37e3 22330+ struct inode *delegated;
c2b27bf2
AM
22331+
22332+ br = au_sbr(a->sb, a->mvd_bdst);
22333+ h_path.dentry = au_wh_lkup(a->mvd_h_dst_parent, &a->dentry->d_name, br);
22334+ err = PTR_ERR(h_path.dentry);
22335+ if (IS_ERR(h_path.dentry)) {
392086de 22336+ AU_MVD_PR(dmsg, "wh_lkup failed\n");
c2b27bf2
AM
22337+ goto out;
22338+ }
22339+
22340+ err = 0;
5527c038 22341+ if (d_is_positive(h_path.dentry)) {
c2b27bf2 22342+ h_path.mnt = au_br_mnt(br);
523b37e3 22343+ delegated = NULL;
5527c038 22344+ err = vfsub_unlink(d_inode(a->mvd_h_dst_parent), &h_path,
523b37e3
AM
22345+ &delegated, /*force*/0);
22346+ if (unlikely(err == -EWOULDBLOCK)) {
22347+ pr_warn("cannot retry for NFSv4 delegation"
22348+ " for an internal unlink\n");
22349+ iput(delegated);
22350+ }
c2b27bf2 22351+ if (unlikely(err))
392086de 22352+ AU_MVD_PR(dmsg, "wh_unlink failed\n");
c2b27bf2
AM
22353+ }
22354+ dput(h_path.dentry);
22355+
22356+out:
22357+ AuTraceErr(err);
22358+ return err;
22359+}
22360+
22361+/*
22362+ * unlink the topmost h_dentry
c2b27bf2 22363+ */
392086de 22364+static int au_do_unlink(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22365+{
22366+ int err;
22367+ struct path h_path;
523b37e3 22368+ struct inode *delegated;
c2b27bf2
AM
22369+
22370+ h_path.mnt = au_sbr_mnt(a->sb, a->mvd_bsrc);
22371+ h_path.dentry = au_h_dptr(a->dentry, a->mvd_bsrc);
523b37e3
AM
22372+ delegated = NULL;
22373+ err = vfsub_unlink(a->mvd_h_src_dir, &h_path, &delegated, /*force*/0);
22374+ if (unlikely(err == -EWOULDBLOCK)) {
22375+ pr_warn("cannot retry for NFSv4 delegation"
22376+ " for an internal unlink\n");
22377+ iput(delegated);
22378+ }
c2b27bf2 22379+ if (unlikely(err))
392086de 22380+ AU_MVD_PR(dmsg, "unlink failed\n");
c2b27bf2
AM
22381+
22382+ AuTraceErr(err);
22383+ return err;
22384+}
22385+
076b876e
AM
22386+/* Since mvdown succeeded, we ignore an error of this function */
22387+static void au_do_stfs(const unsigned char dmsg, struct au_mvd_args *a)
22388+{
22389+ int err;
22390+ struct au_branch *br;
22391+
22392+ a->mvdown.flags |= AUFS_MVDOWN_STFS_FAILED;
22393+ br = au_sbr(a->sb, a->mvd_bsrc);
22394+ err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_UPPER].stfs);
22395+ if (!err) {
22396+ br = au_sbr(a->sb, a->mvd_bdst);
22397+ a->mvdown.stbr[AUFS_MVDOWN_LOWER].brid = br->br_id;
22398+ err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_LOWER].stfs);
22399+ }
22400+ if (!err)
22401+ a->mvdown.flags &= ~AUFS_MVDOWN_STFS_FAILED;
22402+ else
22403+ AU_MVD_PR(dmsg, "statfs failed (%d), ignored\n", err);
22404+}
22405+
c2b27bf2
AM
22406+/*
22407+ * copy-down the file and unlink the bsrc file.
22408+ * - unlink the bdst whout if exist
22409+ * - copy-down the file (with whtmp name and rename)
22410+ * - unlink the bsrc file
22411+ */
392086de 22412+static int au_do_mvdown(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22413+{
22414+ int err;
22415+
392086de 22416+ err = au_do_mkdir(dmsg, a);
c2b27bf2 22417+ if (!err)
392086de 22418+ err = au_do_lock(dmsg, a);
c2b27bf2
AM
22419+ if (unlikely(err))
22420+ goto out;
22421+
22422+ /*
22423+ * do not revert the activities we made on bdst since they should be
22424+ * harmless in aufs.
22425+ */
22426+
392086de 22427+ err = au_do_cpdown(dmsg, a);
c2b27bf2 22428+ if (!err)
392086de
AM
22429+ err = au_do_unlink_wh(dmsg, a);
22430+ if (!err && !(a->mvdown.flags & AUFS_MVDOWN_KUPPER))
22431+ err = au_do_unlink(dmsg, a);
c2b27bf2
AM
22432+ if (unlikely(err))
22433+ goto out_unlock;
22434+
c1595e42
JR
22435+ AuDbg("%pd2, 0x%x, %d --> %d\n",
22436+ a->dentry, a->mvdown.flags, a->mvd_bsrc, a->mvd_bdst);
076b876e
AM
22437+ if (find_lower_writable(a) < 0)
22438+ a->mvdown.flags |= AUFS_MVDOWN_BOTTOM;
22439+
22440+ if (a->mvdown.flags & AUFS_MVDOWN_STFS)
22441+ au_do_stfs(dmsg, a);
22442+
c2b27bf2 22443+ /* maintain internal array */
392086de
AM
22444+ if (!(a->mvdown.flags & AUFS_MVDOWN_KUPPER)) {
22445+ au_set_h_dptr(a->dentry, a->mvd_bsrc, NULL);
22446+ au_set_dbstart(a->dentry, a->mvd_bdst);
22447+ au_set_h_iptr(a->inode, a->mvd_bsrc, NULL, /*flags*/0);
22448+ au_set_ibstart(a->inode, a->mvd_bdst);
79b8bda9
AM
22449+ } else {
22450+ /* hide the lower */
22451+ au_set_h_dptr(a->dentry, a->mvd_bdst, NULL);
22452+ au_set_dbend(a->dentry, a->mvd_bsrc);
22453+ au_set_h_iptr(a->inode, a->mvd_bdst, NULL, /*flags*/0);
22454+ au_set_ibend(a->inode, a->mvd_bsrc);
392086de 22455+ }
c2b27bf2
AM
22456+ if (au_dbend(a->dentry) < a->mvd_bdst)
22457+ au_set_dbend(a->dentry, a->mvd_bdst);
c2b27bf2
AM
22458+ if (au_ibend(a->inode) < a->mvd_bdst)
22459+ au_set_ibend(a->inode, a->mvd_bdst);
22460+
22461+out_unlock:
392086de 22462+ au_do_unlock(dmsg, a);
c2b27bf2
AM
22463+out:
22464+ AuTraceErr(err);
22465+ return err;
22466+}
22467+
22468+/* ---------------------------------------------------------------------- */
22469+
c2b27bf2 22470+/* make sure the file is idle */
392086de 22471+static int au_mvd_args_busy(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22472+{
22473+ int err, plinked;
c2b27bf2
AM
22474+
22475+ err = 0;
c2b27bf2
AM
22476+ plinked = !!au_opt_test(au_mntflags(a->sb), PLINK);
22477+ if (au_dbstart(a->dentry) == a->mvd_bsrc
c1595e42 22478+ && au_dcount(a->dentry) == 1
c2b27bf2 22479+ && atomic_read(&a->inode->i_count) == 1
392086de 22480+ /* && a->mvd_h_src_inode->i_nlink == 1 */
c2b27bf2
AM
22481+ && (!plinked || !au_plink_test(a->inode))
22482+ && a->inode->i_nlink == 1)
22483+ goto out;
22484+
22485+ err = -EBUSY;
392086de 22486+ AU_MVD_PR(dmsg,
c1595e42
JR
22487+ "b%d, d{b%d, c%d?}, i{c%d?, l%u}, hi{l%u}, p{%d, %d}\n",
22488+ a->mvd_bsrc, au_dbstart(a->dentry), au_dcount(a->dentry),
c2b27bf2 22489+ atomic_read(&a->inode->i_count), a->inode->i_nlink,
392086de 22490+ a->mvd_h_src_inode->i_nlink,
c2b27bf2
AM
22491+ plinked, plinked ? au_plink_test(a->inode) : 0);
22492+
22493+out:
22494+ AuTraceErr(err);
22495+ return err;
22496+}
22497+
22498+/* make sure the parent dir is fine */
392086de 22499+static int au_mvd_args_parent(const unsigned char dmsg,
c2b27bf2
AM
22500+ struct au_mvd_args *a)
22501+{
22502+ int err;
22503+ aufs_bindex_t bindex;
22504+
22505+ err = 0;
22506+ if (unlikely(au_alive_dir(a->parent))) {
22507+ err = -ENOENT;
392086de 22508+ AU_MVD_PR(dmsg, "parent dir is dead\n");
c2b27bf2
AM
22509+ goto out;
22510+ }
22511+
22512+ a->bopq = au_dbdiropq(a->parent);
22513+ bindex = au_wbr_nonopq(a->dentry, a->mvd_bdst);
22514+ AuDbg("b%d\n", bindex);
22515+ if (unlikely((bindex >= 0 && bindex < a->mvd_bdst)
22516+ || (a->bopq != -1 && a->bopq < a->mvd_bdst))) {
22517+ err = -EINVAL;
392086de
AM
22518+ a->mvd_errno = EAU_MVDOWN_OPAQUE;
22519+ AU_MVD_PR(dmsg, "ancestor is opaque b%d, b%d\n",
c2b27bf2
AM
22520+ a->bopq, a->mvd_bdst);
22521+ }
22522+
22523+out:
22524+ AuTraceErr(err);
22525+ return err;
22526+}
22527+
392086de 22528+static int au_mvd_args_intermediate(const unsigned char dmsg,
c2b27bf2
AM
22529+ struct au_mvd_args *a)
22530+{
22531+ int err;
22532+ struct au_dinfo *dinfo, *tmp;
22533+
22534+ /* lookup the next lower positive entry */
22535+ err = -ENOMEM;
22536+ tmp = au_di_alloc(a->sb, AuLsc_DI_TMP);
22537+ if (unlikely(!tmp))
22538+ goto out;
22539+
22540+ a->bfound = -1;
22541+ a->bwh = -1;
22542+ dinfo = au_di(a->dentry);
22543+ au_di_cp(tmp, dinfo);
22544+ au_di_swap(tmp, dinfo);
22545+
22546+ /* returns the number of positive dentries */
22547+ err = au_lkup_dentry(a->dentry, a->mvd_bsrc + 1, /*type*/0);
22548+ if (!err)
22549+ a->bwh = au_dbwh(a->dentry);
22550+ else if (err > 0)
22551+ a->bfound = au_dbstart(a->dentry);
22552+
22553+ au_di_swap(tmp, dinfo);
22554+ au_rw_write_unlock(&tmp->di_rwsem);
22555+ au_di_free(tmp);
22556+ if (unlikely(err < 0))
392086de 22557+ AU_MVD_PR(dmsg, "failed look-up lower\n");
c2b27bf2
AM
22558+
22559+ /*
22560+ * here, we have these cases.
22561+ * bfound == -1
22562+ * no positive dentry under bsrc. there are more sub-cases.
22563+ * bwh < 0
22564+ * there no whiteout, we can safely move-down.
22565+ * bwh <= bsrc
22566+ * impossible
22567+ * bsrc < bwh && bwh < bdst
22568+ * there is a whiteout on RO branch. cannot proceed.
22569+ * bwh == bdst
22570+ * there is a whiteout on the RW target branch. it should
22571+ * be removed.
22572+ * bdst < bwh
22573+ * there is a whiteout somewhere unrelated branch.
22574+ * -1 < bfound && bfound <= bsrc
22575+ * impossible.
22576+ * bfound < bdst
22577+ * found, but it is on RO branch between bsrc and bdst. cannot
22578+ * proceed.
22579+ * bfound == bdst
22580+ * found, replace it if AUFS_MVDOWN_FORCE is set. otherwise return
22581+ * error.
22582+ * bdst < bfound
22583+ * found, after we create the file on bdst, it will be hidden.
22584+ */
22585+
22586+ AuDebugOn(a->bfound == -1
22587+ && a->bwh != -1
22588+ && a->bwh <= a->mvd_bsrc);
22589+ AuDebugOn(-1 < a->bfound
22590+ && a->bfound <= a->mvd_bsrc);
22591+
22592+ err = -EINVAL;
22593+ if (a->bfound == -1
22594+ && a->mvd_bsrc < a->bwh
22595+ && a->bwh != -1
22596+ && a->bwh < a->mvd_bdst) {
392086de
AM
22597+ a->mvd_errno = EAU_MVDOWN_WHITEOUT;
22598+ AU_MVD_PR(dmsg, "bsrc %d, bdst %d, bfound %d, bwh %d\n",
c2b27bf2
AM
22599+ a->mvd_bsrc, a->mvd_bdst, a->bfound, a->bwh);
22600+ goto out;
22601+ } else if (a->bfound != -1 && a->bfound < a->mvd_bdst) {
392086de
AM
22602+ a->mvd_errno = EAU_MVDOWN_UPPER;
22603+ AU_MVD_PR(dmsg, "bdst %d, bfound %d\n",
c2b27bf2
AM
22604+ a->mvd_bdst, a->bfound);
22605+ goto out;
22606+ }
22607+
22608+ err = 0; /* success */
22609+
22610+out:
22611+ AuTraceErr(err);
22612+ return err;
22613+}
22614+
392086de 22615+static int au_mvd_args_exist(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22616+{
22617+ int err;
22618+
392086de
AM
22619+ err = 0;
22620+ if (!(a->mvdown.flags & AUFS_MVDOWN_OWLOWER)
22621+ && a->bfound == a->mvd_bdst)
22622+ err = -EEXIST;
c2b27bf2
AM
22623+ AuTraceErr(err);
22624+ return err;
22625+}
22626+
392086de 22627+static int au_mvd_args(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22628+{
22629+ int err;
22630+ struct au_branch *br;
22631+
22632+ err = -EISDIR;
22633+ if (unlikely(S_ISDIR(a->inode->i_mode)))
22634+ goto out;
22635+
22636+ err = -EINVAL;
392086de
AM
22637+ if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_UPPER))
22638+ a->mvd_bsrc = au_ibstart(a->inode);
22639+ else {
22640+ a->mvd_bsrc = au_br_index(a->sb, a->mvd_src_brid);
22641+ if (unlikely(a->mvd_bsrc < 0
22642+ || (a->mvd_bsrc < au_dbstart(a->dentry)
22643+ || au_dbend(a->dentry) < a->mvd_bsrc
22644+ || !au_h_dptr(a->dentry, a->mvd_bsrc))
22645+ || (a->mvd_bsrc < au_ibstart(a->inode)
22646+ || au_ibend(a->inode) < a->mvd_bsrc
22647+ || !au_h_iptr(a->inode, a->mvd_bsrc)))) {
22648+ a->mvd_errno = EAU_MVDOWN_NOUPPER;
22649+ AU_MVD_PR(dmsg, "no upper\n");
22650+ goto out;
22651+ }
22652+ }
c2b27bf2 22653+ if (unlikely(a->mvd_bsrc == au_sbend(a->sb))) {
392086de
AM
22654+ a->mvd_errno = EAU_MVDOWN_BOTTOM;
22655+ AU_MVD_PR(dmsg, "on the bottom\n");
c2b27bf2
AM
22656+ goto out;
22657+ }
392086de 22658+ a->mvd_h_src_inode = au_h_iptr(a->inode, a->mvd_bsrc);
c2b27bf2
AM
22659+ br = au_sbr(a->sb, a->mvd_bsrc);
22660+ err = au_br_rdonly(br);
392086de
AM
22661+ if (!(a->mvdown.flags & AUFS_MVDOWN_ROUPPER)) {
22662+ if (unlikely(err))
22663+ goto out;
22664+ } else if (!(vfsub_native_ro(a->mvd_h_src_inode)
22665+ || IS_APPEND(a->mvd_h_src_inode))) {
22666+ if (err)
22667+ a->mvdown.flags |= AUFS_MVDOWN_ROUPPER_R;
22668+ /* go on */
22669+ } else
c2b27bf2
AM
22670+ goto out;
22671+
22672+ err = -EINVAL;
392086de
AM
22673+ if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_LOWER)) {
22674+ a->mvd_bdst = find_lower_writable(a);
22675+ if (unlikely(a->mvd_bdst < 0)) {
22676+ a->mvd_errno = EAU_MVDOWN_BOTTOM;
22677+ AU_MVD_PR(dmsg, "no writable lower branch\n");
22678+ goto out;
22679+ }
22680+ } else {
22681+ a->mvd_bdst = au_br_index(a->sb, a->mvd_dst_brid);
22682+ if (unlikely(a->mvd_bdst < 0
22683+ || au_sbend(a->sb) < a->mvd_bdst)) {
22684+ a->mvd_errno = EAU_MVDOWN_NOLOWERBR;
22685+ AU_MVD_PR(dmsg, "no lower brid\n");
22686+ goto out;
22687+ }
c2b27bf2
AM
22688+ }
22689+
392086de 22690+ err = au_mvd_args_busy(dmsg, a);
c2b27bf2 22691+ if (!err)
392086de 22692+ err = au_mvd_args_parent(dmsg, a);
c2b27bf2 22693+ if (!err)
392086de 22694+ err = au_mvd_args_intermediate(dmsg, a);
c2b27bf2 22695+ if (!err)
392086de 22696+ err = au_mvd_args_exist(dmsg, a);
c2b27bf2
AM
22697+ if (!err)
22698+ AuDbg("b%d, b%d\n", a->mvd_bsrc, a->mvd_bdst);
22699+
22700+out:
22701+ AuTraceErr(err);
22702+ return err;
22703+}
22704+
22705+int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *uarg)
22706+{
392086de
AM
22707+ int err, e;
22708+ unsigned char dmsg;
22709+ struct au_mvd_args *args;
79b8bda9 22710+ struct inode *inode;
c2b27bf2 22711+
79b8bda9 22712+ inode = d_inode(dentry);
c2b27bf2
AM
22713+ err = -EPERM;
22714+ if (unlikely(!capable(CAP_SYS_ADMIN)))
22715+ goto out;
22716+
392086de
AM
22717+ err = -ENOMEM;
22718+ args = kmalloc(sizeof(*args), GFP_NOFS);
22719+ if (unlikely(!args))
22720+ goto out;
22721+
22722+ err = copy_from_user(&args->mvdown, uarg, sizeof(args->mvdown));
22723+ if (!err)
22724+ err = !access_ok(VERIFY_WRITE, uarg, sizeof(*uarg));
c2b27bf2
AM
22725+ if (unlikely(err)) {
22726+ err = -EFAULT;
392086de
AM
22727+ AuTraceErr(err);
22728+ goto out_free;
c2b27bf2 22729+ }
392086de
AM
22730+ AuDbg("flags 0x%x\n", args->mvdown.flags);
22731+ args->mvdown.flags &= ~(AUFS_MVDOWN_ROLOWER_R | AUFS_MVDOWN_ROUPPER_R);
22732+ args->mvdown.au_errno = 0;
22733+ args->dentry = dentry;
79b8bda9 22734+ args->inode = inode;
392086de 22735+ args->sb = dentry->d_sb;
c2b27bf2 22736+
392086de
AM
22737+ err = -ENOENT;
22738+ dmsg = !!(args->mvdown.flags & AUFS_MVDOWN_DMSG);
22739+ args->parent = dget_parent(dentry);
5527c038 22740+ args->dir = d_inode(args->parent);
392086de
AM
22741+ mutex_lock_nested(&args->dir->i_mutex, I_MUTEX_PARENT);
22742+ dput(args->parent);
22743+ if (unlikely(args->parent != dentry->d_parent)) {
22744+ AU_MVD_PR(dmsg, "parent dir is moved\n");
c2b27bf2
AM
22745+ goto out_dir;
22746+ }
22747+
79b8bda9 22748+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
b95c5147 22749+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_NOPLMW);
c2b27bf2
AM
22750+ if (unlikely(err))
22751+ goto out_inode;
22752+
392086de
AM
22753+ di_write_lock_parent(args->parent);
22754+ err = au_mvd_args(dmsg, args);
c2b27bf2
AM
22755+ if (unlikely(err))
22756+ goto out_parent;
22757+
392086de 22758+ err = au_do_mvdown(dmsg, args);
c2b27bf2
AM
22759+ if (unlikely(err))
22760+ goto out_parent;
c2b27bf2 22761+
392086de 22762+ au_cpup_attr_timesizes(args->dir);
79b8bda9
AM
22763+ au_cpup_attr_timesizes(inode);
22764+ if (!(args->mvdown.flags & AUFS_MVDOWN_KUPPER))
22765+ au_cpup_igen(inode, au_h_iptr(inode, args->mvd_bdst));
c2b27bf2
AM
22766+ /* au_digen_dec(dentry); */
22767+
22768+out_parent:
392086de 22769+ di_write_unlock(args->parent);
c2b27bf2
AM
22770+ aufs_read_unlock(dentry, AuLock_DW);
22771+out_inode:
79b8bda9 22772+ mutex_unlock(&inode->i_mutex);
c2b27bf2 22773+out_dir:
392086de
AM
22774+ mutex_unlock(&args->dir->i_mutex);
22775+out_free:
22776+ e = copy_to_user(uarg, &args->mvdown, sizeof(args->mvdown));
22777+ if (unlikely(e))
22778+ err = -EFAULT;
22779+ kfree(args);
c2b27bf2
AM
22780+out:
22781+ AuTraceErr(err);
22782+ return err;
22783+}
22784diff -urN /usr/share/empty/fs/aufs/opts.c linux/fs/aufs/opts.c
22785--- /usr/share/empty/fs/aufs/opts.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 22786+++ linux/fs/aufs/opts.c 2016-02-28 11:26:32.573304539 +0100
79b8bda9 22787@@ -0,0 +1,1859 @@
1facf9fc 22788+/*
8cdd5066 22789+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 22790+ *
22791+ * This program, aufs is free software; you can redistribute it and/or modify
22792+ * it under the terms of the GNU General Public License as published by
22793+ * the Free Software Foundation; either version 2 of the License, or
22794+ * (at your option) any later version.
dece6358
AM
22795+ *
22796+ * This program is distributed in the hope that it will be useful,
22797+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22798+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22799+ * GNU General Public License for more details.
22800+ *
22801+ * You should have received a copy of the GNU General Public License
523b37e3 22802+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 22803+ */
22804+
22805+/*
22806+ * mount options/flags
22807+ */
22808+
dece6358 22809+#include <linux/namei.h>
1facf9fc 22810+#include <linux/types.h> /* a distribution requires */
22811+#include <linux/parser.h>
22812+#include "aufs.h"
22813+
22814+/* ---------------------------------------------------------------------- */
22815+
22816+enum {
22817+ Opt_br,
7e9cd9fe
AM
22818+ Opt_add, Opt_del, Opt_mod, Opt_append, Opt_prepend,
22819+ Opt_idel, Opt_imod,
22820+ Opt_dirwh, Opt_rdcache, Opt_rdblk, Opt_rdhash,
dece6358 22821+ Opt_rdblk_def, Opt_rdhash_def,
7e9cd9fe 22822+ Opt_xino, Opt_noxino,
1facf9fc 22823+ Opt_trunc_xino, Opt_trunc_xino_v, Opt_notrunc_xino,
22824+ Opt_trunc_xino_path, Opt_itrunc_xino,
22825+ Opt_trunc_xib, Opt_notrunc_xib,
dece6358 22826+ Opt_shwh, Opt_noshwh,
1facf9fc 22827+ Opt_plink, Opt_noplink, Opt_list_plink,
22828+ Opt_udba,
4a4d8108 22829+ Opt_dio, Opt_nodio,
1facf9fc 22830+ Opt_diropq_a, Opt_diropq_w,
22831+ Opt_warn_perm, Opt_nowarn_perm,
22832+ Opt_wbr_copyup, Opt_wbr_create,
076b876e 22833+ Opt_fhsm_sec,
1facf9fc 22834+ Opt_verbose, Opt_noverbose,
22835+ Opt_sum, Opt_nosum, Opt_wsum,
076b876e 22836+ Opt_dirperm1, Opt_nodirperm1,
c1595e42 22837+ Opt_acl, Opt_noacl,
1facf9fc 22838+ Opt_tail, Opt_ignore, Opt_ignore_silent, Opt_err
22839+};
22840+
22841+static match_table_t options = {
22842+ {Opt_br, "br=%s"},
22843+ {Opt_br, "br:%s"},
22844+
22845+ {Opt_add, "add=%d:%s"},
22846+ {Opt_add, "add:%d:%s"},
22847+ {Opt_add, "ins=%d:%s"},
22848+ {Opt_add, "ins:%d:%s"},
22849+ {Opt_append, "append=%s"},
22850+ {Opt_append, "append:%s"},
22851+ {Opt_prepend, "prepend=%s"},
22852+ {Opt_prepend, "prepend:%s"},
22853+
22854+ {Opt_del, "del=%s"},
22855+ {Opt_del, "del:%s"},
22856+ /* {Opt_idel, "idel:%d"}, */
22857+ {Opt_mod, "mod=%s"},
22858+ {Opt_mod, "mod:%s"},
22859+ /* {Opt_imod, "imod:%d:%s"}, */
22860+
22861+ {Opt_dirwh, "dirwh=%d"},
22862+
22863+ {Opt_xino, "xino=%s"},
22864+ {Opt_noxino, "noxino"},
22865+ {Opt_trunc_xino, "trunc_xino"},
22866+ {Opt_trunc_xino_v, "trunc_xino_v=%d:%d"},
22867+ {Opt_notrunc_xino, "notrunc_xino"},
22868+ {Opt_trunc_xino_path, "trunc_xino=%s"},
22869+ {Opt_itrunc_xino, "itrunc_xino=%d"},
22870+ /* {Opt_zxino, "zxino=%s"}, */
22871+ {Opt_trunc_xib, "trunc_xib"},
22872+ {Opt_notrunc_xib, "notrunc_xib"},
22873+
e49829fe 22874+#ifdef CONFIG_PROC_FS
1facf9fc 22875+ {Opt_plink, "plink"},
e49829fe
JR
22876+#else
22877+ {Opt_ignore_silent, "plink"},
22878+#endif
22879+
1facf9fc 22880+ {Opt_noplink, "noplink"},
e49829fe 22881+
1facf9fc 22882+#ifdef CONFIG_AUFS_DEBUG
22883+ {Opt_list_plink, "list_plink"},
22884+#endif
22885+
22886+ {Opt_udba, "udba=%s"},
22887+
4a4d8108
AM
22888+ {Opt_dio, "dio"},
22889+ {Opt_nodio, "nodio"},
22890+
076b876e
AM
22891+#ifdef CONFIG_AUFS_FHSM
22892+ {Opt_fhsm_sec, "fhsm_sec=%d"},
22893+#else
22894+ {Opt_ignore_silent, "fhsm_sec=%d"},
22895+#endif
22896+
1facf9fc 22897+ {Opt_diropq_a, "diropq=always"},
22898+ {Opt_diropq_a, "diropq=a"},
22899+ {Opt_diropq_w, "diropq=whiteouted"},
22900+ {Opt_diropq_w, "diropq=w"},
22901+
22902+ {Opt_warn_perm, "warn_perm"},
22903+ {Opt_nowarn_perm, "nowarn_perm"},
22904+
22905+ /* keep them temporary */
1facf9fc 22906+ {Opt_ignore_silent, "nodlgt"},
1facf9fc 22907+ {Opt_ignore_silent, "clean_plink"},
22908+
dece6358
AM
22909+#ifdef CONFIG_AUFS_SHWH
22910+ {Opt_shwh, "shwh"},
22911+#endif
22912+ {Opt_noshwh, "noshwh"},
22913+
076b876e
AM
22914+ {Opt_dirperm1, "dirperm1"},
22915+ {Opt_nodirperm1, "nodirperm1"},
22916+
1facf9fc 22917+ {Opt_verbose, "verbose"},
22918+ {Opt_verbose, "v"},
22919+ {Opt_noverbose, "noverbose"},
22920+ {Opt_noverbose, "quiet"},
22921+ {Opt_noverbose, "q"},
22922+ {Opt_noverbose, "silent"},
22923+
22924+ {Opt_sum, "sum"},
22925+ {Opt_nosum, "nosum"},
22926+ {Opt_wsum, "wsum"},
22927+
22928+ {Opt_rdcache, "rdcache=%d"},
22929+ {Opt_rdblk, "rdblk=%d"},
dece6358 22930+ {Opt_rdblk_def, "rdblk=def"},
1facf9fc 22931+ {Opt_rdhash, "rdhash=%d"},
dece6358 22932+ {Opt_rdhash_def, "rdhash=def"},
1facf9fc 22933+
22934+ {Opt_wbr_create, "create=%s"},
22935+ {Opt_wbr_create, "create_policy=%s"},
22936+ {Opt_wbr_copyup, "cpup=%s"},
22937+ {Opt_wbr_copyup, "copyup=%s"},
22938+ {Opt_wbr_copyup, "copyup_policy=%s"},
22939+
c1595e42
JR
22940+ /* generic VFS flag */
22941+#ifdef CONFIG_FS_POSIX_ACL
22942+ {Opt_acl, "acl"},
22943+ {Opt_noacl, "noacl"},
22944+#else
22945+ {Opt_ignore_silent, "acl"},
22946+ {Opt_ignore_silent, "noacl"},
22947+#endif
22948+
1facf9fc 22949+ /* internal use for the scripts */
22950+ {Opt_ignore_silent, "si=%s"},
22951+
22952+ {Opt_br, "dirs=%s"},
22953+ {Opt_ignore, "debug=%d"},
22954+ {Opt_ignore, "delete=whiteout"},
22955+ {Opt_ignore, "delete=all"},
22956+ {Opt_ignore, "imap=%s"},
22957+
1308ab2a 22958+ /* temporary workaround, due to old mount(8)? */
22959+ {Opt_ignore_silent, "relatime"},
22960+
1facf9fc 22961+ {Opt_err, NULL}
22962+};
22963+
22964+/* ---------------------------------------------------------------------- */
22965+
076b876e 22966+static const char *au_parser_pattern(int val, match_table_t tbl)
1facf9fc 22967+{
076b876e
AM
22968+ struct match_token *p;
22969+
22970+ p = tbl;
22971+ while (p->pattern) {
22972+ if (p->token == val)
22973+ return p->pattern;
22974+ p++;
1facf9fc 22975+ }
22976+ BUG();
22977+ return "??";
22978+}
22979+
076b876e
AM
22980+static const char *au_optstr(int *val, match_table_t tbl)
22981+{
22982+ struct match_token *p;
22983+ int v;
22984+
22985+ v = *val;
2000de60
JR
22986+ if (!v)
22987+ goto out;
076b876e 22988+ p = tbl;
2000de60
JR
22989+ while (p->pattern) {
22990+ if (p->token
22991+ && (v & p->token) == p->token) {
076b876e
AM
22992+ *val &= ~p->token;
22993+ return p->pattern;
22994+ }
22995+ p++;
22996+ }
2000de60
JR
22997+
22998+out:
076b876e
AM
22999+ return NULL;
23000+}
23001+
1facf9fc 23002+/* ---------------------------------------------------------------------- */
23003+
1e00d052 23004+static match_table_t brperm = {
1facf9fc 23005+ {AuBrPerm_RO, AUFS_BRPERM_RO},
23006+ {AuBrPerm_RR, AUFS_BRPERM_RR},
23007+ {AuBrPerm_RW, AUFS_BRPERM_RW},
1e00d052
AM
23008+ {0, NULL}
23009+};
1facf9fc 23010+
86dc4139 23011+static match_table_t brattr = {
076b876e
AM
23012+ /* general */
23013+ {AuBrAttr_COO_REG, AUFS_BRATTR_COO_REG},
23014+ {AuBrAttr_COO_ALL, AUFS_BRATTR_COO_ALL},
c1595e42 23015+ /* 'unpin' attrib is meaningless since linux-3.18-rc1 */
86dc4139 23016+ {AuBrAttr_UNPIN, AUFS_BRATTR_UNPIN},
2000de60 23017+#ifdef CONFIG_AUFS_FHSM
076b876e 23018+ {AuBrAttr_FHSM, AUFS_BRATTR_FHSM},
2000de60
JR
23019+#endif
23020+#ifdef CONFIG_AUFS_XATTR
c1595e42
JR
23021+ {AuBrAttr_ICEX, AUFS_BRATTR_ICEX},
23022+ {AuBrAttr_ICEX_SEC, AUFS_BRATTR_ICEX_SEC},
23023+ {AuBrAttr_ICEX_SYS, AUFS_BRATTR_ICEX_SYS},
23024+ {AuBrAttr_ICEX_TR, AUFS_BRATTR_ICEX_TR},
23025+ {AuBrAttr_ICEX_USR, AUFS_BRATTR_ICEX_USR},
23026+ {AuBrAttr_ICEX_OTH, AUFS_BRATTR_ICEX_OTH},
2000de60 23027+#endif
076b876e
AM
23028+
23029+ /* ro/rr branch */
1e00d052 23030+ {AuBrRAttr_WH, AUFS_BRRATTR_WH},
076b876e
AM
23031+
23032+ /* rw branch */
23033+ {AuBrWAttr_MOO, AUFS_BRWATTR_MOO},
1e00d052 23034+ {AuBrWAttr_NoLinkWH, AUFS_BRWATTR_NLWH},
076b876e 23035+
1e00d052 23036+ {0, NULL}
1facf9fc 23037+};
23038+
1e00d052
AM
23039+static int br_attr_val(char *str, match_table_t table, substring_t args[])
23040+{
23041+ int attr, v;
23042+ char *p;
23043+
23044+ attr = 0;
23045+ do {
23046+ p = strchr(str, '+');
23047+ if (p)
23048+ *p = 0;
23049+ v = match_token(str, table, args);
076b876e
AM
23050+ if (v) {
23051+ if (v & AuBrAttr_CMOO_Mask)
23052+ attr &= ~AuBrAttr_CMOO_Mask;
1e00d052 23053+ attr |= v;
076b876e 23054+ } else {
1e00d052
AM
23055+ if (p)
23056+ *p = '+';
0c3ec466 23057+ pr_warn("ignored branch attribute %s\n", str);
1e00d052
AM
23058+ break;
23059+ }
23060+ if (p)
23061+ str = p + 1;
23062+ } while (p);
23063+
23064+ return attr;
23065+}
23066+
076b876e
AM
23067+static int au_do_optstr_br_attr(au_br_perm_str_t *str, int perm)
23068+{
23069+ int sz;
23070+ const char *p;
23071+ char *q;
23072+
076b876e
AM
23073+ q = str->a;
23074+ *q = 0;
23075+ p = au_optstr(&perm, brattr);
23076+ if (p) {
23077+ sz = strlen(p);
23078+ memcpy(q, p, sz + 1);
23079+ q += sz;
23080+ } else
23081+ goto out;
23082+
23083+ do {
23084+ p = au_optstr(&perm, brattr);
23085+ if (p) {
23086+ *q++ = '+';
23087+ sz = strlen(p);
23088+ memcpy(q, p, sz + 1);
23089+ q += sz;
23090+ }
23091+ } while (p);
23092+
23093+out:
c1595e42 23094+ return q - str->a;
076b876e
AM
23095+}
23096+
4a4d8108 23097+static int noinline_for_stack br_perm_val(char *perm)
1facf9fc 23098+{
076b876e
AM
23099+ int val, bad, sz;
23100+ char *p;
1facf9fc 23101+ substring_t args[MAX_OPT_ARGS];
076b876e 23102+ au_br_perm_str_t attr;
1facf9fc 23103+
1e00d052
AM
23104+ p = strchr(perm, '+');
23105+ if (p)
23106+ *p = 0;
23107+ val = match_token(perm, brperm, args);
23108+ if (!val) {
23109+ if (p)
23110+ *p = '+';
0c3ec466 23111+ pr_warn("ignored branch permission %s\n", perm);
1e00d052
AM
23112+ val = AuBrPerm_RO;
23113+ goto out;
23114+ }
23115+ if (!p)
23116+ goto out;
23117+
076b876e
AM
23118+ val |= br_attr_val(p + 1, brattr, args);
23119+
23120+ bad = 0;
86dc4139 23121+ switch (val & AuBrPerm_Mask) {
1e00d052
AM
23122+ case AuBrPerm_RO:
23123+ case AuBrPerm_RR:
076b876e
AM
23124+ bad = val & AuBrWAttr_Mask;
23125+ val &= ~AuBrWAttr_Mask;
1e00d052
AM
23126+ break;
23127+ case AuBrPerm_RW:
076b876e
AM
23128+ bad = val & AuBrRAttr_Mask;
23129+ val &= ~AuBrRAttr_Mask;
1e00d052
AM
23130+ break;
23131+ }
c1595e42
JR
23132+
23133+ /*
23134+ * 'unpin' attrib becomes meaningless since linux-3.18-rc1, but aufs
23135+ * does not treat it as an error, just warning.
23136+ * this is a tiny guard for the user operation.
23137+ */
23138+ if (val & AuBrAttr_UNPIN) {
23139+ bad |= AuBrAttr_UNPIN;
23140+ val &= ~AuBrAttr_UNPIN;
23141+ }
23142+
076b876e
AM
23143+ if (unlikely(bad)) {
23144+ sz = au_do_optstr_br_attr(&attr, bad);
23145+ AuDebugOn(!sz);
23146+ pr_warn("ignored branch attribute %s\n", attr.a);
23147+ }
1e00d052
AM
23148+
23149+out:
1facf9fc 23150+ return val;
23151+}
23152+
076b876e 23153+void au_optstr_br_perm(au_br_perm_str_t *str, int perm)
1facf9fc 23154+{
076b876e
AM
23155+ au_br_perm_str_t attr;
23156+ const char *p;
23157+ char *q;
1e00d052
AM
23158+ int sz;
23159+
076b876e
AM
23160+ q = str->a;
23161+ p = au_optstr(&perm, brperm);
23162+ AuDebugOn(!p || !*p);
23163+ sz = strlen(p);
23164+ memcpy(q, p, sz + 1);
23165+ q += sz;
1e00d052 23166+
076b876e
AM
23167+ sz = au_do_optstr_br_attr(&attr, perm);
23168+ if (sz) {
23169+ *q++ = '+';
23170+ memcpy(q, attr.a, sz + 1);
1e00d052
AM
23171+ }
23172+
076b876e 23173+ AuDebugOn(strlen(str->a) >= sizeof(str->a));
1facf9fc 23174+}
23175+
23176+/* ---------------------------------------------------------------------- */
23177+
23178+static match_table_t udbalevel = {
23179+ {AuOpt_UDBA_REVAL, "reval"},
23180+ {AuOpt_UDBA_NONE, "none"},
4a4d8108
AM
23181+#ifdef CONFIG_AUFS_HNOTIFY
23182+ {AuOpt_UDBA_HNOTIFY, "notify"}, /* abstraction */
23183+#ifdef CONFIG_AUFS_HFSNOTIFY
23184+ {AuOpt_UDBA_HNOTIFY, "fsnotify"},
4a4d8108 23185+#endif
1facf9fc 23186+#endif
23187+ {-1, NULL}
23188+};
23189+
4a4d8108 23190+static int noinline_for_stack udba_val(char *str)
1facf9fc 23191+{
23192+ substring_t args[MAX_OPT_ARGS];
23193+
7f207e10 23194+ return match_token(str, udbalevel, args);
1facf9fc 23195+}
23196+
23197+const char *au_optstr_udba(int udba)
23198+{
076b876e 23199+ return au_parser_pattern(udba, udbalevel);
1facf9fc 23200+}
23201+
23202+/* ---------------------------------------------------------------------- */
23203+
23204+static match_table_t au_wbr_create_policy = {
23205+ {AuWbrCreate_TDP, "tdp"},
23206+ {AuWbrCreate_TDP, "top-down-parent"},
23207+ {AuWbrCreate_RR, "rr"},
23208+ {AuWbrCreate_RR, "round-robin"},
23209+ {AuWbrCreate_MFS, "mfs"},
23210+ {AuWbrCreate_MFS, "most-free-space"},
23211+ {AuWbrCreate_MFSV, "mfs:%d"},
23212+ {AuWbrCreate_MFSV, "most-free-space:%d"},
23213+
23214+ {AuWbrCreate_MFSRR, "mfsrr:%d"},
23215+ {AuWbrCreate_MFSRRV, "mfsrr:%d:%d"},
23216+ {AuWbrCreate_PMFS, "pmfs"},
23217+ {AuWbrCreate_PMFSV, "pmfs:%d"},
392086de
AM
23218+ {AuWbrCreate_PMFSRR, "pmfsrr:%d"},
23219+ {AuWbrCreate_PMFSRRV, "pmfsrr:%d:%d"},
1facf9fc 23220+
23221+ {-1, NULL}
23222+};
23223+
dece6358
AM
23224+/*
23225+ * cf. linux/lib/parser.c and cmdline.c
23226+ * gave up calling memparse() since it uses simple_strtoull() instead of
9dbd164d 23227+ * kstrto...().
dece6358 23228+ */
4a4d8108
AM
23229+static int noinline_for_stack
23230+au_match_ull(substring_t *s, unsigned long long *result)
1facf9fc 23231+{
23232+ int err;
23233+ unsigned int len;
23234+ char a[32];
23235+
23236+ err = -ERANGE;
23237+ len = s->to - s->from;
23238+ if (len + 1 <= sizeof(a)) {
23239+ memcpy(a, s->from, len);
23240+ a[len] = '\0';
9dbd164d 23241+ err = kstrtoull(a, 0, result);
1facf9fc 23242+ }
23243+ return err;
23244+}
23245+
23246+static int au_wbr_mfs_wmark(substring_t *arg, char *str,
23247+ struct au_opt_wbr_create *create)
23248+{
23249+ int err;
23250+ unsigned long long ull;
23251+
23252+ err = 0;
23253+ if (!au_match_ull(arg, &ull))
23254+ create->mfsrr_watermark = ull;
23255+ else {
4a4d8108 23256+ pr_err("bad integer in %s\n", str);
1facf9fc 23257+ err = -EINVAL;
23258+ }
23259+
23260+ return err;
23261+}
23262+
23263+static int au_wbr_mfs_sec(substring_t *arg, char *str,
23264+ struct au_opt_wbr_create *create)
23265+{
23266+ int n, err;
23267+
23268+ err = 0;
027c5e7a 23269+ if (!match_int(arg, &n) && 0 <= n && n <= AUFS_MFS_MAX_SEC)
1facf9fc 23270+ create->mfs_second = n;
23271+ else {
4a4d8108 23272+ pr_err("bad integer in %s\n", str);
1facf9fc 23273+ err = -EINVAL;
23274+ }
23275+
23276+ return err;
23277+}
23278+
4a4d8108
AM
23279+static int noinline_for_stack
23280+au_wbr_create_val(char *str, struct au_opt_wbr_create *create)
1facf9fc 23281+{
23282+ int err, e;
23283+ substring_t args[MAX_OPT_ARGS];
23284+
23285+ err = match_token(str, au_wbr_create_policy, args);
23286+ create->wbr_create = err;
23287+ switch (err) {
23288+ case AuWbrCreate_MFSRRV:
392086de 23289+ case AuWbrCreate_PMFSRRV:
1facf9fc 23290+ e = au_wbr_mfs_wmark(&args[0], str, create);
23291+ if (!e)
23292+ e = au_wbr_mfs_sec(&args[1], str, create);
23293+ if (unlikely(e))
23294+ err = e;
23295+ break;
23296+ case AuWbrCreate_MFSRR:
392086de 23297+ case AuWbrCreate_PMFSRR:
1facf9fc 23298+ e = au_wbr_mfs_wmark(&args[0], str, create);
23299+ if (unlikely(e)) {
23300+ err = e;
23301+ break;
23302+ }
23303+ /*FALLTHROUGH*/
23304+ case AuWbrCreate_MFS:
23305+ case AuWbrCreate_PMFS:
027c5e7a 23306+ create->mfs_second = AUFS_MFS_DEF_SEC;
1facf9fc 23307+ break;
23308+ case AuWbrCreate_MFSV:
23309+ case AuWbrCreate_PMFSV:
23310+ e = au_wbr_mfs_sec(&args[0], str, create);
23311+ if (unlikely(e))
23312+ err = e;
23313+ break;
23314+ }
23315+
23316+ return err;
23317+}
23318+
23319+const char *au_optstr_wbr_create(int wbr_create)
23320+{
076b876e 23321+ return au_parser_pattern(wbr_create, au_wbr_create_policy);
1facf9fc 23322+}
23323+
23324+static match_table_t au_wbr_copyup_policy = {
23325+ {AuWbrCopyup_TDP, "tdp"},
23326+ {AuWbrCopyup_TDP, "top-down-parent"},
23327+ {AuWbrCopyup_BUP, "bup"},
23328+ {AuWbrCopyup_BUP, "bottom-up-parent"},
23329+ {AuWbrCopyup_BU, "bu"},
23330+ {AuWbrCopyup_BU, "bottom-up"},
23331+ {-1, NULL}
23332+};
23333+
4a4d8108 23334+static int noinline_for_stack au_wbr_copyup_val(char *str)
1facf9fc 23335+{
23336+ substring_t args[MAX_OPT_ARGS];
23337+
23338+ return match_token(str, au_wbr_copyup_policy, args);
23339+}
23340+
23341+const char *au_optstr_wbr_copyup(int wbr_copyup)
23342+{
076b876e 23343+ return au_parser_pattern(wbr_copyup, au_wbr_copyup_policy);
1facf9fc 23344+}
23345+
23346+/* ---------------------------------------------------------------------- */
23347+
23348+static const int lkup_dirflags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
23349+
/*
 * Debugging aid: print every parsed option in @opts, from opts->opt up to
 * (not including) the Opt_tail terminator.  The body is compiled only when
 * CONFIG_AUFS_DEBUG is defined; otherwise this is an empty function.
 * An option type unknown to this function triggers BUG().
 */
static void dump_opts(struct au_opts *opts)
{
#ifdef CONFIG_AUFS_DEBUG
	/* reduce stack space */
	union {
		struct au_opt_add *add;
		struct au_opt_del *del;
		struct au_opt_mod *mod;
		struct au_opt_xino *xino;
		struct au_opt_xino_itrunc *xino_itrunc;
		struct au_opt_wbr_create *create;
	} u;
	struct au_opt *opt;

	opt = opts->opt;
	while (opt->type != Opt_tail) {
		switch (opt->type) {
		case Opt_add:
			u.add = &opt->add;
			AuDbg("add {b%d, %s, 0x%x, %p}\n",
			      u.add->bindex, u.add->pathname, u.add->perm,
			      u.add->path.dentry);
			break;
		case Opt_del:
		case Opt_idel:
			u.del = &opt->del;
			AuDbg("del {%s, %p}\n",
			      u.del->pathname, u.del->h_path.dentry);
			break;
		case Opt_mod:
		case Opt_imod:
			u.mod = &opt->mod;
			AuDbg("mod {%s, 0x%x, %p}\n",
			      u.mod->path, u.mod->perm, u.mod->h_root);
			break;
		case Opt_append:
			u.add = &opt->add;
			AuDbg("append {b%d, %s, 0x%x, %p}\n",
			      u.add->bindex, u.add->pathname, u.add->perm,
			      u.add->path.dentry);
			break;
		case Opt_prepend:
			u.add = &opt->add;
			AuDbg("prepend {b%d, %s, 0x%x, %p}\n",
			      u.add->bindex, u.add->pathname, u.add->perm,
			      u.add->path.dentry);
			break;
		case Opt_dirwh:
			AuDbg("dirwh %d\n", opt->dirwh);
			break;
		case Opt_rdcache:
			AuDbg("rdcache %d\n", opt->rdcache);
			break;
		case Opt_rdblk:
			AuDbg("rdblk %u\n", opt->rdblk);
			break;
		case Opt_rdblk_def:
			AuDbg("rdblk_def\n");
			break;
		case Opt_rdhash:
			AuDbg("rdhash %u\n", opt->rdhash);
			break;
		case Opt_rdhash_def:
			AuDbg("rdhash_def\n");
			break;
		case Opt_xino:
			u.xino = &opt->xino;
			AuDbg("xino {%s %pD}\n", u.xino->path, u.xino->file);
			break;
		case Opt_trunc_xino:
			AuLabel(trunc_xino);
			break;
		case Opt_notrunc_xino:
			AuLabel(notrunc_xino);
			break;
		case Opt_trunc_xino_path:
		case Opt_itrunc_xino:
			u.xino_itrunc = &opt->xino_itrunc;
			AuDbg("trunc_xino %d\n", u.xino_itrunc->bindex);
			break;
		case Opt_noxino:
			AuLabel(noxino);
			break;
		case Opt_trunc_xib:
			AuLabel(trunc_xib);
			break;
		case Opt_notrunc_xib:
			AuLabel(notrunc_xib);
			break;
		case Opt_shwh:
			AuLabel(shwh);
			break;
		case Opt_noshwh:
			AuLabel(noshwh);
			break;
		case Opt_dirperm1:
			AuLabel(dirperm1);
			break;
		case Opt_nodirperm1:
			AuLabel(nodirperm1);
			break;
		case Opt_plink:
			AuLabel(plink);
			break;
		case Opt_noplink:
			AuLabel(noplink);
			break;
		case Opt_list_plink:
			AuLabel(list_plink);
			break;
		case Opt_udba:
			AuDbg("udba %d, %s\n",
			      opt->udba, au_optstr_udba(opt->udba));
			break;
		case Opt_dio:
			AuLabel(dio);
			break;
		case Opt_nodio:
			AuLabel(nodio);
			break;
		case Opt_diropq_a:
			AuLabel(diropq_a);
			break;
		case Opt_diropq_w:
			AuLabel(diropq_w);
			break;
		case Opt_warn_perm:
			AuLabel(warn_perm);
			break;
		case Opt_nowarn_perm:
			AuLabel(nowarn_perm);
			break;
		case Opt_verbose:
			AuLabel(verbose);
			break;
		case Opt_noverbose:
			AuLabel(noverbose);
			break;
		case Opt_sum:
			AuLabel(sum);
			break;
		case Opt_nosum:
			AuLabel(nosum);
			break;
		case Opt_wsum:
			AuLabel(wsum);
			break;
		case Opt_wbr_create:
			u.create = &opt->wbr_create;
			AuDbg("create %d, %s\n", u.create->wbr_create,
			      au_optstr_wbr_create(u.create->wbr_create));
			switch (u.create->wbr_create) {
			case AuWbrCreate_MFSV:
			case AuWbrCreate_PMFSV:
				AuDbg("%d sec\n", u.create->mfs_second);
				break;
			case AuWbrCreate_MFSRR:
			case AuWbrCreate_PMFSRR:
				/*
				 * pmfsrr carries a watermark exactly like
				 * mfsrr (see au_wbr_create_val()), so dump
				 * it for both.
				 */
				AuDbg("%llu watermark\n",
				      u.create->mfsrr_watermark);
				break;
			case AuWbrCreate_MFSRRV:
			case AuWbrCreate_PMFSRRV:
				AuDbg("%llu watermark, %d sec\n",
				      u.create->mfsrr_watermark,
				      u.create->mfs_second);
				break;
			}
			break;
		case Opt_wbr_copyup:
			AuDbg("copyup %d, %s\n", opt->wbr_copyup,
			      au_optstr_wbr_copyup(opt->wbr_copyup));
			break;
		case Opt_fhsm_sec:
			AuDbg("fhsm_sec %u\n", opt->fhsm_second);
			break;
		case Opt_acl:
			AuLabel(acl);
			break;
		case Opt_noacl:
			AuLabel(noacl);
			break;
		default:
			BUG();
		}
		opt++;
	}
#endif
}
23538+
23539+void au_opts_free(struct au_opts *opts)
23540+{
23541+ struct au_opt *opt;
23542+
23543+ opt = opts->opt;
23544+ while (opt->type != Opt_tail) {
23545+ switch (opt->type) {
23546+ case Opt_add:
23547+ case Opt_append:
23548+ case Opt_prepend:
23549+ path_put(&opt->add.path);
23550+ break;
23551+ case Opt_del:
23552+ case Opt_idel:
23553+ path_put(&opt->del.h_path);
23554+ break;
23555+ case Opt_mod:
23556+ case Opt_imod:
23557+ dput(opt->mod.h_root);
23558+ break;
23559+ case Opt_xino:
23560+ fput(opt->xino.file);
23561+ break;
23562+ }
23563+ opt++;
23564+ }
23565+}
23566+
23567+static int opt_add(struct au_opt *opt, char *opt_str, unsigned long sb_flags,
23568+ aufs_bindex_t bindex)
23569+{
23570+ int err;
23571+ struct au_opt_add *add = &opt->add;
23572+ char *p;
23573+
23574+ add->bindex = bindex;
1e00d052 23575+ add->perm = AuBrPerm_RO;
1facf9fc 23576+ add->pathname = opt_str;
23577+ p = strchr(opt_str, '=');
23578+ if (p) {
23579+ *p++ = 0;
23580+ if (*p)
23581+ add->perm = br_perm_val(p);
23582+ }
23583+
23584+ err = vfsub_kern_path(add->pathname, lkup_dirflags, &add->path);
23585+ if (!err) {
23586+ if (!p) {
23587+ add->perm = AuBrPerm_RO;
23588+ if (au_test_fs_rr(add->path.dentry->d_sb))
23589+ add->perm = AuBrPerm_RR;
23590+ else if (!bindex && !(sb_flags & MS_RDONLY))
23591+ add->perm = AuBrPerm_RW;
23592+ }
23593+ opt->type = Opt_add;
23594+ goto out;
23595+ }
4a4d8108 23596+ pr_err("lookup failed %s (%d)\n", add->pathname, err);
1facf9fc 23597+ err = -EINVAL;
23598+
4f0767ce 23599+out:
1facf9fc 23600+ return err;
23601+}
23602+
23603+static int au_opts_parse_del(struct au_opt_del *del, substring_t args[])
23604+{
23605+ int err;
23606+
23607+ del->pathname = args[0].from;
23608+ AuDbg("del path %s\n", del->pathname);
23609+
23610+ err = vfsub_kern_path(del->pathname, lkup_dirflags, &del->h_path);
23611+ if (unlikely(err))
4a4d8108 23612+ pr_err("lookup failed %s (%d)\n", del->pathname, err);
1facf9fc 23613+
23614+ return err;
23615+}
23616+
#if 0 /* reserved for future use */
/*
 * Handle "idel" (delete a branch by index): validate @bindex against the
 * current branches of @sb and take references on the branch root dentry and
 * its mount into @del->h_path.  Returns zero or -EINVAL.
 */
static int au_opts_parse_idel(struct super_block *sb, aufs_bindex_t bindex,
			      struct au_opt_del *del, substring_t args[])
{
	int ret;
	struct dentry *root;

	root = sb->s_root;
	aufs_read_lock(root, AuLock_FLUSH);

	ret = -EINVAL;
	if (bindex < 0 || au_sbend(sb) < bindex) {
		pr_err("out of bounds, %d\n", bindex);
	} else {
		ret = 0;
		del->h_path.dentry = dget(au_h_dptr(root, bindex));
		del->h_path.mnt = mntget(au_sbr_mnt(sb, bindex));
	}

	aufs_read_unlock(root, !AuLock_IR);
	return ret;
}
#endif
23641+
4a4d8108
AM
23642+static int noinline_for_stack
23643+au_opts_parse_mod(struct au_opt_mod *mod, substring_t args[])
1facf9fc 23644+{
23645+ int err;
23646+ struct path path;
23647+ char *p;
23648+
23649+ err = -EINVAL;
23650+ mod->path = args[0].from;
23651+ p = strchr(mod->path, '=');
23652+ if (unlikely(!p)) {
4a4d8108 23653+ pr_err("no permssion %s\n", args[0].from);
1facf9fc 23654+ goto out;
23655+ }
23656+
23657+ *p++ = 0;
23658+ err = vfsub_kern_path(mod->path, lkup_dirflags, &path);
23659+ if (unlikely(err)) {
4a4d8108 23660+ pr_err("lookup failed %s (%d)\n", mod->path, err);
1facf9fc 23661+ goto out;
23662+ }
23663+
23664+ mod->perm = br_perm_val(p);
23665+ AuDbg("mod path %s, perm 0x%x, %s\n", mod->path, mod->perm, p);
23666+ mod->h_root = dget(path.dentry);
23667+ path_put(&path);
23668+
4f0767ce 23669+out:
1facf9fc 23670+ return err;
23671+}
23672+
#if 0 /* reserved for future use */
/*
 * Handle "imod" (modify a branch by index): validate @bindex against the
 * current branches of @sb, convert the permission string in @args[1] and
 * take a reference on the branch root dentry into @mod->h_root.
 * Returns zero or -EINVAL.
 */
static int au_opts_parse_imod(struct super_block *sb, aufs_bindex_t bindex,
			      struct au_opt_mod *mod, substring_t args[])
{
	int ret;
	struct dentry *root;

	root = sb->s_root;
	aufs_read_lock(root, AuLock_FLUSH);

	ret = -EINVAL;
	if (bindex < 0 || au_sbend(sb) < bindex) {
		pr_err("out of bounds, %d\n", bindex);
	} else {
		ret = 0;
		mod->perm = br_perm_val(args[1].from);
		AuDbg("mod path %s, perm 0x%x, %s\n",
		      mod->path, mod->perm, args[1].from);
		mod->h_root = dget(au_h_dptr(root, bindex));
	}

	aufs_read_unlock(root, !AuLock_IR);
	return ret;
}
#endif
23699+
23700+static int au_opts_parse_xino(struct super_block *sb, struct au_opt_xino *xino,
23701+ substring_t args[])
23702+{
23703+ int err;
23704+ struct file *file;
23705+
23706+ file = au_xino_create(sb, args[0].from, /*silent*/0);
23707+ err = PTR_ERR(file);
23708+ if (IS_ERR(file))
23709+ goto out;
23710+
23711+ err = -EINVAL;
2000de60 23712+ if (unlikely(file->f_path.dentry->d_sb == sb)) {
1facf9fc 23713+ fput(file);
4a4d8108 23714+ pr_err("%s must be outside\n", args[0].from);
1facf9fc 23715+ goto out;
23716+ }
23717+
23718+ err = 0;
23719+ xino->file = file;
23720+ xino->path = args[0].from;
23721+
4f0767ce 23722+out:
1facf9fc 23723+ return err;
23724+}
23725+
4a4d8108
AM
23726+static int noinline_for_stack
23727+au_opts_parse_xino_itrunc_path(struct super_block *sb,
23728+ struct au_opt_xino_itrunc *xino_itrunc,
23729+ substring_t args[])
1facf9fc 23730+{
23731+ int err;
23732+ aufs_bindex_t bend, bindex;
23733+ struct path path;
23734+ struct dentry *root;
23735+
23736+ err = vfsub_kern_path(args[0].from, lkup_dirflags, &path);
23737+ if (unlikely(err)) {
4a4d8108 23738+ pr_err("lookup failed %s (%d)\n", args[0].from, err);
1facf9fc 23739+ goto out;
23740+ }
23741+
23742+ xino_itrunc->bindex = -1;
23743+ root = sb->s_root;
23744+ aufs_read_lock(root, AuLock_FLUSH);
23745+ bend = au_sbend(sb);
23746+ for (bindex = 0; bindex <= bend; bindex++) {
23747+ if (au_h_dptr(root, bindex) == path.dentry) {
23748+ xino_itrunc->bindex = bindex;
23749+ break;
23750+ }
23751+ }
23752+ aufs_read_unlock(root, !AuLock_IR);
23753+ path_put(&path);
23754+
23755+ if (unlikely(xino_itrunc->bindex < 0)) {
4a4d8108 23756+ pr_err("no such branch %s\n", args[0].from);
1facf9fc 23757+ err = -EINVAL;
23758+ }
23759+
4f0767ce 23760+out:
1facf9fc 23761+ return err;
23762+}
23763+
23764+/* called without aufs lock */
23765+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts)
23766+{
23767+ int err, n, token;
23768+ aufs_bindex_t bindex;
23769+ unsigned char skipped;
23770+ struct dentry *root;
23771+ struct au_opt *opt, *opt_tail;
23772+ char *opt_str;
23773+ /* reduce the stack space */
23774+ union {
23775+ struct au_opt_xino_itrunc *xino_itrunc;
23776+ struct au_opt_wbr_create *create;
23777+ } u;
23778+ struct {
23779+ substring_t args[MAX_OPT_ARGS];
23780+ } *a;
23781+
23782+ err = -ENOMEM;
23783+ a = kmalloc(sizeof(*a), GFP_NOFS);
23784+ if (unlikely(!a))
23785+ goto out;
23786+
23787+ root = sb->s_root;
23788+ err = 0;
23789+ bindex = 0;
23790+ opt = opts->opt;
23791+ opt_tail = opt + opts->max_opt - 1;
23792+ opt->type = Opt_tail;
23793+ while (!err && (opt_str = strsep(&str, ",")) && *opt_str) {
23794+ err = -EINVAL;
23795+ skipped = 0;
23796+ token = match_token(opt_str, options, a->args);
23797+ switch (token) {
23798+ case Opt_br:
23799+ err = 0;
23800+ while (!err && (opt_str = strsep(&a->args[0].from, ":"))
23801+ && *opt_str) {
23802+ err = opt_add(opt, opt_str, opts->sb_flags,
23803+ bindex++);
23804+ if (unlikely(!err && ++opt > opt_tail)) {
23805+ err = -E2BIG;
23806+ break;
23807+ }
23808+ opt->type = Opt_tail;
23809+ skipped = 1;
23810+ }
23811+ break;
23812+ case Opt_add:
23813+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 23814+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23815+ break;
23816+ }
23817+ bindex = n;
23818+ err = opt_add(opt, a->args[1].from, opts->sb_flags,
23819+ bindex);
23820+ if (!err)
23821+ opt->type = token;
23822+ break;
23823+ case Opt_append:
23824+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
23825+ /*dummy bindex*/1);
23826+ if (!err)
23827+ opt->type = token;
23828+ break;
23829+ case Opt_prepend:
23830+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
23831+ /*bindex*/0);
23832+ if (!err)
23833+ opt->type = token;
23834+ break;
23835+ case Opt_del:
23836+ err = au_opts_parse_del(&opt->del, a->args);
23837+ if (!err)
23838+ opt->type = token;
23839+ break;
23840+#if 0 /* reserved for future use */
23841+ case Opt_idel:
23842+ del->pathname = "(indexed)";
23843+ if (unlikely(match_int(&args[0], &n))) {
4a4d8108 23844+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23845+ break;
23846+ }
23847+ err = au_opts_parse_idel(sb, n, &opt->del, a->args);
23848+ if (!err)
23849+ opt->type = token;
23850+ break;
23851+#endif
23852+ case Opt_mod:
23853+ err = au_opts_parse_mod(&opt->mod, a->args);
23854+ if (!err)
23855+ opt->type = token;
23856+ break;
23857+#ifdef IMOD /* reserved for future use */
23858+ case Opt_imod:
23859+ u.mod->path = "(indexed)";
23860+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 23861+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23862+ break;
23863+ }
23864+ err = au_opts_parse_imod(sb, n, &opt->mod, a->args);
23865+ if (!err)
23866+ opt->type = token;
23867+ break;
23868+#endif
23869+ case Opt_xino:
23870+ err = au_opts_parse_xino(sb, &opt->xino, a->args);
23871+ if (!err)
23872+ opt->type = token;
23873+ break;
23874+
23875+ case Opt_trunc_xino_path:
23876+ err = au_opts_parse_xino_itrunc_path
23877+ (sb, &opt->xino_itrunc, a->args);
23878+ if (!err)
23879+ opt->type = token;
23880+ break;
23881+
23882+ case Opt_itrunc_xino:
23883+ u.xino_itrunc = &opt->xino_itrunc;
23884+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 23885+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23886+ break;
23887+ }
23888+ u.xino_itrunc->bindex = n;
23889+ aufs_read_lock(root, AuLock_FLUSH);
23890+ if (n < 0 || au_sbend(sb) < n) {
4a4d8108 23891+ pr_err("out of bounds, %d\n", n);
1facf9fc 23892+ aufs_read_unlock(root, !AuLock_IR);
23893+ break;
23894+ }
23895+ aufs_read_unlock(root, !AuLock_IR);
23896+ err = 0;
23897+ opt->type = token;
23898+ break;
23899+
23900+ case Opt_dirwh:
23901+ if (unlikely(match_int(&a->args[0], &opt->dirwh)))
23902+ break;
23903+ err = 0;
23904+ opt->type = token;
23905+ break;
23906+
23907+ case Opt_rdcache:
027c5e7a
AM
23908+ if (unlikely(match_int(&a->args[0], &n))) {
23909+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23910+ break;
027c5e7a
AM
23911+ }
23912+ if (unlikely(n > AUFS_RDCACHE_MAX)) {
23913+ pr_err("rdcache must be smaller than %d\n",
23914+ AUFS_RDCACHE_MAX);
23915+ break;
23916+ }
23917+ opt->rdcache = n;
1facf9fc 23918+ err = 0;
23919+ opt->type = token;
23920+ break;
23921+ case Opt_rdblk:
23922+ if (unlikely(match_int(&a->args[0], &n)
1308ab2a 23923+ || n < 0
1facf9fc 23924+ || n > KMALLOC_MAX_SIZE)) {
4a4d8108 23925+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23926+ break;
23927+ }
1308ab2a 23928+ if (unlikely(n && n < NAME_MAX)) {
4a4d8108
AM
23929+ pr_err("rdblk must be larger than %d\n",
23930+ NAME_MAX);
1facf9fc 23931+ break;
23932+ }
23933+ opt->rdblk = n;
23934+ err = 0;
23935+ opt->type = token;
23936+ break;
23937+ case Opt_rdhash:
23938+ if (unlikely(match_int(&a->args[0], &n)
1308ab2a 23939+ || n < 0
1facf9fc 23940+ || n * sizeof(struct hlist_head)
23941+ > KMALLOC_MAX_SIZE)) {
4a4d8108 23942+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23943+ break;
23944+ }
23945+ opt->rdhash = n;
23946+ err = 0;
23947+ opt->type = token;
23948+ break;
23949+
23950+ case Opt_trunc_xino:
23951+ case Opt_notrunc_xino:
23952+ case Opt_noxino:
23953+ case Opt_trunc_xib:
23954+ case Opt_notrunc_xib:
dece6358
AM
23955+ case Opt_shwh:
23956+ case Opt_noshwh:
076b876e
AM
23957+ case Opt_dirperm1:
23958+ case Opt_nodirperm1:
1facf9fc 23959+ case Opt_plink:
23960+ case Opt_noplink:
23961+ case Opt_list_plink:
4a4d8108
AM
23962+ case Opt_dio:
23963+ case Opt_nodio:
1facf9fc 23964+ case Opt_diropq_a:
23965+ case Opt_diropq_w:
23966+ case Opt_warn_perm:
23967+ case Opt_nowarn_perm:
1facf9fc 23968+ case Opt_verbose:
23969+ case Opt_noverbose:
23970+ case Opt_sum:
23971+ case Opt_nosum:
23972+ case Opt_wsum:
dece6358
AM
23973+ case Opt_rdblk_def:
23974+ case Opt_rdhash_def:
c1595e42
JR
23975+ case Opt_acl:
23976+ case Opt_noacl:
1facf9fc 23977+ err = 0;
23978+ opt->type = token;
23979+ break;
23980+
23981+ case Opt_udba:
23982+ opt->udba = udba_val(a->args[0].from);
23983+ if (opt->udba >= 0) {
23984+ err = 0;
23985+ opt->type = token;
23986+ } else
4a4d8108 23987+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 23988+ break;
23989+
23990+ case Opt_wbr_create:
23991+ u.create = &opt->wbr_create;
23992+ u.create->wbr_create
23993+ = au_wbr_create_val(a->args[0].from, u.create);
23994+ if (u.create->wbr_create >= 0) {
23995+ err = 0;
23996+ opt->type = token;
23997+ } else
4a4d8108 23998+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 23999+ break;
24000+ case Opt_wbr_copyup:
24001+ opt->wbr_copyup = au_wbr_copyup_val(a->args[0].from);
24002+ if (opt->wbr_copyup >= 0) {
24003+ err = 0;
24004+ opt->type = token;
24005+ } else
4a4d8108 24006+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 24007+ break;
24008+
076b876e
AM
24009+ case Opt_fhsm_sec:
24010+ if (unlikely(match_int(&a->args[0], &n)
24011+ || n < 0)) {
24012+ pr_err("bad integer in %s\n", opt_str);
24013+ break;
24014+ }
24015+ if (sysaufs_brs) {
24016+ opt->fhsm_second = n;
24017+ opt->type = token;
24018+ } else
24019+ pr_warn("ignored %s\n", opt_str);
24020+ err = 0;
24021+ break;
24022+
1facf9fc 24023+ case Opt_ignore:
0c3ec466 24024+ pr_warn("ignored %s\n", opt_str);
1facf9fc 24025+ /*FALLTHROUGH*/
24026+ case Opt_ignore_silent:
24027+ skipped = 1;
24028+ err = 0;
24029+ break;
24030+ case Opt_err:
4a4d8108 24031+ pr_err("unknown option %s\n", opt_str);
1facf9fc 24032+ break;
24033+ }
24034+
24035+ if (!err && !skipped) {
24036+ if (unlikely(++opt > opt_tail)) {
24037+ err = -E2BIG;
24038+ opt--;
24039+ opt->type = Opt_tail;
24040+ break;
24041+ }
24042+ opt->type = Opt_tail;
24043+ }
24044+ }
24045+
24046+ kfree(a);
24047+ dump_opts(opts);
24048+ if (unlikely(err))
24049+ au_opts_free(opts);
24050+
4f0767ce 24051+out:
1facf9fc 24052+ return err;
24053+}
24054+
24055+static int au_opt_wbr_create(struct super_block *sb,
24056+ struct au_opt_wbr_create *create)
24057+{
24058+ int err;
24059+ struct au_sbinfo *sbinfo;
24060+
dece6358
AM
24061+ SiMustWriteLock(sb);
24062+
1facf9fc 24063+ err = 1; /* handled */
24064+ sbinfo = au_sbi(sb);
24065+ if (sbinfo->si_wbr_create_ops->fin) {
24066+ err = sbinfo->si_wbr_create_ops->fin(sb);
24067+ if (!err)
24068+ err = 1;
24069+ }
24070+
24071+ sbinfo->si_wbr_create = create->wbr_create;
24072+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + create->wbr_create;
24073+ switch (create->wbr_create) {
24074+ case AuWbrCreate_MFSRRV:
24075+ case AuWbrCreate_MFSRR:
392086de
AM
24076+ case AuWbrCreate_PMFSRR:
24077+ case AuWbrCreate_PMFSRRV:
1facf9fc 24078+ sbinfo->si_wbr_mfs.mfsrr_watermark = create->mfsrr_watermark;
24079+ /*FALLTHROUGH*/
24080+ case AuWbrCreate_MFS:
24081+ case AuWbrCreate_MFSV:
24082+ case AuWbrCreate_PMFS:
24083+ case AuWbrCreate_PMFSV:
e49829fe
JR
24084+ sbinfo->si_wbr_mfs.mfs_expire
24085+ = msecs_to_jiffies(create->mfs_second * MSEC_PER_SEC);
1facf9fc 24086+ break;
24087+ }
24088+
24089+ if (sbinfo->si_wbr_create_ops->init)
24090+ sbinfo->si_wbr_create_ops->init(sb); /* ignore */
24091+
24092+ return err;
24093+}
24094+
24095+/*
24096+ * returns,
24097+ * plus: processed without an error
24098+ * zero: unprocessed
24099+ */
24100+static int au_opt_simple(struct super_block *sb, struct au_opt *opt,
24101+ struct au_opts *opts)
24102+{
24103+ int err;
24104+ struct au_sbinfo *sbinfo;
24105+
dece6358
AM
24106+ SiMustWriteLock(sb);
24107+
1facf9fc 24108+ err = 1; /* handled */
24109+ sbinfo = au_sbi(sb);
24110+ switch (opt->type) {
24111+ case Opt_udba:
24112+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
24113+ sbinfo->si_mntflags |= opt->udba;
24114+ opts->given_udba |= opt->udba;
24115+ break;
24116+
24117+ case Opt_plink:
24118+ au_opt_set(sbinfo->si_mntflags, PLINK);
24119+ break;
24120+ case Opt_noplink:
24121+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
e49829fe 24122+ au_plink_put(sb, /*verbose*/1);
1facf9fc 24123+ au_opt_clr(sbinfo->si_mntflags, PLINK);
24124+ break;
24125+ case Opt_list_plink:
24126+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
24127+ au_plink_list(sb);
24128+ break;
24129+
4a4d8108
AM
24130+ case Opt_dio:
24131+ au_opt_set(sbinfo->si_mntflags, DIO);
24132+ au_fset_opts(opts->flags, REFRESH_DYAOP);
24133+ break;
24134+ case Opt_nodio:
24135+ au_opt_clr(sbinfo->si_mntflags, DIO);
24136+ au_fset_opts(opts->flags, REFRESH_DYAOP);
24137+ break;
24138+
076b876e
AM
24139+ case Opt_fhsm_sec:
24140+ au_fhsm_set(sbinfo, opt->fhsm_second);
24141+ break;
24142+
1facf9fc 24143+ case Opt_diropq_a:
24144+ au_opt_set(sbinfo->si_mntflags, ALWAYS_DIROPQ);
24145+ break;
24146+ case Opt_diropq_w:
24147+ au_opt_clr(sbinfo->si_mntflags, ALWAYS_DIROPQ);
24148+ break;
24149+
24150+ case Opt_warn_perm:
24151+ au_opt_set(sbinfo->si_mntflags, WARN_PERM);
24152+ break;
24153+ case Opt_nowarn_perm:
24154+ au_opt_clr(sbinfo->si_mntflags, WARN_PERM);
24155+ break;
24156+
1facf9fc 24157+ case Opt_verbose:
24158+ au_opt_set(sbinfo->si_mntflags, VERBOSE);
24159+ break;
24160+ case Opt_noverbose:
24161+ au_opt_clr(sbinfo->si_mntflags, VERBOSE);
24162+ break;
24163+
24164+ case Opt_sum:
24165+ au_opt_set(sbinfo->si_mntflags, SUM);
24166+ break;
24167+ case Opt_wsum:
24168+ au_opt_clr(sbinfo->si_mntflags, SUM);
24169+ au_opt_set(sbinfo->si_mntflags, SUM_W);
24170+ case Opt_nosum:
24171+ au_opt_clr(sbinfo->si_mntflags, SUM);
24172+ au_opt_clr(sbinfo->si_mntflags, SUM_W);
24173+ break;
24174+
24175+ case Opt_wbr_create:
24176+ err = au_opt_wbr_create(sb, &opt->wbr_create);
24177+ break;
24178+ case Opt_wbr_copyup:
24179+ sbinfo->si_wbr_copyup = opt->wbr_copyup;
24180+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + opt->wbr_copyup;
24181+ break;
24182+
24183+ case Opt_dirwh:
24184+ sbinfo->si_dirwh = opt->dirwh;
24185+ break;
24186+
24187+ case Opt_rdcache:
e49829fe
JR
24188+ sbinfo->si_rdcache
24189+ = msecs_to_jiffies(opt->rdcache * MSEC_PER_SEC);
1facf9fc 24190+ break;
24191+ case Opt_rdblk:
24192+ sbinfo->si_rdblk = opt->rdblk;
24193+ break;
dece6358
AM
24194+ case Opt_rdblk_def:
24195+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
24196+ break;
1facf9fc 24197+ case Opt_rdhash:
24198+ sbinfo->si_rdhash = opt->rdhash;
24199+ break;
dece6358
AM
24200+ case Opt_rdhash_def:
24201+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
24202+ break;
24203+
24204+ case Opt_shwh:
24205+ au_opt_set(sbinfo->si_mntflags, SHWH);
24206+ break;
24207+ case Opt_noshwh:
24208+ au_opt_clr(sbinfo->si_mntflags, SHWH);
24209+ break;
1facf9fc 24210+
076b876e
AM
24211+ case Opt_dirperm1:
24212+ au_opt_set(sbinfo->si_mntflags, DIRPERM1);
24213+ break;
24214+ case Opt_nodirperm1:
24215+ au_opt_clr(sbinfo->si_mntflags, DIRPERM1);
24216+ break;
24217+
1facf9fc 24218+ case Opt_trunc_xino:
24219+ au_opt_set(sbinfo->si_mntflags, TRUNC_XINO);
24220+ break;
24221+ case Opt_notrunc_xino:
24222+ au_opt_clr(sbinfo->si_mntflags, TRUNC_XINO);
24223+ break;
24224+
24225+ case Opt_trunc_xino_path:
24226+ case Opt_itrunc_xino:
24227+ err = au_xino_trunc(sb, opt->xino_itrunc.bindex);
24228+ if (!err)
24229+ err = 1;
24230+ break;
24231+
24232+ case Opt_trunc_xib:
24233+ au_fset_opts(opts->flags, TRUNC_XIB);
24234+ break;
24235+ case Opt_notrunc_xib:
24236+ au_fclr_opts(opts->flags, TRUNC_XIB);
24237+ break;
24238+
c1595e42
JR
24239+ case Opt_acl:
24240+ sb->s_flags |= MS_POSIXACL;
24241+ break;
24242+ case Opt_noacl:
24243+ sb->s_flags &= ~MS_POSIXACL;
24244+ break;
24245+
1facf9fc 24246+ default:
24247+ err = 0;
24248+ break;
24249+ }
24250+
24251+ return err;
24252+}
24253+
24254+/*
24255+ * returns tri-state.
24256+ * plus: processed without an error
24257+ * zero: unprocessed
24258+ * minus: error
24259+ */
24260+static int au_opt_br(struct super_block *sb, struct au_opt *opt,
24261+ struct au_opts *opts)
24262+{
24263+ int err, do_refresh;
24264+
24265+ err = 0;
24266+ switch (opt->type) {
24267+ case Opt_append:
24268+ opt->add.bindex = au_sbend(sb) + 1;
24269+ if (opt->add.bindex < 0)
24270+ opt->add.bindex = 0;
24271+ goto add;
24272+ case Opt_prepend:
24273+ opt->add.bindex = 0;
f6b6e03d 24274+ add: /* indented label */
1facf9fc 24275+ case Opt_add:
24276+ err = au_br_add(sb, &opt->add,
24277+ au_ftest_opts(opts->flags, REMOUNT));
24278+ if (!err) {
24279+ err = 1;
027c5e7a 24280+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 24281+ }
24282+ break;
24283+
24284+ case Opt_del:
24285+ case Opt_idel:
24286+ err = au_br_del(sb, &opt->del,
24287+ au_ftest_opts(opts->flags, REMOUNT));
24288+ if (!err) {
24289+ err = 1;
24290+ au_fset_opts(opts->flags, TRUNC_XIB);
027c5e7a 24291+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 24292+ }
24293+ break;
24294+
24295+ case Opt_mod:
24296+ case Opt_imod:
24297+ err = au_br_mod(sb, &opt->mod,
24298+ au_ftest_opts(opts->flags, REMOUNT),
24299+ &do_refresh);
24300+ if (!err) {
24301+ err = 1;
027c5e7a
AM
24302+ if (do_refresh)
24303+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 24304+ }
24305+ break;
24306+ }
24307+
24308+ return err;
24309+}
24310+
24311+static int au_opt_xino(struct super_block *sb, struct au_opt *opt,
24312+ struct au_opt_xino **opt_xino,
24313+ struct au_opts *opts)
24314+{
24315+ int err;
24316+ aufs_bindex_t bend, bindex;
24317+ struct dentry *root, *parent, *h_root;
24318+
24319+ err = 0;
24320+ switch (opt->type) {
24321+ case Opt_xino:
24322+ err = au_xino_set(sb, &opt->xino,
24323+ !!au_ftest_opts(opts->flags, REMOUNT));
24324+ if (unlikely(err))
24325+ break;
24326+
24327+ *opt_xino = &opt->xino;
24328+ au_xino_brid_set(sb, -1);
24329+
24330+ /* safe d_parent access */
2000de60 24331+ parent = opt->xino.file->f_path.dentry->d_parent;
1facf9fc 24332+ root = sb->s_root;
24333+ bend = au_sbend(sb);
24334+ for (bindex = 0; bindex <= bend; bindex++) {
24335+ h_root = au_h_dptr(root, bindex);
24336+ if (h_root == parent) {
24337+ au_xino_brid_set(sb, au_sbr_id(sb, bindex));
24338+ break;
24339+ }
24340+ }
24341+ break;
24342+
24343+ case Opt_noxino:
24344+ au_xino_clr(sb);
24345+ au_xino_brid_set(sb, -1);
24346+ *opt_xino = (void *)-1;
24347+ break;
24348+ }
24349+
24350+ return err;
24351+}
24352+
24353+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
24354+ unsigned int pending)
24355+{
076b876e 24356+ int err, fhsm;
1facf9fc 24357+ aufs_bindex_t bindex, bend;
79b8bda9 24358+ unsigned char do_plink, skip, do_free, can_no_dreval;
1facf9fc 24359+ struct au_branch *br;
24360+ struct au_wbr *wbr;
79b8bda9 24361+ struct dentry *root, *dentry;
1facf9fc 24362+ struct inode *dir, *h_dir;
24363+ struct au_sbinfo *sbinfo;
24364+ struct au_hinode *hdir;
24365+
dece6358
AM
24366+ SiMustAnyLock(sb);
24367+
1facf9fc 24368+ sbinfo = au_sbi(sb);
24369+ AuDebugOn(!(sbinfo->si_mntflags & AuOptMask_UDBA));
24370+
dece6358
AM
24371+ if (!(sb_flags & MS_RDONLY)) {
24372+ if (unlikely(!au_br_writable(au_sbr_perm(sb, 0))))
0c3ec466 24373+ pr_warn("first branch should be rw\n");
dece6358 24374+ if (unlikely(au_opt_test(sbinfo->si_mntflags, SHWH)))
0c3ec466 24375+ pr_warn("shwh should be used with ro\n");
dece6358 24376+ }
1facf9fc 24377+
4a4d8108 24378+ if (au_opt_test((sbinfo->si_mntflags | pending), UDBA_HNOTIFY)
1facf9fc 24379+ && !au_opt_test(sbinfo->si_mntflags, XINO))
0c3ec466 24380+ pr_warn("udba=*notify requires xino\n");
1facf9fc 24381+
076b876e
AM
24382+ if (au_opt_test(sbinfo->si_mntflags, DIRPERM1))
24383+ pr_warn("dirperm1 breaks the protection"
24384+ " by the permission bits on the lower branch\n");
24385+
1facf9fc 24386+ err = 0;
076b876e 24387+ fhsm = 0;
1facf9fc 24388+ root = sb->s_root;
5527c038 24389+ dir = d_inode(root);
1facf9fc 24390+ do_plink = !!au_opt_test(sbinfo->si_mntflags, PLINK);
79b8bda9
AM
24391+ can_no_dreval = !!au_opt_test((sbinfo->si_mntflags | pending),
24392+ UDBA_NONE);
1facf9fc 24393+ bend = au_sbend(sb);
24394+ for (bindex = 0; !err && bindex <= bend; bindex++) {
24395+ skip = 0;
24396+ h_dir = au_h_iptr(dir, bindex);
24397+ br = au_sbr(sb, bindex);
1facf9fc 24398+
c1595e42
JR
24399+ if ((br->br_perm & AuBrAttr_ICEX)
24400+ && !h_dir->i_op->listxattr)
24401+ br->br_perm &= ~AuBrAttr_ICEX;
24402+#if 0
24403+ if ((br->br_perm & AuBrAttr_ICEX_SEC)
24404+ && (au_br_sb(br)->s_flags & MS_NOSEC))
24405+ br->br_perm &= ~AuBrAttr_ICEX_SEC;
24406+#endif
24407+
24408+ do_free = 0;
1facf9fc 24409+ wbr = br->br_wbr;
24410+ if (wbr)
24411+ wbr_wh_read_lock(wbr);
24412+
1e00d052 24413+ if (!au_br_writable(br->br_perm)) {
1facf9fc 24414+ do_free = !!wbr;
24415+ skip = (!wbr
24416+ || (!wbr->wbr_whbase
24417+ && !wbr->wbr_plink
24418+ && !wbr->wbr_orph));
1e00d052 24419+ } else if (!au_br_wh_linkable(br->br_perm)) {
1facf9fc 24420+ /* skip = (!br->br_whbase && !br->br_orph); */
24421+ skip = (!wbr || !wbr->wbr_whbase);
24422+ if (skip && wbr) {
24423+ if (do_plink)
24424+ skip = !!wbr->wbr_plink;
24425+ else
24426+ skip = !wbr->wbr_plink;
24427+ }
1e00d052 24428+ } else {
1facf9fc 24429+ /* skip = (br->br_whbase && br->br_ohph); */
24430+ skip = (wbr && wbr->wbr_whbase);
24431+ if (skip) {
24432+ if (do_plink)
24433+ skip = !!wbr->wbr_plink;
24434+ else
24435+ skip = !wbr->wbr_plink;
24436+ }
1facf9fc 24437+ }
24438+ if (wbr)
24439+ wbr_wh_read_unlock(wbr);
24440+
79b8bda9
AM
24441+ if (can_no_dreval) {
24442+ dentry = br->br_path.dentry;
24443+ spin_lock(&dentry->d_lock);
24444+ if (dentry->d_flags &
24445+ (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE))
24446+ can_no_dreval = 0;
24447+ spin_unlock(&dentry->d_lock);
24448+ }
24449+
076b876e
AM
24450+ if (au_br_fhsm(br->br_perm)) {
24451+ fhsm++;
24452+ AuDebugOn(!br->br_fhsm);
24453+ }
24454+
1facf9fc 24455+ if (skip)
24456+ continue;
24457+
24458+ hdir = au_hi(dir, bindex);
4a4d8108 24459+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 24460+ if (wbr)
24461+ wbr_wh_write_lock(wbr);
86dc4139 24462+ err = au_wh_init(br, sb);
1facf9fc 24463+ if (wbr)
24464+ wbr_wh_write_unlock(wbr);
4a4d8108 24465+ au_hn_imtx_unlock(hdir);
1facf9fc 24466+
24467+ if (!err && do_free) {
24468+ kfree(wbr);
24469+ br->br_wbr = NULL;
24470+ }
24471+ }
24472+
79b8bda9
AM
24473+ if (can_no_dreval)
24474+ au_fset_si(sbinfo, NO_DREVAL);
24475+ else
24476+ au_fclr_si(sbinfo, NO_DREVAL);
24477+
c1595e42 24478+ if (fhsm >= 2) {
076b876e 24479+ au_fset_si(sbinfo, FHSM);
c1595e42
JR
24480+ for (bindex = bend; bindex >= 0; bindex--) {
24481+ br = au_sbr(sb, bindex);
24482+ if (au_br_fhsm(br->br_perm)) {
24483+ au_fhsm_set_bottom(sb, bindex);
24484+ break;
24485+ }
24486+ }
24487+ } else {
076b876e 24488+ au_fclr_si(sbinfo, FHSM);
c1595e42
JR
24489+ au_fhsm_set_bottom(sb, -1);
24490+ }
076b876e 24491+
1facf9fc 24492+ return err;
24493+}
24494+
24495+int au_opts_mount(struct super_block *sb, struct au_opts *opts)
24496+{
24497+ int err;
24498+ unsigned int tmp;
027c5e7a 24499+ aufs_bindex_t bindex, bend;
1facf9fc 24500+ struct au_opt *opt;
24501+ struct au_opt_xino *opt_xino, xino;
24502+ struct au_sbinfo *sbinfo;
027c5e7a 24503+ struct au_branch *br;
076b876e 24504+ struct inode *dir;
1facf9fc 24505+
dece6358
AM
24506+ SiMustWriteLock(sb);
24507+
1facf9fc 24508+ err = 0;
24509+ opt_xino = NULL;
24510+ opt = opts->opt;
24511+ while (err >= 0 && opt->type != Opt_tail)
24512+ err = au_opt_simple(sb, opt++, opts);
24513+ if (err > 0)
24514+ err = 0;
24515+ else if (unlikely(err < 0))
24516+ goto out;
24517+
24518+ /* disable xino and udba temporary */
24519+ sbinfo = au_sbi(sb);
24520+ tmp = sbinfo->si_mntflags;
24521+ au_opt_clr(sbinfo->si_mntflags, XINO);
24522+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_REVAL);
24523+
24524+ opt = opts->opt;
24525+ while (err >= 0 && opt->type != Opt_tail)
24526+ err = au_opt_br(sb, opt++, opts);
24527+ if (err > 0)
24528+ err = 0;
24529+ else if (unlikely(err < 0))
24530+ goto out;
24531+
24532+ bend = au_sbend(sb);
24533+ if (unlikely(bend < 0)) {
24534+ err = -EINVAL;
4a4d8108 24535+ pr_err("no branches\n");
1facf9fc 24536+ goto out;
24537+ }
24538+
24539+ if (au_opt_test(tmp, XINO))
24540+ au_opt_set(sbinfo->si_mntflags, XINO);
24541+ opt = opts->opt;
24542+ while (!err && opt->type != Opt_tail)
24543+ err = au_opt_xino(sb, opt++, &opt_xino, opts);
24544+ if (unlikely(err))
24545+ goto out;
24546+
24547+ err = au_opts_verify(sb, sb->s_flags, tmp);
24548+ if (unlikely(err))
24549+ goto out;
24550+
24551+ /* restore xino */
24552+ if (au_opt_test(tmp, XINO) && !opt_xino) {
24553+ xino.file = au_xino_def(sb);
24554+ err = PTR_ERR(xino.file);
24555+ if (IS_ERR(xino.file))
24556+ goto out;
24557+
24558+ err = au_xino_set(sb, &xino, /*remount*/0);
24559+ fput(xino.file);
24560+ if (unlikely(err))
24561+ goto out;
24562+ }
24563+
24564+ /* restore udba */
027c5e7a 24565+ tmp &= AuOptMask_UDBA;
1facf9fc 24566+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
027c5e7a
AM
24567+ sbinfo->si_mntflags |= tmp;
24568+ bend = au_sbend(sb);
24569+ for (bindex = 0; bindex <= bend; bindex++) {
24570+ br = au_sbr(sb, bindex);
24571+ err = au_hnotify_reset_br(tmp, br, br->br_perm);
24572+ if (unlikely(err))
24573+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
24574+ bindex, err);
24575+ /* go on even if err */
24576+ }
4a4d8108 24577+ if (au_opt_test(tmp, UDBA_HNOTIFY)) {
5527c038 24578+ dir = d_inode(sb->s_root);
4a4d8108 24579+ au_hn_reset(dir, au_hi_flags(dir, /*isdir*/1) & ~AuHi_XINO);
1facf9fc 24580+ }
24581+
4f0767ce 24582+out:
1facf9fc 24583+ return err;
24584+}
24585+
24586+int au_opts_remount(struct super_block *sb, struct au_opts *opts)
24587+{
24588+ int err, rerr;
79b8bda9 24589+ unsigned char no_dreval;
1facf9fc 24590+ struct inode *dir;
24591+ struct au_opt_xino *opt_xino;
24592+ struct au_opt *opt;
24593+ struct au_sbinfo *sbinfo;
24594+
dece6358
AM
24595+ SiMustWriteLock(sb);
24596+
79b8bda9 24597+ err = 0;
5527c038 24598+ dir = d_inode(sb->s_root);
1facf9fc 24599+ sbinfo = au_sbi(sb);
1facf9fc 24600+ opt_xino = NULL;
24601+ opt = opts->opt;
24602+ while (err >= 0 && opt->type != Opt_tail) {
24603+ err = au_opt_simple(sb, opt, opts);
24604+ if (!err)
24605+ err = au_opt_br(sb, opt, opts);
24606+ if (!err)
24607+ err = au_opt_xino(sb, opt, &opt_xino, opts);
24608+ opt++;
24609+ }
24610+ if (err > 0)
24611+ err = 0;
24612+ AuTraceErr(err);
24613+ /* go on even err */
24614+
79b8bda9 24615+ no_dreval = !!au_ftest_si(sbinfo, NO_DREVAL);
1facf9fc 24616+ rerr = au_opts_verify(sb, opts->sb_flags, /*pending*/0);
24617+ if (unlikely(rerr && !err))
24618+ err = rerr;
24619+
79b8bda9 24620+ if (no_dreval != !!au_ftest_si(sbinfo, NO_DREVAL))
b95c5147 24621+ au_fset_opts(opts->flags, REFRESH_IDOP);
79b8bda9 24622+
1facf9fc 24623+ if (au_ftest_opts(opts->flags, TRUNC_XIB)) {
24624+ rerr = au_xib_trunc(sb);
24625+ if (unlikely(rerr && !err))
24626+ err = rerr;
24627+ }
24628+
24629+ /* will be handled by the caller */
027c5e7a 24630+ if (!au_ftest_opts(opts->flags, REFRESH)
79b8bda9
AM
24631+ && (opts->given_udba
24632+ || au_opt_test(sbinfo->si_mntflags, XINO)
b95c5147 24633+ || au_ftest_opts(opts->flags, REFRESH_IDOP)
79b8bda9 24634+ ))
027c5e7a 24635+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 24636+
24637+ AuDbg("status 0x%x\n", opts->flags);
24638+ return err;
24639+}
24640+
24641+/* ---------------------------------------------------------------------- */
24642+
24643+unsigned int au_opt_udba(struct super_block *sb)
24644+{
24645+ return au_mntflags(sb) & AuOptMask_UDBA;
24646+}
7f207e10
AM
24647diff -urN /usr/share/empty/fs/aufs/opts.h linux/fs/aufs/opts.h
24648--- /usr/share/empty/fs/aufs/opts.h 1970-01-01 01:00:00.000000000 +0100
8cdd5066 24649+++ linux/fs/aufs/opts.h 2016-02-28 11:26:32.573304539 +0100
79b8bda9 24650@@ -0,0 +1,211 @@
1facf9fc 24651+/*
8cdd5066 24652+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 24653+ *
24654+ * This program, aufs is free software; you can redistribute it and/or modify
24655+ * it under the terms of the GNU General Public License as published by
24656+ * the Free Software Foundation; either version 2 of the License, or
24657+ * (at your option) any later version.
dece6358
AM
24658+ *
24659+ * This program is distributed in the hope that it will be useful,
24660+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24661+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24662+ * GNU General Public License for more details.
24663+ *
24664+ * You should have received a copy of the GNU General Public License
523b37e3 24665+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 24666+ */
24667+
24668+/*
24669+ * mount options/flags
24670+ */
24671+
24672+#ifndef __AUFS_OPTS_H__
24673+#define __AUFS_OPTS_H__
24674+
24675+#ifdef __KERNEL__
24676+
dece6358 24677+#include <linux/path.h>
1facf9fc 24678+
dece6358
AM
24679+struct file;
24680+struct super_block;
24681+
1facf9fc 24682+/* ---------------------------------------------------------------------- */
24683+
24684+/* mount flags */
24685+#define AuOpt_XINO 1 /* external inode number bitmap
24686+ and translation table */
24687+#define AuOpt_TRUNC_XINO (1 << 1) /* truncate xino files */
24688+#define AuOpt_UDBA_NONE (1 << 2) /* users direct branch access */
24689+#define AuOpt_UDBA_REVAL (1 << 3)
4a4d8108 24690+#define AuOpt_UDBA_HNOTIFY (1 << 4)
dece6358
AM
24691+#define AuOpt_SHWH (1 << 5) /* show whiteout */
24692+#define AuOpt_PLINK (1 << 6) /* pseudo-link */
076b876e
AM
24693+#define AuOpt_DIRPERM1 (1 << 7) /* ignore the lower dir's perm
24694+ bits */
dece6358
AM
24695+#define AuOpt_ALWAYS_DIROPQ (1 << 9) /* policy to creating diropq */
24696+#define AuOpt_SUM (1 << 10) /* summation for statfs(2) */
24697+#define AuOpt_SUM_W (1 << 11) /* unimplemented */
24698+#define AuOpt_WARN_PERM (1 << 12) /* warn when add-branch */
24699+#define AuOpt_VERBOSE (1 << 13) /* busy inode when del-branch */
4a4d8108 24700+#define AuOpt_DIO (1 << 14) /* direct io */
1facf9fc 24701+
4a4d8108
AM
24702+#ifndef CONFIG_AUFS_HNOTIFY
24703+#undef AuOpt_UDBA_HNOTIFY
24704+#define AuOpt_UDBA_HNOTIFY 0
1facf9fc 24705+#endif
dece6358
AM
24706+#ifndef CONFIG_AUFS_SHWH
24707+#undef AuOpt_SHWH
24708+#define AuOpt_SHWH 0
24709+#endif
1facf9fc 24710+
24711+#define AuOpt_Def (AuOpt_XINO \
24712+ | AuOpt_UDBA_REVAL \
24713+ | AuOpt_PLINK \
24714+ /* | AuOpt_DIRPERM1 */ \
24715+ | AuOpt_WARN_PERM)
24716+#define AuOptMask_UDBA (AuOpt_UDBA_NONE \
24717+ | AuOpt_UDBA_REVAL \
4a4d8108 24718+ | AuOpt_UDBA_HNOTIFY)
1facf9fc 24719+
24720+#define au_opt_test(flags, name) (flags & AuOpt_##name)
24721+#define au_opt_set(flags, name) do { \
24722+ BUILD_BUG_ON(AuOpt_##name & AuOptMask_UDBA); \
24723+ ((flags) |= AuOpt_##name); \
24724+} while (0)
24725+#define au_opt_set_udba(flags, name) do { \
24726+ (flags) &= ~AuOptMask_UDBA; \
24727+ ((flags) |= AuOpt_##name); \
24728+} while (0)
7f207e10
AM
24729+#define au_opt_clr(flags, name) do { \
24730+ ((flags) &= ~AuOpt_##name); \
24731+} while (0)
1facf9fc 24732+
e49829fe
JR
24733+static inline unsigned int au_opts_plink(unsigned int mntflags)
24734+{
24735+#ifdef CONFIG_PROC_FS
24736+ return mntflags;
24737+#else
24738+ return mntflags & ~AuOpt_PLINK;
24739+#endif
24740+}
24741+
1facf9fc 24742+/* ---------------------------------------------------------------------- */
24743+
24744+/* policies to select one among multiple writable branches */
24745+enum {
24746+ AuWbrCreate_TDP, /* top down parent */
24747+ AuWbrCreate_RR, /* round robin */
24748+ AuWbrCreate_MFS, /* most free space */
24749+ AuWbrCreate_MFSV, /* mfs with seconds */
24750+ AuWbrCreate_MFSRR, /* mfs then rr */
24751+ AuWbrCreate_MFSRRV, /* mfs then rr with seconds */
24752+ AuWbrCreate_PMFS, /* parent and mfs */
24753+ AuWbrCreate_PMFSV, /* parent and mfs with seconds */
392086de
AM
24754+ AuWbrCreate_PMFSRR, /* parent, mfs and round-robin */
24755+ AuWbrCreate_PMFSRRV, /* plus seconds */
1facf9fc 24756+
24757+ AuWbrCreate_Def = AuWbrCreate_TDP
24758+};
24759+
24760+enum {
24761+ AuWbrCopyup_TDP, /* top down parent */
24762+ AuWbrCopyup_BUP, /* bottom up parent */
24763+ AuWbrCopyup_BU, /* bottom up */
24764+
24765+ AuWbrCopyup_Def = AuWbrCopyup_TDP
24766+};
24767+
24768+/* ---------------------------------------------------------------------- */
24769+
24770+struct au_opt_add {
24771+ aufs_bindex_t bindex;
24772+ char *pathname;
24773+ int perm;
24774+ struct path path;
24775+};
24776+
24777+struct au_opt_del {
24778+ char *pathname;
24779+ struct path h_path;
24780+};
24781+
24782+struct au_opt_mod {
24783+ char *path;
24784+ int perm;
24785+ struct dentry *h_root;
24786+};
24787+
24788+struct au_opt_xino {
24789+ char *path;
24790+ struct file *file;
24791+};
24792+
24793+struct au_opt_xino_itrunc {
24794+ aufs_bindex_t bindex;
24795+};
24796+
24797+struct au_opt_wbr_create {
24798+ int wbr_create;
24799+ int mfs_second;
24800+ unsigned long long mfsrr_watermark;
24801+};
24802+
24803+struct au_opt {
24804+ int type;
24805+ union {
24806+ struct au_opt_xino xino;
24807+ struct au_opt_xino_itrunc xino_itrunc;
24808+ struct au_opt_add add;
24809+ struct au_opt_del del;
24810+ struct au_opt_mod mod;
24811+ int dirwh;
24812+ int rdcache;
24813+ unsigned int rdblk;
24814+ unsigned int rdhash;
24815+ int udba;
24816+ struct au_opt_wbr_create wbr_create;
24817+ int wbr_copyup;
076b876e 24818+ unsigned int fhsm_second;
1facf9fc 24819+ };
24820+};
24821+
24822+/* opts flags */
24823+#define AuOpts_REMOUNT 1
027c5e7a
AM
24824+#define AuOpts_REFRESH (1 << 1)
24825+#define AuOpts_TRUNC_XIB (1 << 2)
24826+#define AuOpts_REFRESH_DYAOP (1 << 3)
b95c5147 24827+#define AuOpts_REFRESH_IDOP (1 << 4)
1facf9fc 24828+#define au_ftest_opts(flags, name) ((flags) & AuOpts_##name)
7f207e10
AM
24829+#define au_fset_opts(flags, name) \
24830+ do { (flags) |= AuOpts_##name; } while (0)
24831+#define au_fclr_opts(flags, name) \
24832+ do { (flags) &= ~AuOpts_##name; } while (0)
1facf9fc 24833+
24834+struct au_opts {
24835+ struct au_opt *opt;
24836+ int max_opt;
24837+
24838+ unsigned int given_udba;
24839+ unsigned int flags;
24840+ unsigned long sb_flags;
24841+};
24842+
24843+/* ---------------------------------------------------------------------- */
24844+
7e9cd9fe 24845+/* opts.c */
076b876e 24846+void au_optstr_br_perm(au_br_perm_str_t *str, int perm);
1facf9fc 24847+const char *au_optstr_udba(int udba);
24848+const char *au_optstr_wbr_copyup(int wbr_copyup);
24849+const char *au_optstr_wbr_create(int wbr_create);
24850+
24851+void au_opts_free(struct au_opts *opts);
24852+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts);
24853+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
24854+ unsigned int pending);
24855+int au_opts_mount(struct super_block *sb, struct au_opts *opts);
24856+int au_opts_remount(struct super_block *sb, struct au_opts *opts);
24857+
24858+unsigned int au_opt_udba(struct super_block *sb);
24859+
1facf9fc 24860+#endif /* __KERNEL__ */
24861+#endif /* __AUFS_OPTS_H__ */
7f207e10
AM
24862diff -urN /usr/share/empty/fs/aufs/plink.c linux/fs/aufs/plink.c
24863--- /usr/share/empty/fs/aufs/plink.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 24864+++ linux/fs/aufs/plink.c 2016-02-28 11:26:32.573304539 +0100
5527c038 24865@@ -0,0 +1,528 @@
1facf9fc 24866+/*
8cdd5066 24867+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 24868+ *
24869+ * This program, aufs is free software; you can redistribute it and/or modify
24870+ * it under the terms of the GNU General Public License as published by
24871+ * the Free Software Foundation; either version 2 of the License, or
24872+ * (at your option) any later version.
dece6358
AM
24873+ *
24874+ * This program is distributed in the hope that it will be useful,
24875+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24876+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24877+ * GNU General Public License for more details.
24878+ *
24879+ * You should have received a copy of the GNU General Public License
523b37e3 24880+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 24881+ */
24882+
24883+/*
24884+ * pseudo-link
24885+ */
24886+
24887+#include "aufs.h"
24888+
24889+/*
e49829fe 24890+ * the pseudo-link maintenance mode.
1facf9fc 24891+ * while a user process maintains the pseudo-links,
24892+ * adding a new plink and manipulating branches are prohibited.
e49829fe
JR
24893+ *
24894+ * Flags
24895+ * NOPLM:
24896+ * For entry functions which will handle plink, and i_mutex is already held
24897+ * in VFS.
24898+ * They cannot wait and should return an error at once.
24899+ * Callers have to check the error.
24900+ * NOPLMW:
24901+ * For entry functions which will handle plink, but i_mutex is not held
24902+ * in VFS.
24903+ * They can wait for the plink maintenance mode to finish.
24904+ *
24905+ * They behave like F_SETLK and F_SETLKW.
24906+ * If the caller never handles plink, then both flags are unnecessary.
1facf9fc 24907+ */
e49829fe
JR
24908+
24909+int au_plink_maint(struct super_block *sb, int flags)
1facf9fc 24910+{
e49829fe
JR
24911+ int err;
24912+ pid_t pid, ppid;
24913+ struct au_sbinfo *sbi;
dece6358
AM
24914+
24915+ SiMustAnyLock(sb);
24916+
e49829fe
JR
24917+ err = 0;
24918+ if (!au_opt_test(au_mntflags(sb), PLINK))
24919+ goto out;
24920+
24921+ sbi = au_sbi(sb);
24922+ pid = sbi->si_plink_maint_pid;
24923+ if (!pid || pid == current->pid)
24924+ goto out;
24925+
24926+ /* todo: it highly depends upon /sbin/mount.aufs */
24927+ rcu_read_lock();
24928+ ppid = task_pid_vnr(rcu_dereference(current->real_parent));
24929+ rcu_read_unlock();
24930+ if (pid == ppid)
24931+ goto out;
24932+
24933+ if (au_ftest_lock(flags, NOPLMW)) {
027c5e7a
AM
24934+ /* if there is no i_mutex lock in VFS, we don't need to wait */
24935+ /* AuDebugOn(!lockdep_depth(current)); */
e49829fe
JR
24936+ while (sbi->si_plink_maint_pid) {
24937+ si_read_unlock(sb);
24938+ /* gave up wake_up_bit() */
24939+ wait_event(sbi->si_plink_wq, !sbi->si_plink_maint_pid);
24940+
24941+ if (au_ftest_lock(flags, FLUSH))
24942+ au_nwt_flush(&sbi->si_nowait);
24943+ si_noflush_read_lock(sb);
24944+ }
24945+ } else if (au_ftest_lock(flags, NOPLM)) {
24946+ AuDbg("ppid %d, pid %d\n", ppid, pid);
24947+ err = -EAGAIN;
24948+ }
24949+
24950+out:
24951+ return err;
4a4d8108
AM
24952+}
24953+
e49829fe 24954+void au_plink_maint_leave(struct au_sbinfo *sbinfo)
4a4d8108 24955+{
4a4d8108 24956+ spin_lock(&sbinfo->si_plink_maint_lock);
027c5e7a 24957+ sbinfo->si_plink_maint_pid = 0;
4a4d8108 24958+ spin_unlock(&sbinfo->si_plink_maint_lock);
027c5e7a 24959+ wake_up_all(&sbinfo->si_plink_wq);
4a4d8108
AM
24960+}
24961+
e49829fe 24962+int au_plink_maint_enter(struct super_block *sb)
4a4d8108
AM
24963+{
24964+ int err;
4a4d8108
AM
24965+ struct au_sbinfo *sbinfo;
24966+
24967+ err = 0;
4a4d8108
AM
24968+ sbinfo = au_sbi(sb);
24969+ /* make sure i am the only one in this fs */
e49829fe
JR
24970+ si_write_lock(sb, AuLock_FLUSH);
24971+ if (au_opt_test(au_mntflags(sb), PLINK)) {
24972+ spin_lock(&sbinfo->si_plink_maint_lock);
24973+ if (!sbinfo->si_plink_maint_pid)
24974+ sbinfo->si_plink_maint_pid = current->pid;
24975+ else
24976+ err = -EBUSY;
24977+ spin_unlock(&sbinfo->si_plink_maint_lock);
24978+ }
4a4d8108
AM
24979+ si_write_unlock(sb);
24980+
24981+ return err;
1facf9fc 24982+}
24983+
24984+/* ---------------------------------------------------------------------- */
24985+
1facf9fc 24986+#ifdef CONFIG_AUFS_DEBUG
24987+void au_plink_list(struct super_block *sb)
24988+{
86dc4139 24989+ int i;
1facf9fc 24990+ struct au_sbinfo *sbinfo;
86dc4139 24991+ struct hlist_head *plink_hlist;
1facf9fc 24992+ struct pseudo_link *plink;
24993+
dece6358
AM
24994+ SiMustAnyLock(sb);
24995+
1facf9fc 24996+ sbinfo = au_sbi(sb);
24997+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 24998+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 24999+
86dc4139
AM
25000+ for (i = 0; i < AuPlink_NHASH; i++) {
25001+ plink_hlist = &sbinfo->si_plink[i].head;
25002+ rcu_read_lock();
25003+ hlist_for_each_entry_rcu(plink, plink_hlist, hlist)
25004+ AuDbg("%lu\n", plink->inode->i_ino);
25005+ rcu_read_unlock();
25006+ }
1facf9fc 25007+}
25008+#endif
25009+
25010+/* is the inode pseudo-linked? */
25011+int au_plink_test(struct inode *inode)
25012+{
86dc4139 25013+ int found, i;
1facf9fc 25014+ struct au_sbinfo *sbinfo;
86dc4139 25015+ struct hlist_head *plink_hlist;
1facf9fc 25016+ struct pseudo_link *plink;
25017+
25018+ sbinfo = au_sbi(inode->i_sb);
dece6358 25019+ AuRwMustAnyLock(&sbinfo->si_rwsem);
1facf9fc 25020+ AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK));
e49829fe 25021+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
1facf9fc 25022+
25023+ found = 0;
86dc4139
AM
25024+ i = au_plink_hash(inode->i_ino);
25025+ plink_hlist = &sbinfo->si_plink[i].head;
4a4d8108 25026+ rcu_read_lock();
86dc4139 25027+ hlist_for_each_entry_rcu(plink, plink_hlist, hlist)
1facf9fc 25028+ if (plink->inode == inode) {
25029+ found = 1;
25030+ break;
25031+ }
4a4d8108 25032+ rcu_read_unlock();
1facf9fc 25033+ return found;
25034+}
25035+
25036+/* ---------------------------------------------------------------------- */
25037+
25038+/*
25039+ * generate a name for plink.
25040+ * the file will be stored under AUFS_WH_PLINKDIR.
25041+ */
25042+/* 20 is the maximum number of decimal digits in a 64-bit unsigned long */
25043+#define PLINK_NAME_LEN ((20 + 1) * 2)
25044+
25045+static int plink_name(char *name, int len, struct inode *inode,
25046+ aufs_bindex_t bindex)
25047+{
25048+ int rlen;
25049+ struct inode *h_inode;
25050+
25051+ h_inode = au_h_iptr(inode, bindex);
25052+ rlen = snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino);
25053+ return rlen;
25054+}
25055+
7f207e10
AM
25056+struct au_do_plink_lkup_args {
25057+ struct dentry **errp;
25058+ struct qstr *tgtname;
25059+ struct dentry *h_parent;
25060+ struct au_branch *br;
25061+};
25062+
25063+static struct dentry *au_do_plink_lkup(struct qstr *tgtname,
25064+ struct dentry *h_parent,
25065+ struct au_branch *br)
25066+{
25067+ struct dentry *h_dentry;
25068+ struct mutex *h_mtx;
25069+
5527c038 25070+ h_mtx = &d_inode(h_parent)->i_mutex;
7f207e10 25071+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2);
b4510431 25072+ h_dentry = vfsub_lkup_one(tgtname, h_parent);
7f207e10
AM
25073+ mutex_unlock(h_mtx);
25074+ return h_dentry;
25075+}
25076+
25077+static void au_call_do_plink_lkup(void *args)
25078+{
25079+ struct au_do_plink_lkup_args *a = args;
25080+ *a->errp = au_do_plink_lkup(a->tgtname, a->h_parent, a->br);
25081+}
25082+
1facf9fc 25083+/* lookup the plink-ed @inode under the branch at @bindex */
25084+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex)
25085+{
25086+ struct dentry *h_dentry, *h_parent;
25087+ struct au_branch *br;
7f207e10 25088+ int wkq_err;
1facf9fc 25089+ char a[PLINK_NAME_LEN];
0c3ec466 25090+ struct qstr tgtname = QSTR_INIT(a, 0);
1facf9fc 25091+
e49829fe
JR
25092+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
25093+
1facf9fc 25094+ br = au_sbr(inode->i_sb, bindex);
25095+ h_parent = br->br_wbr->wbr_plink;
1facf9fc 25096+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
25097+
2dfbb274 25098+ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
7f207e10
AM
25099+ struct au_do_plink_lkup_args args = {
25100+ .errp = &h_dentry,
25101+ .tgtname = &tgtname,
25102+ .h_parent = h_parent,
25103+ .br = br
25104+ };
25105+
25106+ wkq_err = au_wkq_wait(au_call_do_plink_lkup, &args);
25107+ if (unlikely(wkq_err))
25108+ h_dentry = ERR_PTR(wkq_err);
25109+ } else
25110+ h_dentry = au_do_plink_lkup(&tgtname, h_parent, br);
25111+
1facf9fc 25112+ return h_dentry;
25113+}
25114+
25115+/* create a pseudo-link */
25116+static int do_whplink(struct qstr *tgt, struct dentry *h_parent,
25117+ struct dentry *h_dentry, struct au_branch *br)
25118+{
25119+ int err;
25120+ struct path h_path = {
86dc4139 25121+ .mnt = au_br_mnt(br)
1facf9fc 25122+ };
523b37e3 25123+ struct inode *h_dir, *delegated;
1facf9fc 25124+
5527c038 25125+ h_dir = d_inode(h_parent);
7f207e10 25126+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_CHILD2);
4f0767ce 25127+again:
b4510431 25128+ h_path.dentry = vfsub_lkup_one(tgt, h_parent);
1facf9fc 25129+ err = PTR_ERR(h_path.dentry);
25130+ if (IS_ERR(h_path.dentry))
25131+ goto out;
25132+
25133+ err = 0;
25134+ /* wh.plink dir is not monitored */
7f207e10 25135+ /* todo: is it really safe? */
5527c038
JR
25136+ if (d_is_positive(h_path.dentry)
25137+ && d_inode(h_path.dentry) != d_inode(h_dentry)) {
523b37e3
AM
25138+ delegated = NULL;
25139+ err = vfsub_unlink(h_dir, &h_path, &delegated, /*force*/0);
25140+ if (unlikely(err == -EWOULDBLOCK)) {
25141+ pr_warn("cannot retry for NFSv4 delegation"
25142+ " for an internal unlink\n");
25143+ iput(delegated);
25144+ }
1facf9fc 25145+ dput(h_path.dentry);
25146+ h_path.dentry = NULL;
25147+ if (!err)
25148+ goto again;
25149+ }
5527c038 25150+ if (!err && d_is_negative(h_path.dentry)) {
523b37e3
AM
25151+ delegated = NULL;
25152+ err = vfsub_link(h_dentry, h_dir, &h_path, &delegated);
25153+ if (unlikely(err == -EWOULDBLOCK)) {
25154+ pr_warn("cannot retry for NFSv4 delegation"
25155+ " for an internal link\n");
25156+ iput(delegated);
25157+ }
25158+ }
1facf9fc 25159+ dput(h_path.dentry);
25160+
4f0767ce 25161+out:
7f207e10 25162+ mutex_unlock(&h_dir->i_mutex);
1facf9fc 25163+ return err;
25164+}
25165+
25166+struct do_whplink_args {
25167+ int *errp;
25168+ struct qstr *tgt;
25169+ struct dentry *h_parent;
25170+ struct dentry *h_dentry;
25171+ struct au_branch *br;
25172+};
25173+
25174+static void call_do_whplink(void *args)
25175+{
25176+ struct do_whplink_args *a = args;
25177+ *a->errp = do_whplink(a->tgt, a->h_parent, a->h_dentry, a->br);
25178+}
25179+
25180+static int whplink(struct dentry *h_dentry, struct inode *inode,
25181+ aufs_bindex_t bindex, struct au_branch *br)
25182+{
25183+ int err, wkq_err;
25184+ struct au_wbr *wbr;
25185+ struct dentry *h_parent;
1facf9fc 25186+ char a[PLINK_NAME_LEN];
0c3ec466 25187+ struct qstr tgtname = QSTR_INIT(a, 0);
1facf9fc 25188+
25189+ wbr = au_sbr(inode->i_sb, bindex)->br_wbr;
25190+ h_parent = wbr->wbr_plink;
1facf9fc 25191+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
25192+
25193+ /* always superio. */
2dfbb274 25194+ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
1facf9fc 25195+ struct do_whplink_args args = {
25196+ .errp = &err,
25197+ .tgt = &tgtname,
25198+ .h_parent = h_parent,
25199+ .h_dentry = h_dentry,
25200+ .br = br
25201+ };
25202+ wkq_err = au_wkq_wait(call_do_whplink, &args);
25203+ if (unlikely(wkq_err))
25204+ err = wkq_err;
25205+ } else
25206+ err = do_whplink(&tgtname, h_parent, h_dentry, br);
1facf9fc 25207+
25208+ return err;
25209+}
25210+
25211+/* free a single plink */
25212+static void do_put_plink(struct pseudo_link *plink, int do_del)
25213+{
1facf9fc 25214+ if (do_del)
86dc4139 25215+ hlist_del(&plink->hlist);
4a4d8108
AM
25216+ iput(plink->inode);
25217+ kfree(plink);
25218+}
25219+
25220+static void do_put_plink_rcu(struct rcu_head *rcu)
25221+{
25222+ struct pseudo_link *plink;
25223+
25224+ plink = container_of(rcu, struct pseudo_link, rcu);
25225+ iput(plink->inode);
1facf9fc 25226+ kfree(plink);
25227+}
25228+
25229+/*
25230+ * create a new pseudo-link for @h_dentry on @bindex.
25231+ * the linked inode is held in aufs @inode.
25232+ */
25233+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
25234+ struct dentry *h_dentry)
25235+{
25236+ struct super_block *sb;
25237+ struct au_sbinfo *sbinfo;
86dc4139 25238+ struct hlist_head *plink_hlist;
4a4d8108 25239+ struct pseudo_link *plink, *tmp;
86dc4139
AM
25240+ struct au_sphlhead *sphl;
25241+ int found, err, cnt, i;
1facf9fc 25242+
25243+ sb = inode->i_sb;
25244+ sbinfo = au_sbi(sb);
25245+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 25246+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 25247+
86dc4139 25248+ found = au_plink_test(inode);
4a4d8108 25249+ if (found)
1facf9fc 25250+ return;
4a4d8108 25251+
86dc4139
AM
25252+ i = au_plink_hash(inode->i_ino);
25253+ sphl = sbinfo->si_plink + i;
25254+ plink_hlist = &sphl->head;
4a4d8108
AM
25255+ tmp = kmalloc(sizeof(*plink), GFP_NOFS);
25256+ if (tmp)
25257+ tmp->inode = au_igrab(inode);
25258+ else {
25259+ err = -ENOMEM;
25260+ goto out;
1facf9fc 25261+ }
25262+
86dc4139
AM
25263+ spin_lock(&sphl->spin);
25264+ hlist_for_each_entry(plink, plink_hlist, hlist) {
4a4d8108
AM
25265+ if (plink->inode == inode) {
25266+ found = 1;
25267+ break;
25268+ }
1facf9fc 25269+ }
4a4d8108 25270+ if (!found)
86dc4139
AM
25271+ hlist_add_head_rcu(&tmp->hlist, plink_hlist);
25272+ spin_unlock(&sphl->spin);
4a4d8108 25273+ if (!found) {
86dc4139
AM
25274+ cnt = au_sphl_count(sphl);
25275+#define msg "unexpectedly unblanced or too many pseudo-links"
25276+ if (cnt > AUFS_PLINK_WARN)
25277+ AuWarn1(msg ", %d\n", cnt);
25278+#undef msg
1facf9fc 25279+ err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex));
4a4d8108
AM
25280+ } else {
25281+ do_put_plink(tmp, 0);
25282+ return;
1facf9fc 25283+ }
25284+
4a4d8108 25285+out:
1facf9fc 25286+ if (unlikely(err)) {
0c3ec466 25287+ pr_warn("err %d, damaged pseudo link.\n", err);
4a4d8108 25288+ if (tmp) {
86dc4139 25289+ au_sphl_del_rcu(&tmp->hlist, sphl);
4a4d8108
AM
25290+ call_rcu(&tmp->rcu, do_put_plink_rcu);
25291+ }
1facf9fc 25292+ }
25293+}
25294+
25295+/* free all plinks */
e49829fe 25296+void au_plink_put(struct super_block *sb, int verbose)
1facf9fc 25297+{
86dc4139 25298+ int i, warned;
1facf9fc 25299+ struct au_sbinfo *sbinfo;
86dc4139
AM
25300+ struct hlist_head *plink_hlist;
25301+ struct hlist_node *tmp;
25302+ struct pseudo_link *plink;
1facf9fc 25303+
dece6358
AM
25304+ SiMustWriteLock(sb);
25305+
1facf9fc 25306+ sbinfo = au_sbi(sb);
25307+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 25308+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 25309+
1facf9fc 25310+ /* no spin_lock since sbinfo is write-locked */
86dc4139
AM
25311+ warned = 0;
25312+ for (i = 0; i < AuPlink_NHASH; i++) {
25313+ plink_hlist = &sbinfo->si_plink[i].head;
25314+ if (!warned && verbose && !hlist_empty(plink_hlist)) {
25315+ pr_warn("pseudo-link is not flushed");
25316+ warned = 1;
25317+ }
25318+ hlist_for_each_entry_safe(plink, tmp, plink_hlist, hlist)
25319+ do_put_plink(plink, 0);
25320+ INIT_HLIST_HEAD(plink_hlist);
25321+ }
1facf9fc 25322+}
25323+
e49829fe
JR
25324+void au_plink_clean(struct super_block *sb, int verbose)
25325+{
25326+ struct dentry *root;
25327+
25328+ root = sb->s_root;
25329+ aufs_write_lock(root);
25330+ if (au_opt_test(au_mntflags(sb), PLINK))
25331+ au_plink_put(sb, verbose);
25332+ aufs_write_unlock(root);
25333+}
25334+
86dc4139
AM
25335+static int au_plink_do_half_refresh(struct inode *inode, aufs_bindex_t br_id)
25336+{
25337+ int do_put;
25338+ aufs_bindex_t bstart, bend, bindex;
25339+
25340+ do_put = 0;
25341+ bstart = au_ibstart(inode);
25342+ bend = au_ibend(inode);
25343+ if (bstart >= 0) {
25344+ for (bindex = bstart; bindex <= bend; bindex++) {
25345+ if (!au_h_iptr(inode, bindex)
25346+ || au_ii_br_id(inode, bindex) != br_id)
25347+ continue;
25348+ au_set_h_iptr(inode, bindex, NULL, 0);
25349+ do_put = 1;
25350+ break;
25351+ }
25352+ if (do_put)
25353+ for (bindex = bstart; bindex <= bend; bindex++)
25354+ if (au_h_iptr(inode, bindex)) {
25355+ do_put = 0;
25356+ break;
25357+ }
25358+ } else
25359+ do_put = 1;
25360+
25361+ return do_put;
25362+}
25363+
1facf9fc 25364+/* free the plinks on a branch specified by @br_id */
25365+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id)
25366+{
25367+ struct au_sbinfo *sbinfo;
86dc4139
AM
25368+ struct hlist_head *plink_hlist;
25369+ struct hlist_node *tmp;
25370+ struct pseudo_link *plink;
1facf9fc 25371+ struct inode *inode;
86dc4139 25372+ int i, do_put;
1facf9fc 25373+
dece6358
AM
25374+ SiMustWriteLock(sb);
25375+
1facf9fc 25376+ sbinfo = au_sbi(sb);
25377+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 25378+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 25379+
1facf9fc 25380+ /* no spin_lock since sbinfo is write-locked */
86dc4139
AM
25381+ for (i = 0; i < AuPlink_NHASH; i++) {
25382+ plink_hlist = &sbinfo->si_plink[i].head;
25383+ hlist_for_each_entry_safe(plink, tmp, plink_hlist, hlist) {
25384+ inode = au_igrab(plink->inode);
25385+ ii_write_lock_child(inode);
25386+ do_put = au_plink_do_half_refresh(inode, br_id);
dece6358
AM
25387+ if (do_put)
25388+ do_put_plink(plink, 1);
86dc4139
AM
25389+ ii_write_unlock(inode);
25390+ iput(inode);
dece6358 25391+ }
dece6358
AM
25392+ }
25393+}
7f207e10
AM
25394diff -urN /usr/share/empty/fs/aufs/poll.c linux/fs/aufs/poll.c
25395--- /usr/share/empty/fs/aufs/poll.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 25396+++ linux/fs/aufs/poll.c 2016-02-28 11:26:32.573304539 +0100
b912730e 25397@@ -0,0 +1,52 @@
dece6358 25398+/*
8cdd5066 25399+ * Copyright (C) 2005-2016 Junjiro R. Okajima
dece6358
AM
25400+ *
25401+ * This program, aufs is free software; you can redistribute it and/or modify
25402+ * it under the terms of the GNU General Public License as published by
25403+ * the Free Software Foundation; either version 2 of the License, or
25404+ * (at your option) any later version.
25405+ *
25406+ * This program is distributed in the hope that it will be useful,
25407+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25408+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25409+ * GNU General Public License for more details.
25410+ *
25411+ * You should have received a copy of the GNU General Public License
523b37e3 25412+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
dece6358
AM
25413+ */
25414+
1308ab2a 25415+/*
25416+ * poll operation
25417+ * There is only one filesystem which implements ->poll operation, currently.
25418+ */
25419+
25420+#include "aufs.h"
25421+
25422+unsigned int aufs_poll(struct file *file, poll_table *wait)
25423+{
25424+ unsigned int mask;
25425+ int err;
25426+ struct file *h_file;
1308ab2a 25427+ struct super_block *sb;
25428+
25429+ /* We should pretend an error happened. */
25430+ mask = POLLERR /* | POLLIN | POLLOUT */;
b912730e 25431+ sb = file->f_path.dentry->d_sb;
e49829fe 25432+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
b912730e
AM
25433+
25434+ h_file = au_read_pre(file, /*keep_fi*/0);
25435+ err = PTR_ERR(h_file);
25436+ if (IS_ERR(h_file))
1308ab2a 25437+ goto out;
25438+
25439+ /* it is not an error if h_file has no operation */
25440+ mask = DEFAULT_POLLMASK;
523b37e3 25441+ if (h_file->f_op->poll)
1308ab2a 25442+ mask = h_file->f_op->poll(h_file, wait);
b912730e 25443+ fput(h_file); /* instead of au_read_post() */
1308ab2a 25444+
4f0767ce 25445+out:
1308ab2a 25446+ si_read_unlock(sb);
25447+ AuTraceErr((int)mask);
25448+ return mask;
25449+}
c1595e42
JR
25450diff -urN /usr/share/empty/fs/aufs/posix_acl.c linux/fs/aufs/posix_acl.c
25451--- /usr/share/empty/fs/aufs/posix_acl.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066
JR
25452+++ linux/fs/aufs/posix_acl.c 2016-02-28 11:26:32.573304539 +0100
25453@@ -0,0 +1,98 @@
c1595e42 25454+/*
8cdd5066 25455+ * Copyright (C) 2014-2016 Junjiro R. Okajima
c1595e42
JR
25456+ *
25457+ * This program, aufs is free software; you can redistribute it and/or modify
25458+ * it under the terms of the GNU General Public License as published by
25459+ * the Free Software Foundation; either version 2 of the License, or
25460+ * (at your option) any later version.
25461+ *
25462+ * This program is distributed in the hope that it will be useful,
25463+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25464+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25465+ * GNU General Public License for more details.
25466+ *
25467+ * You should have received a copy of the GNU General Public License
25468+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
25469+ */
25470+
25471+/*
25472+ * posix acl operations
25473+ */
25474+
25475+#include <linux/fs.h>
c1595e42
JR
25476+#include "aufs.h"
25477+
25478+struct posix_acl *aufs_get_acl(struct inode *inode, int type)
25479+{
25480+ struct posix_acl *acl;
25481+ int err;
25482+ aufs_bindex_t bindex;
25483+ struct inode *h_inode;
25484+ struct super_block *sb;
25485+
25486+ acl = NULL;
25487+ sb = inode->i_sb;
25488+ si_read_lock(sb, AuLock_FLUSH);
25489+ ii_read_lock_child(inode);
25490+ if (!(sb->s_flags & MS_POSIXACL))
25491+ goto out;
25492+
25493+ bindex = au_ibstart(inode);
25494+ h_inode = au_h_iptr(inode, bindex);
25495+ if (unlikely(!h_inode
25496+ || ((h_inode->i_mode & S_IFMT)
25497+ != (inode->i_mode & S_IFMT)))) {
25498+ err = au_busy_or_stale();
25499+ acl = ERR_PTR(err);
25500+ goto out;
25501+ }
25502+
25503+ /* always topmost only */
25504+ acl = get_acl(h_inode, type);
25505+
25506+out:
25507+ ii_read_unlock(inode);
25508+ si_read_unlock(sb);
25509+
25510+ AuTraceErrPtr(acl);
25511+ return acl;
25512+}
25513+
25514+int aufs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
25515+{
25516+ int err;
25517+ ssize_t ssz;
25518+ struct dentry *dentry;
25519+ struct au_srxattr arg = {
25520+ .type = AU_ACL_SET,
25521+ .u.acl_set = {
25522+ .acl = acl,
25523+ .type = type
25524+ },
25525+ };
25526+
25527+ mutex_lock(&inode->i_mutex);
25528+ if (inode->i_ino == AUFS_ROOT_INO)
25529+ dentry = dget(inode->i_sb->s_root);
25530+ else {
25531+ dentry = d_find_alias(inode);
25532+ if (!dentry)
25533+ dentry = d_find_any_alias(inode);
25534+ if (!dentry) {
25535+ pr_warn("cannot handle this inode, "
25536+ "please report to aufs-users ML\n");
25537+ err = -ENOENT;
25538+ goto out;
25539+ }
25540+ }
25541+
25542+ ssz = au_srxattr(dentry, &arg);
25543+ dput(dentry);
25544+ err = ssz;
25545+ if (ssz >= 0)
25546+ err = 0;
25547+
25548+out:
25549+ mutex_unlock(&inode->i_mutex);
25550+ return err;
25551+}
7f207e10
AM
25552diff -urN /usr/share/empty/fs/aufs/procfs.c linux/fs/aufs/procfs.c
25553--- /usr/share/empty/fs/aufs/procfs.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 25554+++ linux/fs/aufs/procfs.c 2016-02-28 11:26:32.573304539 +0100
523b37e3 25555@@ -0,0 +1,169 @@
e49829fe 25556+/*
8cdd5066 25557+ * Copyright (C) 2010-2016 Junjiro R. Okajima
e49829fe
JR
25558+ *
25559+ * This program, aufs is free software; you can redistribute it and/or modify
25560+ * it under the terms of the GNU General Public License as published by
25561+ * the Free Software Foundation; either version 2 of the License, or
25562+ * (at your option) any later version.
25563+ *
25564+ * This program is distributed in the hope that it will be useful,
25565+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25566+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25567+ * GNU General Public License for more details.
25568+ *
25569+ * You should have received a copy of the GNU General Public License
523b37e3 25570+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
e49829fe
JR
25571+ */
25572+
25573+/*
25574+ * procfs interfaces
25575+ */
25576+
25577+#include <linux/proc_fs.h>
25578+#include "aufs.h"
25579+
25580+static int au_procfs_plm_release(struct inode *inode, struct file *file)
25581+{
25582+ struct au_sbinfo *sbinfo;
25583+
25584+ sbinfo = file->private_data;
25585+ if (sbinfo) {
25586+ au_plink_maint_leave(sbinfo);
25587+ kobject_put(&sbinfo->si_kobj);
25588+ }
25589+
25590+ return 0;
25591+}
25592+
25593+static void au_procfs_plm_write_clean(struct file *file)
25594+{
25595+ struct au_sbinfo *sbinfo;
25596+
25597+ sbinfo = file->private_data;
25598+ if (sbinfo)
25599+ au_plink_clean(sbinfo->si_sb, /*verbose*/0);
25600+}
25601+
25602+static int au_procfs_plm_write_si(struct file *file, unsigned long id)
25603+{
25604+ int err;
25605+ struct super_block *sb;
25606+ struct au_sbinfo *sbinfo;
25607+
25608+ err = -EBUSY;
25609+ if (unlikely(file->private_data))
25610+ goto out;
25611+
25612+ sb = NULL;
53392da6 25613+ /* don't use au_sbilist_lock() here */
e49829fe
JR
25614+ spin_lock(&au_sbilist.spin);
25615+ list_for_each_entry(sbinfo, &au_sbilist.head, si_list)
25616+ if (id == sysaufs_si_id(sbinfo)) {
25617+ kobject_get(&sbinfo->si_kobj);
25618+ sb = sbinfo->si_sb;
25619+ break;
25620+ }
25621+ spin_unlock(&au_sbilist.spin);
25622+
25623+ err = -EINVAL;
25624+ if (unlikely(!sb))
25625+ goto out;
25626+
25627+ err = au_plink_maint_enter(sb);
25628+ if (!err)
25629+ /* keep kobject_get() */
25630+ file->private_data = sbinfo;
25631+ else
25632+ kobject_put(&sbinfo->si_kobj);
25633+out:
25634+ return err;
25635+}
25636+
25637+/*
25638+ * Accept a valid "si=xxxx" only.
25639+ * Once it is accepted successfully, accept "clean" too.
25640+ */
25641+static ssize_t au_procfs_plm_write(struct file *file, const char __user *ubuf,
25642+ size_t count, loff_t *ppos)
25643+{
25644+ ssize_t err;
25645+ unsigned long id;
25646+ /* last newline is allowed */
25647+ char buf[3 + sizeof(unsigned long) * 2 + 1];
25648+
25649+ err = -EACCES;
25650+ if (unlikely(!capable(CAP_SYS_ADMIN)))
25651+ goto out;
25652+
25653+ err = -EINVAL;
25654+ if (unlikely(count > sizeof(buf)))
25655+ goto out;
25656+
25657+ err = copy_from_user(buf, ubuf, count);
25658+ if (unlikely(err)) {
25659+ err = -EFAULT;
25660+ goto out;
25661+ }
25662+ buf[count] = 0;
25663+
25664+ err = -EINVAL;
25665+ if (!strcmp("clean", buf)) {
25666+ au_procfs_plm_write_clean(file);
25667+ goto out_success;
25668+ } else if (unlikely(strncmp("si=", buf, 3)))
25669+ goto out;
25670+
9dbd164d 25671+ err = kstrtoul(buf + 3, 16, &id);
e49829fe
JR
25672+ if (unlikely(err))
25673+ goto out;
25674+
25675+ err = au_procfs_plm_write_si(file, id);
25676+ if (unlikely(err))
25677+ goto out;
25678+
25679+out_success:
25680+ err = count; /* success */
25681+out:
25682+ return err;
25683+}
25684+
25685+static const struct file_operations au_procfs_plm_fop = {
25686+ .write = au_procfs_plm_write,
25687+ .release = au_procfs_plm_release,
25688+ .owner = THIS_MODULE
25689+};
25690+
25691+/* ---------------------------------------------------------------------- */
25692+
25693+static struct proc_dir_entry *au_procfs_dir;
25694+
25695+void au_procfs_fin(void)
25696+{
25697+ remove_proc_entry(AUFS_PLINK_MAINT_NAME, au_procfs_dir);
25698+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
25699+}
25700+
25701+int __init au_procfs_init(void)
25702+{
25703+ int err;
25704+ struct proc_dir_entry *entry;
25705+
25706+ err = -ENOMEM;
25707+ au_procfs_dir = proc_mkdir(AUFS_PLINK_MAINT_DIR, NULL);
25708+ if (unlikely(!au_procfs_dir))
25709+ goto out;
25710+
25711+ entry = proc_create(AUFS_PLINK_MAINT_NAME, S_IFREG | S_IWUSR,
25712+ au_procfs_dir, &au_procfs_plm_fop);
25713+ if (unlikely(!entry))
25714+ goto out_dir;
25715+
25716+ err = 0;
25717+ goto out; /* success */
25718+
25719+
25720+out_dir:
25721+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
25722+out:
25723+ return err;
25724+}
7f207e10
AM
25725diff -urN /usr/share/empty/fs/aufs/rdu.c linux/fs/aufs/rdu.c
25726--- /usr/share/empty/fs/aufs/rdu.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 25727+++ linux/fs/aufs/rdu.c 2016-02-28 11:26:32.573304539 +0100
523b37e3 25728@@ -0,0 +1,388 @@
1308ab2a 25729+/*
8cdd5066 25730+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1308ab2a 25731+ *
25732+ * This program, aufs is free software; you can redistribute it and/or modify
25733+ * it under the terms of the GNU General Public License as published by
25734+ * the Free Software Foundation; either version 2 of the License, or
25735+ * (at your option) any later version.
25736+ *
25737+ * This program is distributed in the hope that it will be useful,
25738+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25739+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25740+ * GNU General Public License for more details.
25741+ *
25742+ * You should have received a copy of the GNU General Public License
523b37e3 25743+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1308ab2a 25744+ */
25745+
25746+/*
25747+ * readdir in userspace.
25748+ */
25749+
b752ccd1 25750+#include <linux/compat.h>
4a4d8108 25751+#include <linux/fs_stack.h>
1308ab2a 25752+#include <linux/security.h>
1308ab2a 25753+#include "aufs.h"
25754+
/* bits for struct aufs_rdu.flags, and the helpers to test/set/clear them */
#define AuRdu_CALLED	(1 << 0)
#define AuRdu_CONT	(1 << 1)
#define AuRdu_FULL	(1 << 2)
#define au_ftest_rdu(flags, name)	((flags) & AuRdu_##name)
#define au_fset_rdu(flags, name) \
	do { \
		(flags) |= AuRdu_##name; \
	} while (0)
#define au_fclr_rdu(flags, name) \
	do { \
		(flags) &= ~AuRdu_##name; \
	} while (0)
1308ab2a 25764+
25765+struct au_rdu_arg {
392086de 25766+ struct dir_context ctx;
1308ab2a 25767+ struct aufs_rdu *rdu;
25768+ union au_rdu_ent_ul ent;
25769+ unsigned long end;
25770+
25771+ struct super_block *sb;
25772+ int err;
25773+};
25774+
392086de 25775+static int au_rdu_fill(struct dir_context *ctx, const char *name, int nlen,
1308ab2a 25776+ loff_t offset, u64 h_ino, unsigned int d_type)
25777+{
25778+ int err, len;
392086de 25779+ struct au_rdu_arg *arg = container_of(ctx, struct au_rdu_arg, ctx);
1308ab2a 25780+ struct aufs_rdu *rdu = arg->rdu;
25781+ struct au_rdu_ent ent;
25782+
25783+ err = 0;
25784+ arg->err = 0;
25785+ au_fset_rdu(rdu->cookie.flags, CALLED);
25786+ len = au_rdu_len(nlen);
25787+ if (arg->ent.ul + len < arg->end) {
25788+ ent.ino = h_ino;
25789+ ent.bindex = rdu->cookie.bindex;
25790+ ent.type = d_type;
25791+ ent.nlen = nlen;
4a4d8108
AM
25792+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
25793+ ent.type = DT_UNKNOWN;
1308ab2a 25794+
9dbd164d 25795+ /* unnecessary to support mmap_sem since this is a dir */
1308ab2a 25796+ err = -EFAULT;
25797+ if (copy_to_user(arg->ent.e, &ent, sizeof(ent)))
25798+ goto out;
25799+ if (copy_to_user(arg->ent.e->name, name, nlen))
25800+ goto out;
25801+ /* the terminating NULL */
25802+ if (__put_user(0, arg->ent.e->name + nlen))
25803+ goto out;
25804+ err = 0;
25805+ /* AuDbg("%p, %.*s\n", arg->ent.p, nlen, name); */
25806+ arg->ent.ul += len;
25807+ rdu->rent++;
25808+ } else {
25809+ err = -EFAULT;
25810+ au_fset_rdu(rdu->cookie.flags, FULL);
25811+ rdu->full = 1;
25812+ rdu->tail = arg->ent;
25813+ }
25814+
4f0767ce 25815+out:
1308ab2a 25816+ /* AuTraceErr(err); */
25817+ return err;
25818+}
25819+
25820+static int au_rdu_do(struct file *h_file, struct au_rdu_arg *arg)
25821+{
25822+ int err;
25823+ loff_t offset;
25824+ struct au_rdu_cookie *cookie = &arg->rdu->cookie;
25825+
92d182d2 25826+ /* we don't have to care (FMODE_32BITHASH | FMODE_64BITHASH) for ext4 */
1308ab2a 25827+ offset = vfsub_llseek(h_file, cookie->h_pos, SEEK_SET);
25828+ err = offset;
25829+ if (unlikely(offset != cookie->h_pos))
25830+ goto out;
25831+
25832+ err = 0;
25833+ do {
25834+ arg->err = 0;
25835+ au_fclr_rdu(cookie->flags, CALLED);
25836+ /* smp_mb(); */
392086de 25837+ err = vfsub_iterate_dir(h_file, &arg->ctx);
1308ab2a 25838+ if (err >= 0)
25839+ err = arg->err;
25840+ } while (!err
25841+ && au_ftest_rdu(cookie->flags, CALLED)
25842+ && !au_ftest_rdu(cookie->flags, FULL));
25843+ cookie->h_pos = h_file->f_pos;
25844+
4f0767ce 25845+out:
1308ab2a 25846+ AuTraceErr(err);
25847+ return err;
25848+}
25849+
25850+static int au_rdu(struct file *file, struct aufs_rdu *rdu)
25851+{
25852+ int err;
25853+ aufs_bindex_t bend;
392086de
AM
25854+ struct au_rdu_arg arg = {
25855+ .ctx = {
2000de60 25856+ .actor = au_rdu_fill
392086de
AM
25857+ }
25858+ };
1308ab2a 25859+ struct dentry *dentry;
25860+ struct inode *inode;
25861+ struct file *h_file;
25862+ struct au_rdu_cookie *cookie = &rdu->cookie;
25863+
25864+ err = !access_ok(VERIFY_WRITE, rdu->ent.e, rdu->sz);
25865+ if (unlikely(err)) {
25866+ err = -EFAULT;
25867+ AuTraceErr(err);
25868+ goto out;
25869+ }
25870+ rdu->rent = 0;
25871+ rdu->tail = rdu->ent;
25872+ rdu->full = 0;
25873+ arg.rdu = rdu;
25874+ arg.ent = rdu->ent;
25875+ arg.end = arg.ent.ul;
25876+ arg.end += rdu->sz;
25877+
25878+ err = -ENOTDIR;
523b37e3 25879+ if (unlikely(!file->f_op->iterate))
1308ab2a 25880+ goto out;
25881+
25882+ err = security_file_permission(file, MAY_READ);
25883+ AuTraceErr(err);
25884+ if (unlikely(err))
25885+ goto out;
25886+
2000de60 25887+ dentry = file->f_path.dentry;
5527c038 25888+ inode = d_inode(dentry);
1308ab2a 25889+#if 1
25890+ mutex_lock(&inode->i_mutex);
25891+#else
25892+ err = mutex_lock_killable(&inode->i_mutex);
25893+ AuTraceErr(err);
25894+ if (unlikely(err))
25895+ goto out;
25896+#endif
1308ab2a 25897+
25898+ arg.sb = inode->i_sb;
e49829fe
JR
25899+ err = si_read_lock(arg.sb, AuLock_FLUSH | AuLock_NOPLM);
25900+ if (unlikely(err))
25901+ goto out_mtx;
027c5e7a
AM
25902+ err = au_alive_dir(dentry);
25903+ if (unlikely(err))
25904+ goto out_si;
e49829fe 25905+ /* todo: reval? */
1308ab2a 25906+ fi_read_lock(file);
25907+
25908+ err = -EAGAIN;
25909+ if (unlikely(au_ftest_rdu(cookie->flags, CONT)
25910+ && cookie->generation != au_figen(file)))
25911+ goto out_unlock;
25912+
25913+ err = 0;
25914+ if (!rdu->blk) {
25915+ rdu->blk = au_sbi(arg.sb)->si_rdblk;
25916+ if (!rdu->blk)
25917+ rdu->blk = au_dir_size(file, /*dentry*/NULL);
25918+ }
25919+ bend = au_fbstart(file);
25920+ if (cookie->bindex < bend)
25921+ cookie->bindex = bend;
4a4d8108 25922+ bend = au_fbend_dir(file);
1308ab2a 25923+ /* AuDbg("b%d, b%d\n", cookie->bindex, bend); */
25924+ for (; !err && cookie->bindex <= bend;
25925+ cookie->bindex++, cookie->h_pos = 0) {
4a4d8108 25926+ h_file = au_hf_dir(file, cookie->bindex);
1308ab2a 25927+ if (!h_file)
25928+ continue;
25929+
25930+ au_fclr_rdu(cookie->flags, FULL);
25931+ err = au_rdu_do(h_file, &arg);
25932+ AuTraceErr(err);
25933+ if (unlikely(au_ftest_rdu(cookie->flags, FULL) || err))
25934+ break;
25935+ }
25936+ AuDbg("rent %llu\n", rdu->rent);
25937+
25938+ if (!err && !au_ftest_rdu(cookie->flags, CONT)) {
25939+ rdu->shwh = !!au_opt_test(au_sbi(arg.sb)->si_mntflags, SHWH);
25940+ au_fset_rdu(cookie->flags, CONT);
25941+ cookie->generation = au_figen(file);
25942+ }
25943+
25944+ ii_read_lock_child(inode);
25945+ fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibstart(inode)));
25946+ ii_read_unlock(inode);
25947+
4f0767ce 25948+out_unlock:
1308ab2a 25949+ fi_read_unlock(file);
027c5e7a 25950+out_si:
1308ab2a 25951+ si_read_unlock(arg.sb);
4f0767ce 25952+out_mtx:
1308ab2a 25953+ mutex_unlock(&inode->i_mutex);
4f0767ce 25954+out:
1308ab2a 25955+ AuTraceErr(err);
25956+ return err;
25957+}
25958+
25959+static int au_rdu_ino(struct file *file, struct aufs_rdu *rdu)
25960+{
25961+ int err;
25962+ ino_t ino;
25963+ unsigned long long nent;
25964+ union au_rdu_ent_ul *u;
25965+ struct au_rdu_ent ent;
25966+ struct super_block *sb;
25967+
25968+ err = 0;
25969+ nent = rdu->nent;
25970+ u = &rdu->ent;
2000de60 25971+ sb = file->f_path.dentry->d_sb;
1308ab2a 25972+ si_read_lock(sb, AuLock_FLUSH);
25973+ while (nent-- > 0) {
9dbd164d 25974+ /* unnecessary to support mmap_sem since this is a dir */
1308ab2a 25975+ err = copy_from_user(&ent, u->e, sizeof(ent));
4a4d8108
AM
25976+ if (!err)
25977+ err = !access_ok(VERIFY_WRITE, &u->e->ino, sizeof(ino));
1308ab2a 25978+ if (unlikely(err)) {
25979+ err = -EFAULT;
25980+ AuTraceErr(err);
25981+ break;
25982+ }
25983+
25984+ /* AuDbg("b%d, i%llu\n", ent.bindex, ent.ino); */
25985+ if (!ent.wh)
25986+ err = au_ino(sb, ent.bindex, ent.ino, ent.type, &ino);
25987+ else
25988+ err = au_wh_ino(sb, ent.bindex, ent.ino, ent.type,
25989+ &ino);
25990+ if (unlikely(err)) {
25991+ AuTraceErr(err);
25992+ break;
25993+ }
25994+
25995+ err = __put_user(ino, &u->e->ino);
25996+ if (unlikely(err)) {
25997+ err = -EFAULT;
25998+ AuTraceErr(err);
25999+ break;
26000+ }
26001+ u->ul += au_rdu_len(ent.nlen);
26002+ }
26003+ si_read_unlock(sb);
26004+
26005+ return err;
26006+}
26007+
26008+/* ---------------------------------------------------------------------- */
26009+
26010+static int au_rdu_verify(struct aufs_rdu *rdu)
26011+{
b752ccd1 26012+ AuDbg("rdu{%llu, %p, %u | %u | %llu, %u, %u | "
1308ab2a 26013+ "%llu, b%d, 0x%x, g%u}\n",
b752ccd1 26014+ rdu->sz, rdu->ent.e, rdu->verify[AufsCtlRduV_SZ],
1308ab2a 26015+ rdu->blk,
26016+ rdu->rent, rdu->shwh, rdu->full,
26017+ rdu->cookie.h_pos, rdu->cookie.bindex, rdu->cookie.flags,
26018+ rdu->cookie.generation);
dece6358 26019+
b752ccd1 26020+ if (rdu->verify[AufsCtlRduV_SZ] == sizeof(*rdu))
1308ab2a 26021+ return 0;
dece6358 26022+
b752ccd1
AM
26023+ AuDbg("%u:%u\n",
26024+ rdu->verify[AufsCtlRduV_SZ], (unsigned int)sizeof(*rdu));
1308ab2a 26025+ return -EINVAL;
26026+}
26027+
26028+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
dece6358 26029+{
1308ab2a 26030+ long err, e;
26031+ struct aufs_rdu rdu;
26032+ void __user *p = (void __user *)arg;
dece6358 26033+
1308ab2a 26034+ err = copy_from_user(&rdu, p, sizeof(rdu));
26035+ if (unlikely(err)) {
26036+ err = -EFAULT;
26037+ AuTraceErr(err);
26038+ goto out;
26039+ }
26040+ err = au_rdu_verify(&rdu);
dece6358
AM
26041+ if (unlikely(err))
26042+ goto out;
26043+
1308ab2a 26044+ switch (cmd) {
26045+ case AUFS_CTL_RDU:
26046+ err = au_rdu(file, &rdu);
26047+ if (unlikely(err))
26048+ break;
dece6358 26049+
1308ab2a 26050+ e = copy_to_user(p, &rdu, sizeof(rdu));
26051+ if (unlikely(e)) {
26052+ err = -EFAULT;
26053+ AuTraceErr(err);
26054+ }
26055+ break;
26056+ case AUFS_CTL_RDU_INO:
26057+ err = au_rdu_ino(file, &rdu);
26058+ break;
26059+
26060+ default:
4a4d8108 26061+ /* err = -ENOTTY; */
1308ab2a 26062+ err = -EINVAL;
26063+ }
dece6358 26064+
4f0767ce 26065+out:
1308ab2a 26066+ AuTraceErr(err);
26067+ return err;
1facf9fc 26068+}
b752ccd1
AM
26069+
26070+#ifdef CONFIG_COMPAT
26071+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
26072+{
26073+ long err, e;
26074+ struct aufs_rdu rdu;
26075+ void __user *p = compat_ptr(arg);
26076+
26077+ /* todo: get_user()? */
26078+ err = copy_from_user(&rdu, p, sizeof(rdu));
26079+ if (unlikely(err)) {
26080+ err = -EFAULT;
26081+ AuTraceErr(err);
26082+ goto out;
26083+ }
26084+ rdu.ent.e = compat_ptr(rdu.ent.ul);
26085+ err = au_rdu_verify(&rdu);
26086+ if (unlikely(err))
26087+ goto out;
26088+
26089+ switch (cmd) {
26090+ case AUFS_CTL_RDU:
26091+ err = au_rdu(file, &rdu);
26092+ if (unlikely(err))
26093+ break;
26094+
26095+ rdu.ent.ul = ptr_to_compat(rdu.ent.e);
26096+ rdu.tail.ul = ptr_to_compat(rdu.tail.e);
26097+ e = copy_to_user(p, &rdu, sizeof(rdu));
26098+ if (unlikely(e)) {
26099+ err = -EFAULT;
26100+ AuTraceErr(err);
26101+ }
26102+ break;
26103+ case AUFS_CTL_RDU_INO:
26104+ err = au_rdu_ino(file, &rdu);
26105+ break;
26106+
26107+ default:
26108+ /* err = -ENOTTY; */
26109+ err = -EINVAL;
26110+ }
26111+
4f0767ce 26112+out:
b752ccd1
AM
26113+ AuTraceErr(err);
26114+ return err;
26115+}
26116+#endif
7f207e10
AM
26117diff -urN /usr/share/empty/fs/aufs/rwsem.h linux/fs/aufs/rwsem.h
26118--- /usr/share/empty/fs/aufs/rwsem.h 1970-01-01 01:00:00.000000000 +0100
8cdd5066 26119+++ linux/fs/aufs/rwsem.h 2016-02-28 11:26:32.573304539 +0100
076b876e 26120@@ -0,0 +1,191 @@
1facf9fc 26121+/*
8cdd5066 26122+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 26123+ *
26124+ * This program, aufs is free software; you can redistribute it and/or modify
26125+ * it under the terms of the GNU General Public License as published by
26126+ * the Free Software Foundation; either version 2 of the License, or
26127+ * (at your option) any later version.
dece6358
AM
26128+ *
26129+ * This program is distributed in the hope that it will be useful,
26130+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26131+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26132+ * GNU General Public License for more details.
26133+ *
26134+ * You should have received a copy of the GNU General Public License
523b37e3 26135+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 26136+ */
26137+
26138+/*
26139+ * simple read-write semaphore wrappers
26140+ */
26141+
26142+#ifndef __AUFS_RWSEM_H__
26143+#define __AUFS_RWSEM_H__
26144+
26145+#ifdef __KERNEL__
26146+
4a4d8108 26147+#include "debug.h"
dece6358
AM
26148+
26149+struct au_rwsem {
26150+ struct rw_semaphore rwsem;
26151+#ifdef CONFIG_AUFS_DEBUG
26152+ /* just for debugging, not almighty counter */
26153+ atomic_t rcnt, wcnt;
26154+#endif
26155+};
26156+
26157+#ifdef CONFIG_AUFS_DEBUG
26158+#define AuDbgCntInit(rw) do { \
26159+ atomic_set(&(rw)->rcnt, 0); \
26160+ atomic_set(&(rw)->wcnt, 0); \
26161+ smp_mb(); /* atomic set */ \
26162+} while (0)
26163+
e49829fe 26164+#define AuDbgRcntInc(rw) atomic_inc(&(rw)->rcnt)
dece6358 26165+#define AuDbgRcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->rcnt) < 0)
e49829fe 26166+#define AuDbgWcntInc(rw) atomic_inc(&(rw)->wcnt)
dece6358
AM
26167+#define AuDbgWcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->wcnt) < 0)
26168+#else
26169+#define AuDbgCntInit(rw) do {} while (0)
26170+#define AuDbgRcntInc(rw) do {} while (0)
26171+#define AuDbgRcntDec(rw) do {} while (0)
26172+#define AuDbgWcntInc(rw) do {} while (0)
26173+#define AuDbgWcntDec(rw) do {} while (0)
26174+#endif /* CONFIG_AUFS_DEBUG */
26175+
26176+/* to debug easier, do not make them inlined functions */
26177+#define AuRwMustNoWaiters(rw) AuDebugOn(!list_empty(&(rw)->rwsem.wait_list))
26178+/* rwsem_is_locked() is unusable */
26179+#define AuRwMustReadLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0)
26180+#define AuRwMustWriteLock(rw) AuDebugOn(atomic_read(&(rw)->wcnt) <= 0)
26181+#define AuRwMustAnyLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0 \
26182+ && atomic_read(&(rw)->wcnt) <= 0)
26183+#define AuRwDestroy(rw) AuDebugOn(atomic_read(&(rw)->rcnt) \
26184+ || atomic_read(&(rw)->wcnt))
26185+
e49829fe
JR
26186+#define au_rw_class(rw, key) lockdep_set_class(&(rw)->rwsem, key)
26187+
dece6358
AM
26188+static inline void au_rw_init(struct au_rwsem *rw)
26189+{
26190+ AuDbgCntInit(rw);
26191+ init_rwsem(&rw->rwsem);
26192+}
26193+
26194+static inline void au_rw_init_wlock(struct au_rwsem *rw)
26195+{
26196+ au_rw_init(rw);
26197+ down_write(&rw->rwsem);
26198+ AuDbgWcntInc(rw);
26199+}
26200+
26201+static inline void au_rw_init_wlock_nested(struct au_rwsem *rw,
26202+ unsigned int lsc)
26203+{
26204+ au_rw_init(rw);
26205+ down_write_nested(&rw->rwsem, lsc);
26206+ AuDbgWcntInc(rw);
26207+}
26208+
26209+static inline void au_rw_read_lock(struct au_rwsem *rw)
26210+{
26211+ down_read(&rw->rwsem);
26212+ AuDbgRcntInc(rw);
26213+}
26214+
26215+static inline void au_rw_read_lock_nested(struct au_rwsem *rw, unsigned int lsc)
26216+{
26217+ down_read_nested(&rw->rwsem, lsc);
26218+ AuDbgRcntInc(rw);
26219+}
26220+
26221+static inline void au_rw_read_unlock(struct au_rwsem *rw)
26222+{
26223+ AuRwMustReadLock(rw);
26224+ AuDbgRcntDec(rw);
26225+ up_read(&rw->rwsem);
26226+}
26227+
26228+static inline void au_rw_dgrade_lock(struct au_rwsem *rw)
26229+{
26230+ AuRwMustWriteLock(rw);
26231+ AuDbgRcntInc(rw);
26232+ AuDbgWcntDec(rw);
26233+ downgrade_write(&rw->rwsem);
26234+}
26235+
26236+static inline void au_rw_write_lock(struct au_rwsem *rw)
26237+{
26238+ down_write(&rw->rwsem);
26239+ AuDbgWcntInc(rw);
26240+}
26241+
26242+static inline void au_rw_write_lock_nested(struct au_rwsem *rw,
26243+ unsigned int lsc)
26244+{
26245+ down_write_nested(&rw->rwsem, lsc);
26246+ AuDbgWcntInc(rw);
26247+}
1facf9fc 26248+
dece6358
AM
26249+static inline void au_rw_write_unlock(struct au_rwsem *rw)
26250+{
26251+ AuRwMustWriteLock(rw);
26252+ AuDbgWcntDec(rw);
26253+ up_write(&rw->rwsem);
26254+}
26255+
26256+/* why is not _nested version defined */
26257+static inline int au_rw_read_trylock(struct au_rwsem *rw)
26258+{
076b876e
AM
26259+ int ret;
26260+
26261+ ret = down_read_trylock(&rw->rwsem);
dece6358
AM
26262+ if (ret)
26263+ AuDbgRcntInc(rw);
26264+ return ret;
26265+}
26266+
26267+static inline int au_rw_write_trylock(struct au_rwsem *rw)
26268+{
076b876e
AM
26269+ int ret;
26270+
26271+ ret = down_write_trylock(&rw->rwsem);
dece6358
AM
26272+ if (ret)
26273+ AuDbgWcntInc(rw);
26274+ return ret;
26275+}
26276+
26277+#undef AuDbgCntInit
26278+#undef AuDbgRcntInc
26279+#undef AuDbgRcntDec
26280+#undef AuDbgWcntInc
26281+#undef AuDbgWcntDec
1facf9fc 26282+
26283+#define AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
26284+static inline void prefix##_read_lock(param) \
dece6358 26285+{ au_rw_read_lock(rwsem); } \
1facf9fc 26286+static inline void prefix##_write_lock(param) \
dece6358 26287+{ au_rw_write_lock(rwsem); } \
1facf9fc 26288+static inline int prefix##_read_trylock(param) \
dece6358 26289+{ return au_rw_read_trylock(rwsem); } \
1facf9fc 26290+static inline int prefix##_write_trylock(param) \
dece6358 26291+{ return au_rw_write_trylock(rwsem); }
1facf9fc 26292+/* why is not _nested version defined */
26293+/* static inline void prefix##_read_trylock_nested(param, lsc)
dece6358 26294+{ au_rw_read_trylock_nested(rwsem, lsc)); }
1facf9fc 26295+static inline void prefix##_write_trylock_nestd(param, lsc)
dece6358 26296+{ au_rw_write_trylock_nested(rwsem, lsc); } */
1facf9fc 26297+
26298+#define AuSimpleUnlockRwsemFuncs(prefix, param, rwsem) \
26299+static inline void prefix##_read_unlock(param) \
dece6358 26300+{ au_rw_read_unlock(rwsem); } \
1facf9fc 26301+static inline void prefix##_write_unlock(param) \
dece6358 26302+{ au_rw_write_unlock(rwsem); } \
1facf9fc 26303+static inline void prefix##_downgrade_lock(param) \
dece6358 26304+{ au_rw_dgrade_lock(rwsem); }
1facf9fc 26305+
26306+#define AuSimpleRwsemFuncs(prefix, param, rwsem) \
26307+ AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
26308+ AuSimpleUnlockRwsemFuncs(prefix, param, rwsem)
26309+
26310+#endif /* __KERNEL__ */
26311+#endif /* __AUFS_RWSEM_H__ */
7f207e10
AM
26312diff -urN /usr/share/empty/fs/aufs/sbinfo.c linux/fs/aufs/sbinfo.c
26313--- /usr/share/empty/fs/aufs/sbinfo.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 26314+++ linux/fs/aufs/sbinfo.c 2016-02-28 11:26:32.573304539 +0100
b95c5147 26315@@ -0,0 +1,366 @@
1facf9fc 26316+/*
8cdd5066 26317+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 26318+ *
26319+ * This program, aufs is free software; you can redistribute it and/or modify
26320+ * it under the terms of the GNU General Public License as published by
26321+ * the Free Software Foundation; either version 2 of the License, or
26322+ * (at your option) any later version.
dece6358
AM
26323+ *
26324+ * This program is distributed in the hope that it will be useful,
26325+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26326+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26327+ * GNU General Public License for more details.
26328+ *
26329+ * You should have received a copy of the GNU General Public License
523b37e3 26330+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 26331+ */
26332+
26333+/*
26334+ * superblock private data
26335+ */
26336+
26337+#include "aufs.h"
26338+
26339+/*
26340+ * they are necessary regardless sysfs is disabled.
26341+ */
26342+void au_si_free(struct kobject *kobj)
26343+{
86dc4139 26344+ int i;
1facf9fc 26345+ struct au_sbinfo *sbinfo;
b752ccd1 26346+ char *locked __maybe_unused; /* debug only */
1facf9fc 26347+
26348+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
86dc4139
AM
26349+ for (i = 0; i < AuPlink_NHASH; i++)
26350+ AuDebugOn(!hlist_empty(&sbinfo->si_plink[i].head));
e49829fe 26351+ AuDebugOn(atomic_read(&sbinfo->si_nowait.nw_len));
1facf9fc 26352+
c2c0f25c
AM
26353+ AuDebugOn(!hlist_empty(&sbinfo->si_symlink.head));
26354+
e49829fe 26355+ au_rw_write_lock(&sbinfo->si_rwsem);
1facf9fc 26356+ au_br_free(sbinfo);
e49829fe 26357+ au_rw_write_unlock(&sbinfo->si_rwsem);
b752ccd1
AM
26358+
26359+ AuDebugOn(radix_tree_gang_lookup
26360+ (&sbinfo->au_si_pid.tree, (void **)&locked,
26361+ /*first_index*/PID_MAX_DEFAULT - 1,
26362+ /*max_items*/sizeof(locked)/sizeof(*locked)));
26363+
1facf9fc 26364+ kfree(sbinfo->si_branch);
b752ccd1 26365+ kfree(sbinfo->au_si_pid.bitmap);
1facf9fc 26366+ mutex_destroy(&sbinfo->si_xib_mtx);
dece6358 26367+ AuRwDestroy(&sbinfo->si_rwsem);
1facf9fc 26368+
26369+ kfree(sbinfo);
26370+}
26371+
26372+int au_si_alloc(struct super_block *sb)
26373+{
86dc4139 26374+ int err, i;
1facf9fc 26375+ struct au_sbinfo *sbinfo;
e49829fe 26376+ static struct lock_class_key aufs_si;
1facf9fc 26377+
26378+ err = -ENOMEM;
4a4d8108 26379+ sbinfo = kzalloc(sizeof(*sbinfo), GFP_NOFS);
1facf9fc 26380+ if (unlikely(!sbinfo))
26381+ goto out;
26382+
b752ccd1
AM
26383+ BUILD_BUG_ON(sizeof(unsigned long) !=
26384+ sizeof(*sbinfo->au_si_pid.bitmap));
26385+ sbinfo->au_si_pid.bitmap = kcalloc(BITS_TO_LONGS(PID_MAX_DEFAULT),
26386+ sizeof(*sbinfo->au_si_pid.bitmap),
26387+ GFP_NOFS);
26388+ if (unlikely(!sbinfo->au_si_pid.bitmap))
26389+ goto out_sbinfo;
26390+
1facf9fc 26391+ /* will be reallocated separately */
26392+ sbinfo->si_branch = kzalloc(sizeof(*sbinfo->si_branch), GFP_NOFS);
26393+ if (unlikely(!sbinfo->si_branch))
b752ccd1 26394+ goto out_pidmap;
1facf9fc 26395+
1facf9fc 26396+ err = sysaufs_si_init(sbinfo);
26397+ if (unlikely(err))
26398+ goto out_br;
26399+
26400+ au_nwt_init(&sbinfo->si_nowait);
dece6358 26401+ au_rw_init_wlock(&sbinfo->si_rwsem);
e49829fe 26402+ au_rw_class(&sbinfo->si_rwsem, &aufs_si);
b752ccd1
AM
26403+ spin_lock_init(&sbinfo->au_si_pid.tree_lock);
26404+ INIT_RADIX_TREE(&sbinfo->au_si_pid.tree, GFP_ATOMIC | __GFP_NOFAIL);
26405+
7f207e10 26406+ atomic_long_set(&sbinfo->si_ninodes, 0);
7f207e10
AM
26407+ atomic_long_set(&sbinfo->si_nfiles, 0);
26408+
1facf9fc 26409+ sbinfo->si_bend = -1;
392086de 26410+ sbinfo->si_last_br_id = AUFS_BRANCH_MAX / 2;
1facf9fc 26411+
26412+ sbinfo->si_wbr_copyup = AuWbrCopyup_Def;
26413+ sbinfo->si_wbr_create = AuWbrCreate_Def;
4a4d8108
AM
26414+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + sbinfo->si_wbr_copyup;
26415+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + sbinfo->si_wbr_create;
1facf9fc 26416+
076b876e
AM
26417+ au_fhsm_init(sbinfo);
26418+
e49829fe 26419+ sbinfo->si_mntflags = au_opts_plink(AuOpt_Def);
1facf9fc 26420+
c2c0f25c
AM
26421+ au_sphl_init(&sbinfo->si_symlink);
26422+
392086de
AM
26423+ sbinfo->si_xino_jiffy = jiffies;
26424+ sbinfo->si_xino_expire
26425+ = msecs_to_jiffies(AUFS_XINO_DEF_SEC * MSEC_PER_SEC);
1facf9fc 26426+ mutex_init(&sbinfo->si_xib_mtx);
1facf9fc 26427+ sbinfo->si_xino_brid = -1;
26428+ /* leave si_xib_last_pindex and si_xib_next_bit */
26429+
b912730e
AM
26430+ au_sphl_init(&sbinfo->si_aopen);
26431+
e49829fe 26432+ sbinfo->si_rdcache = msecs_to_jiffies(AUFS_RDCACHE_DEF * MSEC_PER_SEC);
1facf9fc 26433+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
26434+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
26435+ sbinfo->si_dirwh = AUFS_DIRWH_DEF;
26436+
86dc4139
AM
26437+ for (i = 0; i < AuPlink_NHASH; i++)
26438+ au_sphl_init(sbinfo->si_plink + i);
1facf9fc 26439+ init_waitqueue_head(&sbinfo->si_plink_wq);
4a4d8108 26440+ spin_lock_init(&sbinfo->si_plink_maint_lock);
1facf9fc 26441+
523b37e3
AM
26442+ au_sphl_init(&sbinfo->si_files);
26443+
b95c5147
AM
26444+ /* with getattr by default */
26445+ sbinfo->si_iop_array = aufs_iop;
26446+
1facf9fc 26447+ /* leave other members for sysaufs and si_mnt. */
26448+ sbinfo->si_sb = sb;
26449+ sb->s_fs_info = sbinfo;
b752ccd1 26450+ si_pid_set(sb);
1facf9fc 26451+ return 0; /* success */
26452+
4f0767ce 26453+out_br:
1facf9fc 26454+ kfree(sbinfo->si_branch);
4f0767ce 26455+out_pidmap:
b752ccd1 26456+ kfree(sbinfo->au_si_pid.bitmap);
4f0767ce 26457+out_sbinfo:
1facf9fc 26458+ kfree(sbinfo);
4f0767ce 26459+out:
1facf9fc 26460+ return err;
26461+}
26462+
26463+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr)
26464+{
26465+ int err, sz;
26466+ struct au_branch **brp;
26467+
dece6358
AM
26468+ AuRwMustWriteLock(&sbinfo->si_rwsem);
26469+
1facf9fc 26470+ err = -ENOMEM;
26471+ sz = sizeof(*brp) * (sbinfo->si_bend + 1);
26472+ if (unlikely(!sz))
26473+ sz = sizeof(*brp);
26474+ brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS);
26475+ if (brp) {
26476+ sbinfo->si_branch = brp;
26477+ err = 0;
26478+ }
26479+
26480+ return err;
26481+}
26482+
26483+/* ---------------------------------------------------------------------- */
26484+
26485+unsigned int au_sigen_inc(struct super_block *sb)
26486+{
26487+ unsigned int gen;
5527c038 26488+ struct inode *inode;
1facf9fc 26489+
dece6358
AM
26490+ SiMustWriteLock(sb);
26491+
1facf9fc 26492+ gen = ++au_sbi(sb)->si_generation;
26493+ au_update_digen(sb->s_root);
5527c038
JR
26494+ inode = d_inode(sb->s_root);
26495+ au_update_iigen(inode, /*half*/0);
26496+ inode->i_version++;
1facf9fc 26497+ return gen;
26498+}
26499+
26500+aufs_bindex_t au_new_br_id(struct super_block *sb)
26501+{
26502+ aufs_bindex_t br_id;
26503+ int i;
26504+ struct au_sbinfo *sbinfo;
26505+
dece6358
AM
26506+ SiMustWriteLock(sb);
26507+
1facf9fc 26508+ sbinfo = au_sbi(sb);
26509+ for (i = 0; i <= AUFS_BRANCH_MAX; i++) {
26510+ br_id = ++sbinfo->si_last_br_id;
7f207e10 26511+ AuDebugOn(br_id < 0);
1facf9fc 26512+ if (br_id && au_br_index(sb, br_id) < 0)
26513+ return br_id;
26514+ }
26515+
26516+ return -1;
26517+}
26518+
26519+/* ---------------------------------------------------------------------- */
26520+
e49829fe
JR
26521+/* it is ok that new 'nwt' tasks are appended while we are sleeping */
26522+int si_read_lock(struct super_block *sb, int flags)
26523+{
26524+ int err;
26525+
26526+ err = 0;
26527+ if (au_ftest_lock(flags, FLUSH))
26528+ au_nwt_flush(&au_sbi(sb)->si_nowait);
26529+
26530+ si_noflush_read_lock(sb);
26531+ err = au_plink_maint(sb, flags);
26532+ if (unlikely(err))
26533+ si_read_unlock(sb);
26534+
26535+ return err;
26536+}
26537+
26538+int si_write_lock(struct super_block *sb, int flags)
26539+{
26540+ int err;
26541+
26542+ if (au_ftest_lock(flags, FLUSH))
26543+ au_nwt_flush(&au_sbi(sb)->si_nowait);
26544+
26545+ si_noflush_write_lock(sb);
26546+ err = au_plink_maint(sb, flags);
26547+ if (unlikely(err))
26548+ si_write_unlock(sb);
26549+
26550+ return err;
26551+}
26552+
1facf9fc 26553+/* dentry and super_block lock. call at entry point */
e49829fe 26554+int aufs_read_lock(struct dentry *dentry, int flags)
1facf9fc 26555+{
e49829fe 26556+ int err;
027c5e7a 26557+ struct super_block *sb;
e49829fe 26558+
027c5e7a
AM
26559+ sb = dentry->d_sb;
26560+ err = si_read_lock(sb, flags);
26561+ if (unlikely(err))
26562+ goto out;
26563+
26564+ if (au_ftest_lock(flags, DW))
26565+ di_write_lock_child(dentry);
26566+ else
26567+ di_read_lock_child(dentry, flags);
26568+
26569+ if (au_ftest_lock(flags, GEN)) {
26570+ err = au_digen_test(dentry, au_sigen(sb));
79b8bda9
AM
26571+ if (!au_opt_test(au_mntflags(sb), UDBA_NONE))
26572+ AuDebugOn(!err && au_dbrange_test(dentry));
26573+ else if (!err)
26574+ err = au_dbrange_test(dentry);
027c5e7a
AM
26575+ if (unlikely(err))
26576+ aufs_read_unlock(dentry, flags);
e49829fe
JR
26577+ }
26578+
027c5e7a 26579+out:
e49829fe 26580+ return err;
1facf9fc 26581+}
26582+
26583+void aufs_read_unlock(struct dentry *dentry, int flags)
26584+{
26585+ if (au_ftest_lock(flags, DW))
26586+ di_write_unlock(dentry);
26587+ else
26588+ di_read_unlock(dentry, flags);
26589+ si_read_unlock(dentry->d_sb);
26590+}
26591+
26592+void aufs_write_lock(struct dentry *dentry)
26593+{
e49829fe 26594+ si_write_lock(dentry->d_sb, AuLock_FLUSH | AuLock_NOPLMW);
1facf9fc 26595+ di_write_lock_child(dentry);
26596+}
26597+
26598+void aufs_write_unlock(struct dentry *dentry)
26599+{
26600+ di_write_unlock(dentry);
26601+ si_write_unlock(dentry->d_sb);
26602+}
26603+
e49829fe 26604+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags)
1facf9fc 26605+{
e49829fe 26606+ int err;
027c5e7a
AM
26607+ unsigned int sigen;
26608+ struct super_block *sb;
e49829fe 26609+
027c5e7a
AM
26610+ sb = d1->d_sb;
26611+ err = si_read_lock(sb, flags);
26612+ if (unlikely(err))
26613+ goto out;
26614+
b95c5147 26615+ di_write_lock2_child(d1, d2, au_ftest_lock(flags, DIRS));
027c5e7a
AM
26616+
26617+ if (au_ftest_lock(flags, GEN)) {
26618+ sigen = au_sigen(sb);
26619+ err = au_digen_test(d1, sigen);
26620+ AuDebugOn(!err && au_dbrange_test(d1));
26621+ if (!err) {
26622+ err = au_digen_test(d2, sigen);
26623+ AuDebugOn(!err && au_dbrange_test(d2));
26624+ }
26625+ if (unlikely(err))
26626+ aufs_read_and_write_unlock2(d1, d2);
26627+ }
26628+
26629+out:
e49829fe 26630+ return err;
1facf9fc 26631+}
26632+
26633+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2)
26634+{
26635+ di_write_unlock2(d1, d2);
26636+ si_read_unlock(d1->d_sb);
26637+}
b752ccd1
AM
26638+
26639+/* ---------------------------------------------------------------------- */
26640+
26641+int si_pid_test_slow(struct super_block *sb)
26642+{
26643+ void *p;
26644+
26645+ rcu_read_lock();
26646+ p = radix_tree_lookup(&au_sbi(sb)->au_si_pid.tree, current->pid);
26647+ rcu_read_unlock();
26648+
027c5e7a 26649+ return (long)!!p;
b752ccd1
AM
26650+}
26651+
26652+void si_pid_set_slow(struct super_block *sb)
26653+{
26654+ int err;
26655+ struct au_sbinfo *sbinfo;
26656+
26657+ AuDebugOn(si_pid_test_slow(sb));
26658+
26659+ sbinfo = au_sbi(sb);
26660+ err = radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
26661+ AuDebugOn(err);
26662+ spin_lock(&sbinfo->au_si_pid.tree_lock);
26663+ err = radix_tree_insert(&sbinfo->au_si_pid.tree, current->pid,
027c5e7a 26664+ /*any valid ptr*/sb);
b752ccd1
AM
26665+ spin_unlock(&sbinfo->au_si_pid.tree_lock);
26666+ AuDebugOn(err);
26667+ radix_tree_preload_end();
26668+}
26669+
26670+void si_pid_clr_slow(struct super_block *sb)
26671+{
26672+ void *p;
26673+ struct au_sbinfo *sbinfo;
26674+
26675+ AuDebugOn(!si_pid_test_slow(sb));
26676+
26677+ sbinfo = au_sbi(sb);
26678+ spin_lock(&sbinfo->au_si_pid.tree_lock);
26679+ p = radix_tree_delete(&sbinfo->au_si_pid.tree, current->pid);
26680+ spin_unlock(&sbinfo->au_si_pid.tree_lock);
b752ccd1 26681+}
7f207e10
AM
26682diff -urN /usr/share/empty/fs/aufs/spl.h linux/fs/aufs/spl.h
26683--- /usr/share/empty/fs/aufs/spl.h 1970-01-01 01:00:00.000000000 +0100
8cdd5066 26684+++ linux/fs/aufs/spl.h 2016-02-28 11:26:32.573304539 +0100
523b37e3 26685@@ -0,0 +1,111 @@
1facf9fc 26686+/*
8cdd5066 26687+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 26688+ *
26689+ * This program, aufs is free software; you can redistribute it and/or modify
26690+ * it under the terms of the GNU General Public License as published by
26691+ * the Free Software Foundation; either version 2 of the License, or
26692+ * (at your option) any later version.
dece6358
AM
26693+ *
26694+ * This program is distributed in the hope that it will be useful,
26695+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26696+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26697+ * GNU General Public License for more details.
26698+ *
26699+ * You should have received a copy of the GNU General Public License
523b37e3 26700+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 26701+ */
26702+
26703+/*
26704+ * simple list protected by a spinlock
26705+ */
26706+
26707+#ifndef __AUFS_SPL_H__
26708+#define __AUFS_SPL_H__
26709+
26710+#ifdef __KERNEL__
26711+
1facf9fc 26712+struct au_splhead {
26713+ spinlock_t spin;
26714+ struct list_head head;
26715+};
26716+
26717+static inline void au_spl_init(struct au_splhead *spl)
26718+{
26719+ spin_lock_init(&spl->spin);
26720+ INIT_LIST_HEAD(&spl->head);
26721+}
26722+
26723+static inline void au_spl_add(struct list_head *list, struct au_splhead *spl)
26724+{
26725+ spin_lock(&spl->spin);
26726+ list_add(list, &spl->head);
26727+ spin_unlock(&spl->spin);
26728+}
26729+
26730+static inline void au_spl_del(struct list_head *list, struct au_splhead *spl)
26731+{
26732+ spin_lock(&spl->spin);
26733+ list_del(list);
26734+ spin_unlock(&spl->spin);
26735+}
26736+
4a4d8108
AM
26737+static inline void au_spl_del_rcu(struct list_head *list,
26738+ struct au_splhead *spl)
26739+{
26740+ spin_lock(&spl->spin);
26741+ list_del_rcu(list);
26742+ spin_unlock(&spl->spin);
26743+}
26744+
86dc4139
AM
26745+/* ---------------------------------------------------------------------- */
26746+
26747+struct au_sphlhead {
26748+ spinlock_t spin;
26749+ struct hlist_head head;
26750+};
26751+
26752+static inline void au_sphl_init(struct au_sphlhead *sphl)
26753+{
26754+ spin_lock_init(&sphl->spin);
26755+ INIT_HLIST_HEAD(&sphl->head);
26756+}
26757+
26758+static inline void au_sphl_add(struct hlist_node *hlist,
26759+ struct au_sphlhead *sphl)
26760+{
26761+ spin_lock(&sphl->spin);
26762+ hlist_add_head(hlist, &sphl->head);
26763+ spin_unlock(&sphl->spin);
26764+}
26765+
26766+static inline void au_sphl_del(struct hlist_node *hlist,
26767+ struct au_sphlhead *sphl)
26768+{
26769+ spin_lock(&sphl->spin);
26770+ hlist_del(hlist);
26771+ spin_unlock(&sphl->spin);
26772+}
26773+
26774+static inline void au_sphl_del_rcu(struct hlist_node *hlist,
26775+ struct au_sphlhead *sphl)
26776+{
26777+ spin_lock(&sphl->spin);
26778+ hlist_del_rcu(hlist);
26779+ spin_unlock(&sphl->spin);
26780+}
26781+
26782+static inline unsigned long au_sphl_count(struct au_sphlhead *sphl)
26783+{
26784+ unsigned long cnt;
26785+ struct hlist_node *pos;
26786+
26787+ cnt = 0;
26788+ spin_lock(&sphl->spin);
26789+ hlist_for_each(pos, &sphl->head)
26790+ cnt++;
26791+ spin_unlock(&sphl->spin);
26792+ return cnt;
26793+}
26794+
1facf9fc 26795+#endif /* __KERNEL__ */
26796+#endif /* __AUFS_SPL_H__ */
7f207e10
AM
26797diff -urN /usr/share/empty/fs/aufs/super.c linux/fs/aufs/super.c
26798--- /usr/share/empty/fs/aufs/super.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 26799+++ linux/fs/aufs/super.c 2016-02-28 11:26:32.573304539 +0100
be52b249 26800@@ -0,0 +1,1039 @@
1facf9fc 26801+/*
8cdd5066 26802+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 26803+ *
26804+ * This program, aufs is free software; you can redistribute it and/or modify
26805+ * it under the terms of the GNU General Public License as published by
26806+ * the Free Software Foundation; either version 2 of the License, or
26807+ * (at your option) any later version.
dece6358
AM
26808+ *
26809+ * This program is distributed in the hope that it will be useful,
26810+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26811+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26812+ * GNU General Public License for more details.
26813+ *
26814+ * You should have received a copy of the GNU General Public License
523b37e3 26815+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 26816+ */
26817+
26818+/*
26819+ * mount and super_block operations
26820+ */
26821+
f6c5ef8b 26822+#include <linux/mm.h>
1facf9fc 26823+#include <linux/seq_file.h>
26824+#include <linux/statfs.h>
7f207e10 26825+#include <linux/vmalloc.h>
1facf9fc 26826+#include "aufs.h"
26827+
26828+/*
26829+ * super_operations
26830+ */
26831+static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused)
26832+{
26833+ struct au_icntnr *c;
26834+
26835+ c = au_cache_alloc_icntnr();
26836+ if (c) {
027c5e7a 26837+ au_icntnr_init(c);
1facf9fc 26838+ c->vfs_inode.i_version = 1; /* sigen(sb); */
26839+ c->iinfo.ii_hinode = NULL;
26840+ return &c->vfs_inode;
26841+ }
26842+ return NULL;
26843+}
26844+
027c5e7a
AM
26845+static void aufs_destroy_inode_cb(struct rcu_head *head)
26846+{
26847+ struct inode *inode = container_of(head, struct inode, i_rcu);
26848+
b4510431 26849+ INIT_HLIST_HEAD(&inode->i_dentry);
027c5e7a
AM
26850+ au_cache_free_icntnr(container_of(inode, struct au_icntnr, vfs_inode));
26851+}
26852+
1facf9fc 26853+static void aufs_destroy_inode(struct inode *inode)
26854+{
26855+ au_iinfo_fin(inode);
027c5e7a 26856+ call_rcu(&inode->i_rcu, aufs_destroy_inode_cb);
1facf9fc 26857+}
26858+
26859+struct inode *au_iget_locked(struct super_block *sb, ino_t ino)
26860+{
26861+ struct inode *inode;
26862+ int err;
26863+
26864+ inode = iget_locked(sb, ino);
26865+ if (unlikely(!inode)) {
26866+ inode = ERR_PTR(-ENOMEM);
26867+ goto out;
26868+ }
26869+ if (!(inode->i_state & I_NEW))
26870+ goto out;
26871+
26872+ err = au_xigen_new(inode);
26873+ if (!err)
26874+ err = au_iinfo_init(inode);
26875+ if (!err)
26876+ inode->i_version++;
26877+ else {
26878+ iget_failed(inode);
26879+ inode = ERR_PTR(err);
26880+ }
26881+
4f0767ce 26882+out:
1facf9fc 26883+ /* never return NULL */
26884+ AuDebugOn(!inode);
26885+ AuTraceErrPtr(inode);
26886+ return inode;
26887+}
26888+
26889+/* lock free root dinfo */
26890+static int au_show_brs(struct seq_file *seq, struct super_block *sb)
26891+{
26892+ int err;
26893+ aufs_bindex_t bindex, bend;
26894+ struct path path;
4a4d8108 26895+ struct au_hdentry *hdp;
1facf9fc 26896+ struct au_branch *br;
076b876e 26897+ au_br_perm_str_t perm;
1facf9fc 26898+
26899+ err = 0;
26900+ bend = au_sbend(sb);
4a4d8108 26901+ hdp = au_di(sb->s_root)->di_hdentry;
1facf9fc 26902+ for (bindex = 0; !err && bindex <= bend; bindex++) {
26903+ br = au_sbr(sb, bindex);
86dc4139 26904+ path.mnt = au_br_mnt(br);
4a4d8108 26905+ path.dentry = hdp[bindex].hd_dentry;
1facf9fc 26906+ err = au_seq_path(seq, &path);
79b8bda9 26907+ if (!err) {
076b876e 26908+ au_optstr_br_perm(&perm, br->br_perm);
79b8bda9
AM
26909+ seq_printf(seq, "=%s", perm.a);
26910+ if (bindex != bend)
26911+ seq_putc(seq, ':');
1e00d052 26912+ }
1facf9fc 26913+ }
79b8bda9
AM
26914+ if (unlikely(err || seq_has_overflowed(seq)))
26915+ err = -E2BIG;
1facf9fc 26916+
26917+ return err;
26918+}
26919+
26920+static void au_show_wbr_create(struct seq_file *m, int v,
26921+ struct au_sbinfo *sbinfo)
26922+{
26923+ const char *pat;
26924+
dece6358
AM
26925+ AuRwMustAnyLock(&sbinfo->si_rwsem);
26926+
c2b27bf2 26927+ seq_puts(m, ",create=");
1facf9fc 26928+ pat = au_optstr_wbr_create(v);
26929+ switch (v) {
26930+ case AuWbrCreate_TDP:
26931+ case AuWbrCreate_RR:
26932+ case AuWbrCreate_MFS:
26933+ case AuWbrCreate_PMFS:
c2b27bf2 26934+ seq_puts(m, pat);
1facf9fc 26935+ break;
26936+ case AuWbrCreate_MFSV:
26937+ seq_printf(m, /*pat*/"mfs:%lu",
e49829fe
JR
26938+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
26939+ / MSEC_PER_SEC);
1facf9fc 26940+ break;
26941+ case AuWbrCreate_PMFSV:
26942+ seq_printf(m, /*pat*/"pmfs:%lu",
e49829fe
JR
26943+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
26944+ / MSEC_PER_SEC);
1facf9fc 26945+ break;
26946+ case AuWbrCreate_MFSRR:
26947+ seq_printf(m, /*pat*/"mfsrr:%llu",
26948+ sbinfo->si_wbr_mfs.mfsrr_watermark);
26949+ break;
26950+ case AuWbrCreate_MFSRRV:
26951+ seq_printf(m, /*pat*/"mfsrr:%llu:%lu",
26952+ sbinfo->si_wbr_mfs.mfsrr_watermark,
e49829fe
JR
26953+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
26954+ / MSEC_PER_SEC);
1facf9fc 26955+ break;
392086de
AM
26956+ case AuWbrCreate_PMFSRR:
26957+ seq_printf(m, /*pat*/"pmfsrr:%llu",
26958+ sbinfo->si_wbr_mfs.mfsrr_watermark);
26959+ break;
26960+ case AuWbrCreate_PMFSRRV:
26961+ seq_printf(m, /*pat*/"pmfsrr:%llu:%lu",
26962+ sbinfo->si_wbr_mfs.mfsrr_watermark,
26963+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
26964+ / MSEC_PER_SEC);
26965+ break;
1facf9fc 26966+ }
26967+}
26968+
7eafdf33 26969+static int au_show_xino(struct seq_file *seq, struct super_block *sb)
1facf9fc 26970+{
26971+#ifdef CONFIG_SYSFS
26972+ return 0;
26973+#else
26974+ int err;
26975+ const int len = sizeof(AUFS_XINO_FNAME) - 1;
26976+ aufs_bindex_t bindex, brid;
1facf9fc 26977+ struct qstr *name;
26978+ struct file *f;
26979+ struct dentry *d, *h_root;
4a4d8108 26980+ struct au_hdentry *hdp;
1facf9fc 26981+
dece6358
AM
26982+ AuRwMustAnyLock(&sbinfo->si_rwsem);
26983+
1facf9fc 26984+ err = 0;
1facf9fc 26985+ f = au_sbi(sb)->si_xib;
26986+ if (!f)
26987+ goto out;
26988+
26989+ /* stop printing the default xino path on the first writable branch */
26990+ h_root = NULL;
26991+ brid = au_xino_brid(sb);
26992+ if (brid >= 0) {
26993+ bindex = au_br_index(sb, brid);
4a4d8108
AM
26994+ hdp = au_di(sb->s_root)->di_hdentry;
26995+ h_root = hdp[0 + bindex].hd_dentry;
1facf9fc 26996+ }
2000de60 26997+ d = f->f_path.dentry;
1facf9fc 26998+ name = &d->d_name;
26999+ /* safe ->d_parent because the file is unlinked */
27000+ if (d->d_parent == h_root
27001+ && name->len == len
27002+ && !memcmp(name->name, AUFS_XINO_FNAME, len))
27003+ goto out;
27004+
27005+ seq_puts(seq, ",xino=");
27006+ err = au_xino_path(seq, f);
27007+
4f0767ce 27008+out:
1facf9fc 27009+ return err;
27010+#endif
27011+}
27012+
27013+/* seq_file will re-call me in case of too long string */
7eafdf33 27014+static int aufs_show_options(struct seq_file *m, struct dentry *dentry)
1facf9fc 27015+{
027c5e7a 27016+ int err;
1facf9fc 27017+ unsigned int mnt_flags, v;
27018+ struct super_block *sb;
27019+ struct au_sbinfo *sbinfo;
27020+
27021+#define AuBool(name, str) do { \
27022+ v = au_opt_test(mnt_flags, name); \
27023+ if (v != au_opt_test(AuOpt_Def, name)) \
27024+ seq_printf(m, ",%s" #str, v ? "" : "no"); \
27025+} while (0)
27026+
27027+#define AuStr(name, str) do { \
27028+ v = mnt_flags & AuOptMask_##name; \
27029+ if (v != (AuOpt_Def & AuOptMask_##name)) \
27030+ seq_printf(m, "," #str "=%s", au_optstr_##str(v)); \
27031+} while (0)
27032+
27033+#define AuUInt(name, str, val) do { \
27034+ if (val != AUFS_##name##_DEF) \
27035+ seq_printf(m, "," #str "=%u", val); \
27036+} while (0)
27037+
7eafdf33 27038+ sb = dentry->d_sb;
c1595e42
JR
27039+ if (sb->s_flags & MS_POSIXACL)
27040+ seq_puts(m, ",acl");
27041+
27042+ /* lock free root dinfo */
1facf9fc 27043+ si_noflush_read_lock(sb);
27044+ sbinfo = au_sbi(sb);
27045+ seq_printf(m, ",si=%lx", sysaufs_si_id(sbinfo));
27046+
27047+ mnt_flags = au_mntflags(sb);
27048+ if (au_opt_test(mnt_flags, XINO)) {
7eafdf33 27049+ err = au_show_xino(m, sb);
1facf9fc 27050+ if (unlikely(err))
27051+ goto out;
27052+ } else
27053+ seq_puts(m, ",noxino");
27054+
27055+ AuBool(TRUNC_XINO, trunc_xino);
27056+ AuStr(UDBA, udba);
dece6358 27057+ AuBool(SHWH, shwh);
1facf9fc 27058+ AuBool(PLINK, plink);
4a4d8108 27059+ AuBool(DIO, dio);
076b876e 27060+ AuBool(DIRPERM1, dirperm1);
1facf9fc 27061+
27062+ v = sbinfo->si_wbr_create;
27063+ if (v != AuWbrCreate_Def)
27064+ au_show_wbr_create(m, v, sbinfo);
27065+
27066+ v = sbinfo->si_wbr_copyup;
27067+ if (v != AuWbrCopyup_Def)
27068+ seq_printf(m, ",cpup=%s", au_optstr_wbr_copyup(v));
27069+
27070+ v = au_opt_test(mnt_flags, ALWAYS_DIROPQ);
27071+ if (v != au_opt_test(AuOpt_Def, ALWAYS_DIROPQ))
27072+ seq_printf(m, ",diropq=%c", v ? 'a' : 'w');
27073+
27074+ AuUInt(DIRWH, dirwh, sbinfo->si_dirwh);
27075+
027c5e7a
AM
27076+ v = jiffies_to_msecs(sbinfo->si_rdcache) / MSEC_PER_SEC;
27077+ AuUInt(RDCACHE, rdcache, v);
1facf9fc 27078+
27079+ AuUInt(RDBLK, rdblk, sbinfo->si_rdblk);
27080+ AuUInt(RDHASH, rdhash, sbinfo->si_rdhash);
27081+
076b876e
AM
27082+ au_fhsm_show(m, sbinfo);
27083+
1facf9fc 27084+ AuBool(SUM, sum);
27085+ /* AuBool(SUM_W, wsum); */
27086+ AuBool(WARN_PERM, warn_perm);
27087+ AuBool(VERBOSE, verbose);
27088+
4f0767ce 27089+out:
1facf9fc 27090+ /* be sure to print "br:" last */
27091+ if (!sysaufs_brs) {
27092+ seq_puts(m, ",br:");
27093+ au_show_brs(m, sb);
27094+ }
27095+ si_read_unlock(sb);
27096+ return 0;
27097+
1facf9fc 27098+#undef AuBool
27099+#undef AuStr
4a4d8108 27100+#undef AuUInt
1facf9fc 27101+}
27102+
27103+/* ---------------------------------------------------------------------- */
27104+
27105+/* sum mode which returns the summation for statfs(2) */
27106+
27107+static u64 au_add_till_max(u64 a, u64 b)
27108+{
27109+ u64 old;
27110+
27111+ old = a;
27112+ a += b;
92d182d2
AM
27113+ if (old <= a)
27114+ return a;
27115+ return ULLONG_MAX;
27116+}
27117+
27118+static u64 au_mul_till_max(u64 a, long mul)
27119+{
27120+ u64 old;
27121+
27122+ old = a;
27123+ a *= mul;
27124+ if (old <= a)
1facf9fc 27125+ return a;
27126+ return ULLONG_MAX;
27127+}
27128+
27129+static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf)
27130+{
27131+ int err;
92d182d2 27132+ long bsize, factor;
1facf9fc 27133+ u64 blocks, bfree, bavail, files, ffree;
27134+ aufs_bindex_t bend, bindex, i;
27135+ unsigned char shared;
7f207e10 27136+ struct path h_path;
1facf9fc 27137+ struct super_block *h_sb;
27138+
92d182d2
AM
27139+ err = 0;
27140+ bsize = LONG_MAX;
27141+ files = 0;
27142+ ffree = 0;
1facf9fc 27143+ blocks = 0;
27144+ bfree = 0;
27145+ bavail = 0;
1facf9fc 27146+ bend = au_sbend(sb);
92d182d2 27147+ for (bindex = 0; bindex <= bend; bindex++) {
7f207e10
AM
27148+ h_path.mnt = au_sbr_mnt(sb, bindex);
27149+ h_sb = h_path.mnt->mnt_sb;
1facf9fc 27150+ shared = 0;
92d182d2 27151+ for (i = 0; !shared && i < bindex; i++)
1facf9fc 27152+ shared = (au_sbr_sb(sb, i) == h_sb);
27153+ if (shared)
27154+ continue;
27155+
27156+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
27157+ h_path.dentry = h_path.mnt->mnt_root;
27158+ err = vfs_statfs(&h_path, buf);
1facf9fc 27159+ if (unlikely(err))
27160+ goto out;
27161+
92d182d2
AM
27162+ if (bsize > buf->f_bsize) {
27163+ /*
27164+ * we will reduce bsize, so we have to expand blocks
27165+ * etc. to match them again
27166+ */
27167+ factor = (bsize / buf->f_bsize);
27168+ blocks = au_mul_till_max(blocks, factor);
27169+ bfree = au_mul_till_max(bfree, factor);
27170+ bavail = au_mul_till_max(bavail, factor);
27171+ bsize = buf->f_bsize;
27172+ }
27173+
27174+ factor = (buf->f_bsize / bsize);
27175+ blocks = au_add_till_max(blocks,
27176+ au_mul_till_max(buf->f_blocks, factor));
27177+ bfree = au_add_till_max(bfree,
27178+ au_mul_till_max(buf->f_bfree, factor));
27179+ bavail = au_add_till_max(bavail,
27180+ au_mul_till_max(buf->f_bavail, factor));
1facf9fc 27181+ files = au_add_till_max(files, buf->f_files);
27182+ ffree = au_add_till_max(ffree, buf->f_ffree);
27183+ }
27184+
92d182d2 27185+ buf->f_bsize = bsize;
1facf9fc 27186+ buf->f_blocks = blocks;
27187+ buf->f_bfree = bfree;
27188+ buf->f_bavail = bavail;
27189+ buf->f_files = files;
27190+ buf->f_ffree = ffree;
92d182d2 27191+ buf->f_frsize = 0;
1facf9fc 27192+
4f0767ce 27193+out:
1facf9fc 27194+ return err;
27195+}
27196+
27197+static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf)
27198+{
27199+ int err;
7f207e10 27200+ struct path h_path;
1facf9fc 27201+ struct super_block *sb;
27202+
27203+ /* lock free root dinfo */
27204+ sb = dentry->d_sb;
27205+ si_noflush_read_lock(sb);
7f207e10 27206+ if (!au_opt_test(au_mntflags(sb), SUM)) {
1facf9fc 27207+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
27208+ h_path.mnt = au_sbr_mnt(sb, 0);
27209+ h_path.dentry = h_path.mnt->mnt_root;
27210+ err = vfs_statfs(&h_path, buf);
27211+ } else
1facf9fc 27212+ err = au_statfs_sum(sb, buf);
27213+ si_read_unlock(sb);
27214+
27215+ if (!err) {
27216+ buf->f_type = AUFS_SUPER_MAGIC;
4a4d8108 27217+ buf->f_namelen = AUFS_MAX_NAMELEN;
1facf9fc 27218+ memset(&buf->f_fsid, 0, sizeof(buf->f_fsid));
27219+ }
27220+ /* buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; */
27221+
27222+ return err;
27223+}
27224+
27225+/* ---------------------------------------------------------------------- */
27226+
537831f9
AM
27227+static int aufs_sync_fs(struct super_block *sb, int wait)
27228+{
27229+ int err, e;
27230+ aufs_bindex_t bend, bindex;
27231+ struct au_branch *br;
27232+ struct super_block *h_sb;
27233+
27234+ err = 0;
27235+ si_noflush_read_lock(sb);
27236+ bend = au_sbend(sb);
27237+ for (bindex = 0; bindex <= bend; bindex++) {
27238+ br = au_sbr(sb, bindex);
27239+ if (!au_br_writable(br->br_perm))
27240+ continue;
27241+
27242+ h_sb = au_sbr_sb(sb, bindex);
27243+ if (h_sb->s_op->sync_fs) {
27244+ e = h_sb->s_op->sync_fs(h_sb, wait);
27245+ if (unlikely(e && !err))
27246+ err = e;
27247+ /* go on even if an error happens */
27248+ }
27249+ }
27250+ si_read_unlock(sb);
27251+
27252+ return err;
27253+}
27254+
27255+/* ---------------------------------------------------------------------- */
27256+
1facf9fc 27257+/* final actions when unmounting a file system */
27258+static void aufs_put_super(struct super_block *sb)
27259+{
27260+ struct au_sbinfo *sbinfo;
27261+
27262+ sbinfo = au_sbi(sb);
27263+ if (!sbinfo)
27264+ return;
27265+
1facf9fc 27266+ dbgaufs_si_fin(sbinfo);
27267+ kobject_put(&sbinfo->si_kobj);
27268+}
27269+
27270+/* ---------------------------------------------------------------------- */
27271+
79b8bda9
AM
27272+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb,
27273+ struct super_block *sb, void *arg)
7f207e10
AM
27274+{
27275+ void *array;
076b876e 27276+ unsigned long long n, sz;
7f207e10
AM
27277+
27278+ array = NULL;
27279+ n = 0;
27280+ if (!*hint)
27281+ goto out;
27282+
27283+ if (*hint > ULLONG_MAX / sizeof(array)) {
27284+ array = ERR_PTR(-EMFILE);
27285+ pr_err("hint %llu\n", *hint);
27286+ goto out;
27287+ }
27288+
076b876e
AM
27289+ sz = sizeof(array) * *hint;
27290+ array = kzalloc(sz, GFP_NOFS);
7f207e10 27291+ if (unlikely(!array))
076b876e 27292+ array = vzalloc(sz);
7f207e10
AM
27293+ if (unlikely(!array)) {
27294+ array = ERR_PTR(-ENOMEM);
27295+ goto out;
27296+ }
27297+
79b8bda9 27298+ n = cb(sb, array, *hint, arg);
7f207e10
AM
27299+ AuDebugOn(n > *hint);
27300+
27301+out:
27302+ *hint = n;
27303+ return array;
27304+}
27305+
79b8bda9 27306+static unsigned long long au_iarray_cb(struct super_block *sb, void *a,
7f207e10
AM
27307+ unsigned long long max __maybe_unused,
27308+ void *arg)
27309+{
27310+ unsigned long long n;
27311+ struct inode **p, *inode;
27312+ struct list_head *head;
27313+
27314+ n = 0;
27315+ p = a;
27316+ head = arg;
79b8bda9 27317+ spin_lock(&sb->s_inode_list_lock);
7f207e10
AM
27318+ list_for_each_entry(inode, head, i_sb_list) {
27319+ if (!is_bad_inode(inode)
27320+ && au_ii(inode)->ii_bstart >= 0) {
2cbb1c4b
JR
27321+ spin_lock(&inode->i_lock);
27322+ if (atomic_read(&inode->i_count)) {
27323+ au_igrab(inode);
27324+ *p++ = inode;
27325+ n++;
27326+ AuDebugOn(n > max);
27327+ }
27328+ spin_unlock(&inode->i_lock);
7f207e10
AM
27329+ }
27330+ }
79b8bda9 27331+ spin_unlock(&sb->s_inode_list_lock);
7f207e10
AM
27332+
27333+ return n;
27334+}
27335+
27336+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max)
27337+{
27338+ *max = atomic_long_read(&au_sbi(sb)->si_ninodes);
79b8bda9 27339+ return au_array_alloc(max, au_iarray_cb, sb, &sb->s_inodes);
7f207e10
AM
27340+}
27341+
27342+void au_iarray_free(struct inode **a, unsigned long long max)
27343+{
27344+ unsigned long long ull;
27345+
27346+ for (ull = 0; ull < max; ull++)
27347+ iput(a[ull]);
be52b249 27348+ kvfree(a);
7f207e10
AM
27349+}
27350+
27351+/* ---------------------------------------------------------------------- */
27352+
1facf9fc 27353+/*
27354+ * refresh dentry and inode at remount time.
27355+ */
027c5e7a
AM
27356+/* todo: consolidate with simple_reval_dpath() and au_reval_for_attr() */
27357+static int au_do_refresh(struct dentry *dentry, unsigned int dir_flags,
27358+ struct dentry *parent)
1facf9fc 27359+{
27360+ int err;
1facf9fc 27361+
27362+ di_write_lock_child(dentry);
1facf9fc 27363+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a
AM
27364+ err = au_refresh_dentry(dentry, parent);
27365+ if (!err && dir_flags)
5527c038 27366+ au_hn_reset(d_inode(dentry), dir_flags);
1facf9fc 27367+ di_read_unlock(parent, AuLock_IR);
1facf9fc 27368+ di_write_unlock(dentry);
27369+
27370+ return err;
27371+}
27372+
027c5e7a
AM
27373+static int au_do_refresh_d(struct dentry *dentry, unsigned int sigen,
27374+ struct au_sbinfo *sbinfo,
b95c5147 27375+ const unsigned int dir_flags, unsigned int do_idop)
1facf9fc 27376+{
027c5e7a
AM
27377+ int err;
27378+ struct dentry *parent;
027c5e7a
AM
27379+
27380+ err = 0;
27381+ parent = dget_parent(dentry);
27382+ if (!au_digen_test(parent, sigen) && au_digen_test(dentry, sigen)) {
5527c038
JR
27383+ if (d_really_is_positive(dentry)) {
27384+ if (!d_is_dir(dentry))
027c5e7a
AM
27385+ err = au_do_refresh(dentry, /*dir_flags*/0,
27386+ parent);
27387+ else {
27388+ err = au_do_refresh(dentry, dir_flags, parent);
27389+ if (unlikely(err))
27390+ au_fset_si(sbinfo, FAILED_REFRESH_DIR);
27391+ }
27392+ } else
27393+ err = au_do_refresh(dentry, /*dir_flags*/0, parent);
27394+ AuDbgDentry(dentry);
27395+ }
27396+ dput(parent);
27397+
79b8bda9 27398+ if (!err) {
b95c5147 27399+ if (do_idop)
79b8bda9
AM
27400+ au_refresh_dop(dentry, /*force_reval*/0);
27401+ } else
27402+ au_refresh_dop(dentry, /*force_reval*/1);
27403+
027c5e7a
AM
27404+ AuTraceErr(err);
27405+ return err;
1facf9fc 27406+}
27407+
b95c5147 27408+static int au_refresh_d(struct super_block *sb, unsigned int do_idop)
1facf9fc 27409+{
27410+ int err, i, j, ndentry, e;
027c5e7a 27411+ unsigned int sigen;
1facf9fc 27412+ struct au_dcsub_pages dpages;
27413+ struct au_dpage *dpage;
027c5e7a
AM
27414+ struct dentry **dentries, *d;
27415+ struct au_sbinfo *sbinfo;
27416+ struct dentry *root = sb->s_root;
5527c038 27417+ const unsigned int dir_flags = au_hi_flags(d_inode(root), /*isdir*/1);
1facf9fc 27418+
b95c5147 27419+ if (do_idop)
79b8bda9
AM
27420+ au_refresh_dop(root, /*force_reval*/0);
27421+
027c5e7a
AM
27422+ err = au_dpages_init(&dpages, GFP_NOFS);
27423+ if (unlikely(err))
1facf9fc 27424+ goto out;
027c5e7a
AM
27425+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
27426+ if (unlikely(err))
1facf9fc 27427+ goto out_dpages;
1facf9fc 27428+
027c5e7a
AM
27429+ sigen = au_sigen(sb);
27430+ sbinfo = au_sbi(sb);
27431+ for (i = 0; i < dpages.ndpage; i++) {
1facf9fc 27432+ dpage = dpages.dpages + i;
27433+ dentries = dpage->dentries;
27434+ ndentry = dpage->ndentry;
027c5e7a 27435+ for (j = 0; j < ndentry; j++) {
1facf9fc 27436+ d = dentries[j];
79b8bda9 27437+ e = au_do_refresh_d(d, sigen, sbinfo, dir_flags,
b95c5147 27438+ do_idop);
027c5e7a
AM
27439+ if (unlikely(e && !err))
27440+ err = e;
27441+ /* go on even err */
1facf9fc 27442+ }
27443+ }
27444+
4f0767ce 27445+out_dpages:
1facf9fc 27446+ au_dpages_free(&dpages);
4f0767ce 27447+out:
1facf9fc 27448+ return err;
27449+}
27450+
b95c5147 27451+static int au_refresh_i(struct super_block *sb, unsigned int do_idop)
1facf9fc 27452+{
027c5e7a
AM
27453+ int err, e;
27454+ unsigned int sigen;
27455+ unsigned long long max, ull;
27456+ struct inode *inode, **array;
1facf9fc 27457+
027c5e7a
AM
27458+ array = au_iarray_alloc(sb, &max);
27459+ err = PTR_ERR(array);
27460+ if (IS_ERR(array))
27461+ goto out;
1facf9fc 27462+
27463+ err = 0;
027c5e7a
AM
27464+ sigen = au_sigen(sb);
27465+ for (ull = 0; ull < max; ull++) {
27466+ inode = array[ull];
076b876e
AM
27467+ if (unlikely(!inode))
27468+ break;
b95c5147
AM
27469+
27470+ e = 0;
27471+ ii_write_lock_child(inode);
537831f9 27472+ if (au_iigen(inode, NULL) != sigen) {
027c5e7a 27473+ e = au_refresh_hinode_self(inode);
1facf9fc 27474+ if (unlikely(e)) {
b95c5147 27475+ au_refresh_iop(inode, /*force_getattr*/1);
027c5e7a 27476+ pr_err("error %d, i%lu\n", e, inode->i_ino);
1facf9fc 27477+ if (!err)
27478+ err = e;
27479+ /* go on even if err */
27480+ }
27481+ }
b95c5147
AM
27482+ if (!e && do_idop)
27483+ au_refresh_iop(inode, /*force_getattr*/0);
27484+ ii_write_unlock(inode);
1facf9fc 27485+ }
27486+
027c5e7a 27487+ au_iarray_free(array, max);
1facf9fc 27488+
4f0767ce 27489+out:
1facf9fc 27490+ return err;
27491+}
27492+
/*
 * Bump the superblock generation and refresh all dentries and inodes.
 *
 * @sb:      the aufs superblock.
 * @do_idop: when non-zero, sb->s_d_op and sbi->si_iop_array are also
 *           re-selected according to the NO_DREVAL status flag.
 *
 * Errors from the individual steps are logged and otherwise ignored; the
 * function returns nothing.  The root dentry lock is dropped around the
 * dentry/inode refresh and re-taken before returning.
 * NOTE(review): callers appear to hold the aufs write lock on the root
 * (both visible callers take it first) -- confirm.
 */
b95c5147 27493+static void au_remount_refresh(struct super_block *sb, unsigned int do_idop)
1facf9fc 27494+{
027c5e7a
AM
27495+ int err, e;
27496+ unsigned int udba;
27497+ aufs_bindex_t bindex, bend;
1facf9fc 27498+ struct dentry *root;
27499+ struct inode *inode;
027c5e7a 27500+ struct au_branch *br;
79b8bda9 27501+ struct au_sbinfo *sbi;
1facf9fc 27502+
/* start a new generation and forget any earlier dir-refresh failure */
27503+ au_sigen_inc(sb);
79b8bda9
AM
27504+ sbi = au_sbi(sb);
27505+ au_fclr_si(sbi, FAILED_REFRESH_DIR);
1facf9fc 27506+
27507+ root = sb->s_root;
27508+ DiMustNoWaiters(root);
5527c038 27509+ inode = d_inode(root);
1facf9fc 27510+ IiMustNoWaiters(inode);
1facf9fc 27511+
/* reset hnotify on every branch using the current udba mode */
027c5e7a
AM
27512+ udba = au_opt_udba(sb);
27513+ bend = au_sbend(sb);
27514+ for (bindex = 0; bindex <= bend; bindex++) {
27515+ br = au_sbr(sb, bindex);
27516+ err = au_hnotify_reset_br(udba, br, br->br_perm);
1facf9fc 27517+ if (unlikely(err))
027c5e7a
AM
27518+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
27519+ bindex, err);
27520+ /* go on even if err */
1facf9fc 27521+ }
027c5e7a 27522+ au_hn_reset(inode, au_hi_flags(inode, /*isdir*/1));
1facf9fc 27523+
/* pick the dentry/inode operation tables that match NO_DREVAL */
b95c5147 27524+ if (do_idop) {
79b8bda9
AM
27525+ if (au_ftest_si(sbi, NO_DREVAL)) {
27526+ AuDebugOn(sb->s_d_op == &aufs_dop_noreval);
27527+ sb->s_d_op = &aufs_dop_noreval;
b95c5147
AM
27528+ AuDebugOn(sbi->si_iop_array == aufs_iop_nogetattr);
27529+ sbi->si_iop_array = aufs_iop_nogetattr;
79b8bda9
AM
27530+ } else {
27531+ AuDebugOn(sb->s_d_op == &aufs_dop);
27532+ sb->s_d_op = &aufs_dop;
b95c5147
AM
27533+ AuDebugOn(sbi->si_iop_array == aufs_iop);
27534+ sbi->si_iop_array = aufs_iop;
79b8bda9 27535+ }
b95c5147
AM
27536+ pr_info("reset to %pf and %pf\n",
27537+ sb->s_d_op, sbi->si_iop_array);
79b8bda9
AM
27538+ }
27539+
/* the root dentry lock is released while dentries and inodes are refreshed */
027c5e7a 27540+ di_write_unlock(root);
b95c5147
AM
27541+ err = au_refresh_d(sb, do_idop);
27542+ e = au_refresh_i(sb, do_idop);
027c5e7a
AM
27543+ if (unlikely(e && !err))
27544+ err = e;
1facf9fc 27545+ /* aufs_write_lock() calls ..._child() */
27546+ di_write_lock_child(root);
027c5e7a
AM
27547+
27548+ au_cpup_attr_all(inode, /*force*/1);
27549+
/* only the first refresh error is reported, and it is not propagated */
27550+ if (unlikely(err))
27551+ AuIOErr("refresh failed, ignored, %d\n", err);
1facf9fc 27552+}
27553+
/*
 * Map errno values that mount(8) would interpret specially (and report with
 * strange messages) to -EINVAL; every other value is returned unchanged.
 */
static int cvt_err(int err)
{
	AuTraceErr(err);

	if (err == -ENOENT || err == -ENOTDIR
	    || err == -EEXIST || err == -EIO)
		return -EINVAL;
	return err;
}
27568+
/*
 * ->remount_fs() handler.
 *
 * @sb:    the aufs superblock.
 * @flags: pointer to the new mount flags; read, never written here.
 * @data:  option string; may be NULL or empty.
 *
 * With no option string, only au_opts_verify() is run under the aufs locks.
 * Otherwise the options are parsed into a one-page buffer before any aufs
 * lock is taken, then applied with the root inode's i_mutex, the superblock
 * write lock and the root dentry write lock held.
 *
 * Returns 0 or a negative errno, filtered through cvt_err().
 */
27569+static int aufs_remount_fs(struct super_block *sb, int *flags, char *data)
27570+{
4a4d8108
AM
27571+ int err, do_dx;
27572+ unsigned int mntflags;
be52b249
AM
27573+ struct au_opts opts = {
27574+ .opt = NULL
27575+ };
1facf9fc 27576+ struct dentry *root;
27577+ struct inode *inode;
27578+ struct au_sbinfo *sbinfo;
27579+
27580+ err = 0;
27581+ root = sb->s_root;
/* no options given: just verify the current ones against the new flags */
27582+ if (!data || !*data) {
e49829fe
JR
27583+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
27584+ if (!err) {
27585+ di_write_lock_child(root);
27586+ err = au_opts_verify(sb, *flags, /*pending*/0);
27587+ aufs_write_unlock(root);
27588+ }
1facf9fc 27589+ goto out;
27590+ }
27591+
/* one page holds the parsed option array; it is freed at out_opts */
27592+ err = -ENOMEM;
1facf9fc 27593+ opts.opt = (void *)__get_free_page(GFP_NOFS);
27594+ if (unlikely(!opts.opt))
27595+ goto out;
27596+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
27597+ opts.flags = AuOpts_REMOUNT;
27598+ opts.sb_flags = *flags;
27599+
27600+ /* parse it before aufs lock */
27601+ err = au_opts_parse(sb, data, &opts);
27602+ if (unlikely(err))
27603+ goto out_opts;
27604+
/* lock order: root inode i_mutex, then superblock, then root dentry */
27605+ sbinfo = au_sbi(sb);
5527c038 27606+ inode = d_inode(root);
1facf9fc 27607+ mutex_lock(&inode->i_mutex);
e49829fe
JR
27608+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
27609+ if (unlikely(err))
27610+ goto out_mtx;
27611+ di_write_lock_child(root);
1facf9fc 27612+
27613+ /* au_opts_remount() may return an error */
27614+ err = au_opts_remount(sb, &opts);
27615+ au_opts_free(&opts);
27616+
/* the refresh steps below run regardless of err; they depend only on opts.flags */
027c5e7a 27617+ if (au_ftest_opts(opts.flags, REFRESH))
b95c5147 27618+ au_remount_refresh(sb, au_ftest_opts(opts.flags, REFRESH_IDOP));
1facf9fc 27619+
4a4d8108
AM
27620+ if (au_ftest_opts(opts.flags, REFRESH_DYAOP)) {
27621+ mntflags = au_mntflags(sb);
27622+ do_dx = !!au_opt_test(mntflags, DIO);
27623+ au_dy_arefresh(do_dx);
27624+ }
27625+
076b876e 27626+ au_fhsm_wrote_all(sb, /*force*/1); /* ?? */
1facf9fc 27627+ aufs_write_unlock(root);
953406b4 27628+
e49829fe
JR
27629+out_mtx:
27630+ mutex_unlock(&inode->i_mutex);
4f0767ce 27631+out_opts:
1facf9fc 27632+ free_page((unsigned long)opts.opt);
4f0767ce 27633+out:
1facf9fc 27634+ err = cvt_err(err);
27635+ AuTraceErr(err);
27636+ return err;
27637+}
27638+
/* super_block operations; installed as sb->s_op by aufs_fill_super() */
4a4d8108 27639+static const struct super_operations aufs_sop = {
1facf9fc 27640+ .alloc_inode = aufs_alloc_inode,
27641+ .destroy_inode = aufs_destroy_inode,
b752ccd1 27642+ /* always deleting, no clearing */
1facf9fc 27643+ .drop_inode = generic_delete_inode,
27644+ .show_options = aufs_show_options,
27645+ .statfs = aufs_statfs,
27646+ .put_super = aufs_put_super,
537831f9 27647+ .sync_fs = aufs_sync_fs,
1facf9fc 27648+ .remount_fs = aufs_remount_fs
27649+};
27650+
27651+/* ---------------------------------------------------------------------- */
27652+
27653+static int alloc_root(struct super_block *sb)
27654+{
27655+ int err;
27656+ struct inode *inode;
27657+ struct dentry *root;
27658+
27659+ err = -ENOMEM;
27660+ inode = au_iget_locked(sb, AUFS_ROOT_INO);
27661+ err = PTR_ERR(inode);
27662+ if (IS_ERR(inode))
27663+ goto out;
27664+
b95c5147 27665+ inode->i_op = aufs_iop + AuIop_DIR; /* with getattr by default */
1facf9fc 27666+ inode->i_fop = &aufs_dir_fop;
27667+ inode->i_mode = S_IFDIR;
9dbd164d 27668+ set_nlink(inode, 2);
1facf9fc 27669+ unlock_new_inode(inode);
27670+
92d182d2 27671+ root = d_make_root(inode);
1facf9fc 27672+ if (unlikely(!root))
92d182d2 27673+ goto out;
1facf9fc 27674+ err = PTR_ERR(root);
27675+ if (IS_ERR(root))
92d182d2 27676+ goto out;
1facf9fc 27677+
4a4d8108 27678+ err = au_di_init(root);
1facf9fc 27679+ if (!err) {
27680+ sb->s_root = root;
27681+ return 0; /* success */
27682+ }
27683+ dput(root);
1facf9fc 27684+
4f0767ce 27685+out:
1facf9fc 27686+ return err;
1facf9fc 27687+}
27688+
/*
 * fill_super callback handed to mount_nodev() by aufs_mount().
 *
 * @sb:       the new superblock.
 * @raw_data: mount option string; must be non-NULL and non-empty.
 * @silent:   unused.
 *
 * Allocates the sbinfo and the root, parses the options into a one-page
 * buffer and applies them with au_opts_mount().
 *
 * Returns 0 or a negative errno, filtered through cvt_err().
 */
27689+static int aufs_fill_super(struct super_block *sb, void *raw_data,
27690+ int silent __maybe_unused)
27691+{
27692+ int err;
be52b249
AM
27693+ struct au_opts opts = {
27694+ .opt = NULL
27695+ };
79b8bda9 27696+ struct au_sbinfo *sbinfo;
1facf9fc 27697+ struct dentry *root;
27698+ struct inode *inode;
27699+ char *arg = raw_data;
27700+
27701+ if (unlikely(!arg || !*arg)) {
27702+ err = -EINVAL;
4a4d8108 27703+ pr_err("no arg\n");
1facf9fc 27704+ goto out;
27705+ }
27706+
/* one page holds the parsed option array; it is freed at out_opts */
27707+ err = -ENOMEM;
1facf9fc 27708+ opts.opt = (void *)__get_free_page(GFP_NOFS);
27709+ if (unlikely(!opts.opt))
27710+ goto out;
27711+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
27712+ opts.sb_flags = sb->s_flags;
27713+
/*
 * NOTE(review): au_si_alloc() presumably returns with si_rwsem write-locked,
 * since the alloc_root() failure path below calls si_write_unlock() -- confirm.
 */
27714+ err = au_si_alloc(sb);
27715+ if (unlikely(err))
27716+ goto out_opts;
79b8bda9 27717+ sbinfo = au_sbi(sb);
1facf9fc 27718+
27719+ /* all timestamps always follow the ones on the branch */
27720+ sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
27721+ sb->s_op = &aufs_sop;
027c5e7a 27722+ sb->s_d_op = &aufs_dop;
1facf9fc 27723+ sb->s_magic = AUFS_SUPER_MAGIC;
27724+ sb->s_maxbytes = 0;
c1595e42 27725+ sb->s_stack_depth = 1;
1facf9fc 27726+ au_export_init(sb);
c1595e42 27727+ /* au_xattr_init(sb); */
1facf9fc 27728+
27729+ err = alloc_root(sb);
27730+ if (unlikely(err)) {
27731+ si_write_unlock(sb);
27732+ goto out_info;
27733+ }
27734+ root = sb->s_root;
5527c038 27735+ inode = d_inode(root);
1facf9fc 27736+
27737+ /*
27738+ * actually we can parse options regardless aufs lock here.
27739+ * but at remount time, parsing must be done before aufs lock.
27740+ * so we follow the same rule.
27741+ */
27742+ ii_write_lock_parent(inode);
27743+ aufs_write_unlock(root);
27744+ err = au_opts_parse(sb, arg, &opts);
27745+ if (unlikely(err))
27746+ goto out_root;
27747+
27748+ /* lock vfs_inode first, then aufs. */
27749+ mutex_lock(&inode->i_mutex);
1facf9fc 27750+ aufs_write_lock(root);
27751+ err = au_opts_mount(sb, &opts);
27752+ au_opts_free(&opts);
/* with NO_DREVAL set, switch to the no-revalidate / no-getattr operation tables */
79b8bda9
AM
27753+ if (!err && au_ftest_si(sbinfo, NO_DREVAL)) {
27754+ sb->s_d_op = &aufs_dop_noreval;
27755+ pr_info("%pf\n", sb->s_d_op);
27756+ au_refresh_dop(root, /*force_reval*/0);
b95c5147
AM
27757+ sbinfo->si_iop_array = aufs_iop_nogetattr;
27758+ au_refresh_iop(inode, /*force_getattr*/0);
79b8bda9 27759+ }
1facf9fc 27760+ aufs_write_unlock(root);
27761+ mutex_unlock(&inode->i_mutex);
4a4d8108
AM
27762+ if (!err)
27763+ goto out_opts; /* success */
1facf9fc 27764+
/* failure: undo in reverse order -- root dentry, sbinfo, option page */
4f0767ce 27765+out_root:
1facf9fc 27766+ dput(root);
27767+ sb->s_root = NULL;
4f0767ce 27768+out_info:
79b8bda9
AM
27769+ dbgaufs_si_fin(sbinfo);
27770+ kobject_put(&sbinfo->si_kobj);
1facf9fc 27771+ sb->s_fs_info = NULL;
4f0767ce 27772+out_opts:
1facf9fc 27773+ free_page((unsigned long)opts.opt);
4f0767ce 27774+out:
1facf9fc 27775+ AuTraceErr(err);
27776+ err = cvt_err(err);
27777+ AuTraceErr(err);
27778+ return err;
27779+}
27780+
27781+/* ---------------------------------------------------------------------- */
27782+
027c5e7a
AM
27783+static struct dentry *aufs_mount(struct file_system_type *fs_type, int flags,
27784+ const char *dev_name __maybe_unused,
27785+ void *raw_data)
1facf9fc 27786+{
027c5e7a 27787+ struct dentry *root;
1facf9fc 27788+ struct super_block *sb;
27789+
27790+ /* all timestamps always follow the ones on the branch */
27791+ /* mnt->mnt_flags |= MNT_NOATIME | MNT_NODIRATIME; */
027c5e7a
AM
27792+ root = mount_nodev(fs_type, flags, raw_data, aufs_fill_super);
27793+ if (IS_ERR(root))
27794+ goto out;
27795+
27796+ sb = root->d_sb;
27797+ si_write_lock(sb, !AuLock_FLUSH);
27798+ sysaufs_brs_add(sb, 0);
27799+ si_write_unlock(sb);
27800+ au_sbilist_add(sb);
27801+
27802+out:
27803+ return root;
1facf9fc 27804+}
27805+
e49829fe
JR
/*
 * ->kill_sb() handler.
 *
 * When the superblock still has an sbinfo, it is removed from the sbinfo
 * list and the aufs-specific state is torn down under the aufs write lock on
 * the root; pending nowait tasks are flushed after the lock is dropped.
 * kill_anon_super() is called in every case.
 */
27806+static void aufs_kill_sb(struct super_block *sb)
27807+{
27808+ struct au_sbinfo *sbinfo;
27809+
27810+ sbinfo = au_sbi(sb);
27811+ if (sbinfo) {
27812+ au_sbilist_del(sb);
27813+ aufs_write_lock(sb->s_root);
076b876e 27814+ au_fhsm_fin(sb);
e49829fe
JR
27815+ if (sbinfo->si_wbr_create_ops->fin)
27816+ sbinfo->si_wbr_create_ops->fin(sb);
/* force udba=none and refresh so that hnotify is reset on every branch */
27817+ if (au_opt_test(sbinfo->si_mntflags, UDBA_HNOTIFY)) {
27818+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_NONE);
b95c5147 27819+ au_remount_refresh(sb, /*do_idop*/0);
e49829fe
JR
27820+ }
27821+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
27822+ au_plink_put(sb, /*verbose*/1);
27823+ au_xino_clr(sb);
1e00d052 27824+ sbinfo->si_sb = NULL;
e49829fe 27825+ aufs_write_unlock(sb->s_root);
e49829fe
JR
27826+ au_nwt_flush(&sbinfo->si_nowait);
27827+ }
98d9a5b1 27828+ kill_anon_super(sb);
e49829fe
JR
27829+}
27830+
/* the aufs filesystem type: mounted by aufs_mount(), torn down by aufs_kill_sb() */
1facf9fc 27831+struct file_system_type aufs_fs_type = {
27832+ .name = AUFS_FSTYPE,
c06a8ce3
AM
27833+ /* a race between rename and others */
27834+ .fs_flags = FS_RENAME_DOES_D_MOVE,
027c5e7a 27835+ .mount = aufs_mount,
e49829fe 27836+ .kill_sb = aufs_kill_sb,
1facf9fc 27837+ /* no need to __module_get() and module_put(). */
27838+ .owner = THIS_MODULE,
27839+};
7f207e10
AM
27840diff -urN /usr/share/empty/fs/aufs/super.h linux/fs/aufs/super.h
27841--- /usr/share/empty/fs/aufs/super.h 1970-01-01 01:00:00.000000000 +0100
8cdd5066 27842+++ linux/fs/aufs/super.h 2016-02-28 11:26:32.573304539 +0100
be52b249 27843@@ -0,0 +1,641 @@
1facf9fc 27844+/*
8cdd5066 27845+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 27846+ *
27847+ * This program, aufs is free software; you can redistribute it and/or modify
27848+ * it under the terms of the GNU General Public License as published by
27849+ * the Free Software Foundation; either version 2 of the License, or
27850+ * (at your option) any later version.
dece6358
AM
27851+ *
27852+ * This program is distributed in the hope that it will be useful,
27853+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
27854+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27855+ * GNU General Public License for more details.
27856+ *
27857+ * You should have received a copy of the GNU General Public License
523b37e3 27858+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 27859+ */
27860+
27861+/*
27862+ * super_block operations
27863+ */
27864+
27865+#ifndef __AUFS_SUPER_H__
27866+#define __AUFS_SUPER_H__
27867+
27868+#ifdef __KERNEL__
27869+
27870+#include <linux/fs.h>
5527c038 27871+#include <linux/kobject.h>
1facf9fc 27872+#include "rwsem.h"
27873+#include "spl.h"
27874+#include "wkq.h"
27875+
1facf9fc 27876+/* policies to select one among multiple writable branches */
/* copy-up policy callback table; ->copyup() is invoked through AuWbrCopyup() */
27877+struct au_wbr_copyup_operations {
27878+ int (*copyup)(struct dentry *dentry);
27879+};
27880+
392086de
AM
/* flags passed to au_wbr_create_operations->create() */
#define AuWbr_DIR	1		/* target is a dir */
#define AuWbr_PARENT	(1 << 1)	/* always require a parent */

/*
 * Test, set and clear a single AuWbr_<name> bit in @flags.
 * The set/clear forms are wrapped in do { } while (0) so that they behave as
 * one statement (e.g. in an un-braced if/else), matching au_fset_lock() and
 * au_fclr_lock() in this header.
 */
#define au_ftest_wbr(flags, name)	((flags) & AuWbr_##name)
#define au_fset_wbr(flags, name) \
	do { (flags) |= AuWbr_##name; } while (0)
#define au_fclr_wbr(flags, name) \
	do { (flags) &= ~AuWbr_##name; } while (0)
27887+
/*
 * create policy callback table.
 * ->create() is invoked through AuWbrCreate(); ->fin may be NULL, and
 * aufs_kill_sb() checks it before calling.
 */
1facf9fc 27888+struct au_wbr_create_operations {
392086de 27889+ int (*create)(struct dentry *dentry, unsigned int flags);
1facf9fc 27890+ int (*init)(struct super_block *sb);
27891+ int (*fin)(struct super_block *sb);
27892+};
27893+
/*
 * state embedded in au_sbinfo as si_wbr_mfs ("most free space").
 * NOTE(review): the mfsrr_* members presumably serve the mfs + round-robin
 * variant of the policy -- confirm against wbr_policy.c.
 */
27894+struct au_wbr_mfs {
27895+ struct mutex mfs_lock; /* protect this structure */
27896+ unsigned long mfs_jiffy;
27897+ unsigned long mfs_expire;
27898+ aufs_bindex_t mfs_bindex;
27899+
27900+ unsigned long long mfsrr_bytes;
27901+ unsigned long long mfsrr_watermark;
27902+};
27903+
86dc4139
AM
/*
 * one pseudo-link entry referring to an inode.
 * NOTE(review): hlist and rcu share storage, so presumably the rcu head is
 * only used once the entry is off its hash list -- confirm in plink.c.
 */
27904+struct pseudo_link {
27905+ union {
27906+ struct hlist_node hlist;
27907+ struct rcu_head rcu;
27908+ };
27909+ struct inode *inode;
27910+};
27911+
/* number of buckets in au_sbinfo->si_plink[] */
#define AuPlink_NHASH	100

/* map an inode number to its pseudo-link bucket, in [0, AuPlink_NHASH) */
static inline int au_plink_hash(ino_t ino)
{
	const ino_t bucket = ino % AuPlink_NHASH;

	return bucket;
}
27917+
076b876e
AM
27918+/* File-based Hierarchical Storage Management */
/* per-superblock FHSM state; the struct is empty unless CONFIG_AUFS_FHSM is set */
27919+struct au_fhsm {
27920+#ifdef CONFIG_AUFS_FHSM
27921+ /* allow only one process who can receive the notification */
27922+ spinlock_t fhsm_spin;
27923+ pid_t fhsm_pid;
27924+ wait_queue_head_t fhsm_wqh;
27925+ atomic_t fhsm_readable;
27926+
c1595e42 27927+ /* these are protected by si_rwsem */
076b876e 27928+ unsigned long fhsm_expire;
c1595e42 27929+ aufs_bindex_t fhsm_bottom;
076b876e
AM
27930+#endif
27931+};
27932+
1facf9fc 27933+struct au_branch;
/*
 * aufs private superblock info, reachable from a super_block via au_sbi()
 * (sb->s_fs_info).
 */
27934+struct au_sbinfo {
27935+ /* nowait tasks in the system-wide workqueue */
27936+ struct au_nowait_tasks si_nowait;
27937+
b752ccd1
AM
27938+ /*
27939+ * tried sb->s_umount, but failed due to the dependency between i_mutex.
27940+ * rwsem for au_sbinfo is necessary.
27941+ */
dece6358 27942+ struct au_rwsem si_rwsem;
1facf9fc 27943+
b752ccd1
AM
27944+ /* prevent recursive locking in deleting inode */
27945+ struct {
27946+ unsigned long *bitmap;
27947+ spinlock_t tree_lock;
27948+ struct radix_tree_root tree;
27949+ } au_si_pid;
27950+
7f207e10 27951+ /*
523b37e3
AM
27952+ * dirty approach to protect sb->sb_inodes and ->s_files (gone) from
27953+ * remount.
7f207e10
AM
27954+ */
27955+ atomic_long_t si_ninodes, si_nfiles;
27956+
1facf9fc 27957+ /* branch management */
27958+ unsigned int si_generation;
27959+
2000de60 27960+ /* see AuSi_ flags */
1facf9fc 27961+ unsigned char au_si_status;
27962+
27963+ aufs_bindex_t si_bend;
7f207e10
AM
27964+
27965+ /* dirty trick to keep br_id plus */
27966+ unsigned int si_last_br_id :
27967+ sizeof(aufs_bindex_t) * BITS_PER_BYTE - 1;
1facf9fc 27968+ struct au_branch **si_branch;
27969+
27970+ /* policy to select a writable branch */
27971+ unsigned char si_wbr_copyup;
27972+ unsigned char si_wbr_create;
27973+ struct au_wbr_copyup_operations *si_wbr_copyup_ops;
27974+ struct au_wbr_create_operations *si_wbr_create_ops;
27975+
27976+ /* round robin */
27977+ atomic_t si_wbr_rr_next;
27978+
27979+ /* most free space */
27980+ struct au_wbr_mfs si_wbr_mfs;
27981+
076b876e
AM
27982+ /* File-based Hierarchical Storage Management */
27983+ struct au_fhsm si_fhsm;
27984+
1facf9fc 27985+ /* mount flags */
27986+ /* include/asm-ia64/siginfo.h defines a macro named si_flags */
27987+ unsigned int si_mntflags;
27988+
c2c0f25c
AM
27989+ /* symlink to follow_link() and put_link() */
27990+ struct au_sphlhead si_symlink;
27991+
1facf9fc 27992+ /* external inode number (bitmap and translation table) */
5527c038
JR
27993+ vfs_readf_t si_xread;
27994+ vfs_writef_t si_xwrite;
1facf9fc 27995+ struct file *si_xib;
27996+ struct mutex si_xib_mtx; /* protect xib members */
27997+ unsigned long *si_xib_buf;
27998+ unsigned long si_xib_last_pindex;
27999+ int si_xib_next_bit;
28000+ aufs_bindex_t si_xino_brid;
392086de
AM
28001+ unsigned long si_xino_jiffy;
28002+ unsigned long si_xino_expire;
1facf9fc 28003+ /* reserved for future use */
28004+ /* unsigned long long si_xib_limit; */ /* Max xib file size */
28005+
28006+#ifdef CONFIG_AUFS_EXPORT
28007+ /* i_generation */
28008+ struct file *si_xigen;
28009+ atomic_t si_xigen_next;
28010+#endif
28011+
b912730e
AM
28012+ /* dirty trick to support atomic_open */
28013+ struct au_sphlhead si_aopen;
28014+
1facf9fc 28015+ /* vdir parameters */
e49829fe 28016+ unsigned long si_rdcache; /* max cache time in jiffies */
1facf9fc 28017+ unsigned int si_rdblk; /* deblk size */
28018+ unsigned int si_rdhash; /* hash size */
28019+
28020+ /*
28021+ * If the number of whiteouts are larger than si_dirwh, leave all of
28022+ * them after au_whtmp_ren to reduce the cost of rmdir(2).
28023+ * future fsck.aufs or kernel thread will remove them later.
28024+ * Otherwise, remove all whiteouts and the dir in rmdir(2).
28025+ */
28026+ unsigned int si_dirwh;
28027+
1facf9fc 28028+ /* pseudo_link list */
86dc4139 28029+ struct au_sphlhead si_plink[AuPlink_NHASH];
1facf9fc 28030+ wait_queue_head_t si_plink_wq;
4a4d8108 28031+ spinlock_t si_plink_maint_lock;
e49829fe 28032+ pid_t si_plink_maint_pid;
1facf9fc 28033+
523b37e3
AM
28034+ /* file list */
28035+ struct au_sphlhead si_files;
28036+
b95c5147
AM
28037+ /* with/without getattr, brother of sb->s_d_op */
28038+ struct inode_operations *si_iop_array;
28039+
1facf9fc 28040+ /*
28041+ * sysfs and lifetime management.
28042+ * this is not a small structure and it may be a waste of memory in case
28043+ * of sysfs is disabled, particularly when many aufs-es are mounted.
28044+ * but using sysfs is majority.
28045+ */
28046+ struct kobject si_kobj;
28047+#ifdef CONFIG_DEBUG_FS
86dc4139
AM
28048+ struct dentry *si_dbgaufs;
28049+ struct dentry *si_dbgaufs_plink;
28050+ struct dentry *si_dbgaufs_xib;
1facf9fc 28051+#ifdef CONFIG_AUFS_EXPORT
28052+ struct dentry *si_dbgaufs_xigen;
28053+#endif
28054+#endif
28055+
e49829fe
JR
28056+#ifdef CONFIG_AUFS_SBILIST
28057+ struct list_head si_list;
28058+#endif
28059+
1facf9fc 28060+ /* dirty, necessary for unmounting, sysfs and sysrq */
28061+ struct super_block *si_sb;
28062+};
28063+
dece6358
AM
28064+/* sbinfo status flags */
28065+/*
28066+ * set true when refresh_dirs() failed at remount time.
28067+ * then try refreshing dirs at access time again.
28068+ * if it is false, refreshing dirs at access time is unnecessary
28069+ */
027c5e7a 28070+#define AuSi_FAILED_REFRESH_DIR 1
076b876e 28071+#define AuSi_FHSM (1 << 1) /* fhsm is active now */
79b8bda9 28072+#define AuSi_NO_DREVAL (1 << 2) /* disable all d_revalidate */
076b876e
AM
28073+
28074+#ifndef CONFIG_AUFS_FHSM
28075+#undef AuSi_FHSM
28076+#define AuSi_FHSM 0
28077+#endif
28078+
dece6358
AM
28079+static inline unsigned char au_do_ftest_si(struct au_sbinfo *sbi,
28080+ unsigned int flag)
28081+{
28082+ AuRwMustAnyLock(&sbi->si_rwsem);
28083+ return sbi->au_si_status & flag;
28084+}
28085+#define au_ftest_si(sbinfo, name) au_do_ftest_si(sbinfo, AuSi_##name)
28086+#define au_fset_si(sbinfo, name) do { \
28087+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
28088+ (sbinfo)->au_si_status |= AuSi_##name; \
28089+} while (0)
28090+#define au_fclr_si(sbinfo, name) do { \
28091+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
28092+ (sbinfo)->au_si_status &= ~AuSi_##name; \
28093+} while (0)
28094+
1facf9fc 28095+/* ---------------------------------------------------------------------- */
28096+
28097+/* policy to select one among writable branches */
4a4d8108
AM
28098+#define AuWbrCopyup(sbinfo, ...) \
28099+ ((sbinfo)->si_wbr_copyup_ops->copyup(__VA_ARGS__))
28100+#define AuWbrCreate(sbinfo, ...) \
28101+ ((sbinfo)->si_wbr_create_ops->create(__VA_ARGS__))
1facf9fc 28102+
28103+/* flags for si_read_lock()/aufs_read_lock()/di_read_lock() */
28104+#define AuLock_DW 1 /* write-lock dentry */
28105+#define AuLock_IR (1 << 1) /* read-lock inode */
28106+#define AuLock_IW (1 << 2) /* write-lock inode */
28107+#define AuLock_FLUSH (1 << 3) /* wait for 'nowait' tasks */
b95c5147 28108+#define AuLock_DIRS (1 << 4) /* target is a pair of dirs */
e49829fe
JR
28109+#define AuLock_NOPLM (1 << 5) /* return err in plm mode */
28110+#define AuLock_NOPLMW (1 << 6) /* wait for plm mode ends */
027c5e7a 28111+#define AuLock_GEN (1 << 7) /* test digen/iigen */
1facf9fc 28112+#define au_ftest_lock(flags, name) ((flags) & AuLock_##name)
7f207e10
AM
28113+#define au_fset_lock(flags, name) \
28114+ do { (flags) |= AuLock_##name; } while (0)
28115+#define au_fclr_lock(flags, name) \
28116+ do { (flags) &= ~AuLock_##name; } while (0)
1facf9fc 28117+
28118+/* ---------------------------------------------------------------------- */
28119+
28120+/* super.c */
28121+extern struct file_system_type aufs_fs_type;
28122+struct inode *au_iget_locked(struct super_block *sb, ino_t ino);
79b8bda9
AM
28123+typedef unsigned long long (*au_arraycb_t)(struct super_block *sb, void *array,
28124+ unsigned long long max, void *arg);
79b8bda9
AM
28125+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb,
28126+ struct super_block *sb, void *arg);
7f207e10
AM
28127+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max);
28128+void au_iarray_free(struct inode **a, unsigned long long max);
1facf9fc 28129+
28130+/* sbinfo.c */
28131+void au_si_free(struct kobject *kobj);
28132+int au_si_alloc(struct super_block *sb);
28133+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr);
28134+
28135+unsigned int au_sigen_inc(struct super_block *sb);
28136+aufs_bindex_t au_new_br_id(struct super_block *sb);
28137+
e49829fe
JR
28138+int si_read_lock(struct super_block *sb, int flags);
28139+int si_write_lock(struct super_block *sb, int flags);
28140+int aufs_read_lock(struct dentry *dentry, int flags);
1facf9fc 28141+void aufs_read_unlock(struct dentry *dentry, int flags);
28142+void aufs_write_lock(struct dentry *dentry);
28143+void aufs_write_unlock(struct dentry *dentry);
e49829fe 28144+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags);
1facf9fc 28145+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2);
28146+
b752ccd1
AM
28147+int si_pid_test_slow(struct super_block *sb);
28148+void si_pid_set_slow(struct super_block *sb);
28149+void si_pid_clr_slow(struct super_block *sb);
28150+
1facf9fc 28151+/* wbr_policy.c */
28152+extern struct au_wbr_copyup_operations au_wbr_copyup_ops[];
28153+extern struct au_wbr_create_operations au_wbr_create_ops[];
28154+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst);
c2b27bf2 28155+int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex);
076b876e 28156+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t bstart);
c2b27bf2
AM
28157+
28158+/* mvdown.c */
28159+int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *arg);
1facf9fc 28160+
076b876e
AM
28161+#ifdef CONFIG_AUFS_FHSM
28162+/* fhsm.c */
28163+
28164+static inline pid_t au_fhsm_pid(struct au_fhsm *fhsm)
28165+{
28166+ pid_t pid;
28167+
28168+ spin_lock(&fhsm->fhsm_spin);
28169+ pid = fhsm->fhsm_pid;
28170+ spin_unlock(&fhsm->fhsm_spin);
28171+
28172+ return pid;
28173+}
28174+
28175+void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force);
28176+void au_fhsm_wrote_all(struct super_block *sb, int force);
28177+int au_fhsm_fd(struct super_block *sb, int oflags);
28178+int au_fhsm_br_alloc(struct au_branch *br);
c1595e42 28179+void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex);
076b876e
AM
28180+void au_fhsm_fin(struct super_block *sb);
28181+void au_fhsm_init(struct au_sbinfo *sbinfo);
28182+void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec);
28183+void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo);
28184+#else
28185+AuStubVoid(au_fhsm_wrote, struct super_block *sb, aufs_bindex_t bindex,
28186+ int force)
28187+AuStubVoid(au_fhsm_wrote_all, struct super_block *sb, int force)
28188+AuStub(int, au_fhsm_fd, return -EOPNOTSUPP, struct super_block *sb, int oflags)
c1595e42
JR
28189+AuStub(pid_t, au_fhsm_pid, return 0, struct au_fhsm *fhsm)
28190+AuStubInt0(au_fhsm_br_alloc, struct au_branch *br)
28191+AuStubVoid(au_fhsm_set_bottom, struct super_block *sb, aufs_bindex_t bindex)
076b876e
AM
28192+AuStubVoid(au_fhsm_fin, struct super_block *sb)
28193+AuStubVoid(au_fhsm_init, struct au_sbinfo *sbinfo)
28194+AuStubVoid(au_fhsm_set, struct au_sbinfo *sbinfo, unsigned int sec)
28195+AuStubVoid(au_fhsm_show, struct seq_file *seq, struct au_sbinfo *sbinfo)
28196+#endif
28197+
1facf9fc 28198+/* ---------------------------------------------------------------------- */
28199+
/* return the aufs private info stored in sb->s_fs_info (may be NULL, see aufs_kill_sb()) */
28200+static inline struct au_sbinfo *au_sbi(struct super_block *sb)
28201+{
28202+ return sb->s_fs_info;
28203+}
28204+
28205+/* ---------------------------------------------------------------------- */
28206+
28207+#ifdef CONFIG_AUFS_EXPORT
a2a7ad62 28208+int au_test_nfsd(void);
1facf9fc 28209+void au_export_init(struct super_block *sb);
b752ccd1 28210+void au_xigen_inc(struct inode *inode);
1facf9fc 28211+int au_xigen_new(struct inode *inode);
28212+int au_xigen_set(struct super_block *sb, struct file *base);
28213+void au_xigen_clr(struct super_block *sb);
28214+
/* -ESTALE when au_test_nfsd() is true, -EBUSY otherwise */
static inline int au_busy_or_stale(void)
{
	return au_test_nfsd() ? -ESTALE : -EBUSY;
}
28221+#else
b752ccd1 28222+AuStubInt0(au_test_nfsd, void)
a2a7ad62 28223+AuStubVoid(au_export_init, struct super_block *sb)
b752ccd1 28224+AuStubVoid(au_xigen_inc, struct inode *inode)
4a4d8108
AM
28225+AuStubInt0(au_xigen_new, struct inode *inode)
28226+AuStubInt0(au_xigen_set, struct super_block *sb, struct file *base)
28227+AuStubVoid(au_xigen_clr, struct super_block *sb)
c1595e42 28228+AuStub(int, au_busy_or_stale, return -EBUSY, void)
1facf9fc 28229+#endif /* CONFIG_AUFS_EXPORT */
28230+
28231+/* ---------------------------------------------------------------------- */
28232+
e49829fe
JR
28233+#ifdef CONFIG_AUFS_SBILIST
28234+/* module.c */
28235+extern struct au_splhead au_sbilist;
28236+
/* initialise the global list of aufs sbinfo objects */
28237+static inline void au_sbilist_init(void)
28238+{
28239+ au_spl_init(&au_sbilist);
28240+}
28241+
/* link the sbinfo of @sb into au_sbilist */
28242+static inline void au_sbilist_add(struct super_block *sb)
28243+{
28244+ au_spl_add(&au_sbi(sb)->si_list, &au_sbilist);
28245+}
28246+
/* unlink the sbinfo of @sb from au_sbilist */
28247+static inline void au_sbilist_del(struct super_block *sb)
28248+{
28249+ au_spl_del(&au_sbi(sb)->si_list, &au_sbilist);
28250+}
53392da6
AM
28251+
28252+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
/* take the spinlock of au_sbilist */
28253+static inline void au_sbilist_lock(void)
28254+{
28255+ spin_lock(&au_sbilist.spin);
28256+}
28257+
/* release the spinlock of au_sbilist */
28258+static inline void au_sbilist_unlock(void)
28259+{
28260+ spin_unlock(&au_sbilist.spin);
28261+}
28262+#define AuGFP_SBILIST GFP_ATOMIC
28263+#else
28264+AuStubVoid(au_sbilist_lock, void)
28265+AuStubVoid(au_sbilist_unlock, void)
28266+#define AuGFP_SBILIST GFP_NOFS
28267+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
e49829fe
JR
28268+#else
28269+AuStubVoid(au_sbilist_init, void)
c1595e42
JR
28270+AuStubVoid(au_sbilist_add, struct super_block *sb)
28271+AuStubVoid(au_sbilist_del, struct super_block *sb)
53392da6
AM
28272+AuStubVoid(au_sbilist_lock, void)
28273+AuStubVoid(au_sbilist_unlock, void)
28274+#define AuGFP_SBILIST GFP_NOFS
e49829fe
JR
28275+#endif
28276+
28277+/* ---------------------------------------------------------------------- */
28278+
/* clear the debugfs dentry pointers in @sbinfo; a no-op without CONFIG_DEBUG_FS */
1facf9fc 28279+static inline void dbgaufs_si_null(struct au_sbinfo *sbinfo)
28280+{
dece6358 28281+ /*
c1595e42 28282+ * This function is a dynamic '__init' function actually,
dece6358
AM
28283+ * so the tiny check for si_rwsem is unnecessary.
28284+ */
28285+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
1facf9fc 28286+#ifdef CONFIG_DEBUG_FS
28287+ sbinfo->si_dbgaufs = NULL;
86dc4139 28288+ sbinfo->si_dbgaufs_plink = NULL;
1facf9fc 28289+ sbinfo->si_dbgaufs_xib = NULL;
28290+#ifdef CONFIG_AUFS_EXPORT
28291+ sbinfo->si_dbgaufs_xigen = NULL;
28292+#endif
28293+#endif
28294+}
28295+
28296+/* ---------------------------------------------------------------------- */
28297+
b752ccd1
AM
/* bit index of the current task in the au_si_pid bitmap */
28298+static inline pid_t si_pid_bit(void)
28299+{
28300+ /* the origin of pid is 1, but the bitmap's is 0 */
28301+ return current->pid - 1;
28302+}
28303+
28304+static inline int si_pid_test(struct super_block *sb)
28305+{
076b876e
AM
28306+ pid_t bit;
28307+
28308+ bit = si_pid_bit();
b752ccd1
AM
28309+ if (bit < PID_MAX_DEFAULT)
28310+ return test_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
c1595e42 28311+ return si_pid_test_slow(sb);
b752ccd1
AM
28312+}
28313+
28314+static inline void si_pid_set(struct super_block *sb)
28315+{
076b876e
AM
28316+ pid_t bit;
28317+
28318+ bit = si_pid_bit();
b752ccd1
AM
28319+ if (bit < PID_MAX_DEFAULT) {
28320+ AuDebugOn(test_bit(bit, au_sbi(sb)->au_si_pid.bitmap));
28321+ set_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
28322+ /* smp_mb(); */
28323+ } else
28324+ si_pid_set_slow(sb);
28325+}
28326+
28327+static inline void si_pid_clr(struct super_block *sb)
28328+{
076b876e
AM
28329+ pid_t bit;
28330+
28331+ bit = si_pid_bit();
b752ccd1
AM
28332+ if (bit < PID_MAX_DEFAULT) {
28333+ AuDebugOn(!test_bit(bit, au_sbi(sb)->au_si_pid.bitmap));
28334+ clear_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
28335+ /* smp_mb(); */
28336+ } else
28337+ si_pid_clr_slow(sb);
28338+}
28339+
28340+/* ---------------------------------------------------------------------- */
28341+
1facf9fc 28342+/* lock superblock. mainly for entry point functions */
28343+/*
b752ccd1
AM
28344+ * __si_read_lock, __si_write_lock,
28345+ * __si_read_unlock, __si_write_unlock, __si_downgrade_lock
1facf9fc 28346+ */
b752ccd1 28347+AuSimpleRwsemFuncs(__si, struct super_block *sb, &au_sbi(sb)->si_rwsem);
1facf9fc 28348+
dece6358
AM
28349+#define SiMustNoWaiters(sb) AuRwMustNoWaiters(&au_sbi(sb)->si_rwsem)
28350+#define SiMustAnyLock(sb) AuRwMustAnyLock(&au_sbi(sb)->si_rwsem)
28351+#define SiMustWriteLock(sb) AuRwMustWriteLock(&au_sbi(sb)->si_rwsem)
28352+
b752ccd1
AM
28353+static inline void si_noflush_read_lock(struct super_block *sb)
28354+{
28355+ __si_read_lock(sb);
28356+ si_pid_set(sb);
28357+}
28358+
28359+static inline int si_noflush_read_trylock(struct super_block *sb)
28360+{
076b876e
AM
28361+ int locked;
28362+
28363+ locked = __si_read_trylock(sb);
b752ccd1
AM
28364+ if (locked)
28365+ si_pid_set(sb);
28366+ return locked;
28367+}
28368+
28369+static inline void si_noflush_write_lock(struct super_block *sb)
28370+{
28371+ __si_write_lock(sb);
28372+ si_pid_set(sb);
28373+}
28374+
28375+static inline int si_noflush_write_trylock(struct super_block *sb)
28376+{
076b876e
AM
28377+ int locked;
28378+
28379+ locked = __si_write_trylock(sb);
b752ccd1
AM
28380+ if (locked)
28381+ si_pid_set(sb);
28382+ return locked;
28383+}
28384+
7e9cd9fe 28385+#if 0 /* reserved */
1facf9fc 28386+static inline int si_read_trylock(struct super_block *sb, int flags)
28387+{
28388+ if (au_ftest_lock(flags, FLUSH))
28389+ au_nwt_flush(&au_sbi(sb)->si_nowait);
28390+ return si_noflush_read_trylock(sb);
28391+}
e49829fe 28392+#endif
1facf9fc 28393+
b752ccd1
AM
28394+static inline void si_read_unlock(struct super_block *sb)
28395+{
28396+ si_pid_clr(sb);
28397+ __si_read_unlock(sb);
28398+}
28399+
7e9cd9fe 28400+#if 0 /* reserved */
1facf9fc 28401+static inline int si_write_trylock(struct super_block *sb, int flags)
28402+{
28403+ if (au_ftest_lock(flags, FLUSH))
28404+ au_nwt_flush(&au_sbi(sb)->si_nowait);
28405+ return si_noflush_write_trylock(sb);
28406+}
b752ccd1
AM
28407+#endif
28408+
28409+static inline void si_write_unlock(struct super_block *sb)
28410+{
28411+ si_pid_clr(sb);
28412+ __si_write_unlock(sb);
28413+}
28414+
7e9cd9fe 28415+#if 0 /* reserved */
b752ccd1
AM
28416+static inline void si_downgrade_lock(struct super_block *sb)
28417+{
28418+ __si_downgrade_lock(sb);
28419+}
28420+#endif
1facf9fc 28421+
28422+/* ---------------------------------------------------------------------- */
28423+
28424+static inline aufs_bindex_t au_sbend(struct super_block *sb)
28425+{
dece6358 28426+ SiMustAnyLock(sb);
1facf9fc 28427+ return au_sbi(sb)->si_bend;
28428+}
28429+
28430+static inline unsigned int au_mntflags(struct super_block *sb)
28431+{
dece6358 28432+ SiMustAnyLock(sb);
1facf9fc 28433+ return au_sbi(sb)->si_mntflags;
28434+}
28435+
28436+static inline unsigned int au_sigen(struct super_block *sb)
28437+{
dece6358 28438+ SiMustAnyLock(sb);
1facf9fc 28439+ return au_sbi(sb)->si_generation;
28440+}
28441+
7f207e10
AM
28442+static inline void au_ninodes_inc(struct super_block *sb)
28443+{
28444+ atomic_long_inc(&au_sbi(sb)->si_ninodes);
28445+}
28446+
28447+static inline void au_ninodes_dec(struct super_block *sb)
28448+{
28449+ AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_ninodes));
28450+ atomic_long_dec(&au_sbi(sb)->si_ninodes);
28451+}
28452+
28453+static inline void au_nfiles_inc(struct super_block *sb)
28454+{
28455+ atomic_long_inc(&au_sbi(sb)->si_nfiles);
28456+}
28457+
28458+static inline void au_nfiles_dec(struct super_block *sb)
28459+{
28460+ AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_nfiles));
28461+ atomic_long_dec(&au_sbi(sb)->si_nfiles);
28462+}
28463+
1facf9fc 28464+static inline struct au_branch *au_sbr(struct super_block *sb,
28465+ aufs_bindex_t bindex)
28466+{
dece6358 28467+ SiMustAnyLock(sb);
1facf9fc 28468+ return au_sbi(sb)->si_branch[0 + bindex];
28469+}
28470+
28471+static inline void au_xino_brid_set(struct super_block *sb, aufs_bindex_t brid)
28472+{
dece6358 28473+ SiMustWriteLock(sb);
1facf9fc 28474+ au_sbi(sb)->si_xino_brid = brid;
28475+}
28476+
28477+static inline aufs_bindex_t au_xino_brid(struct super_block *sb)
28478+{
dece6358 28479+ SiMustAnyLock(sb);
1facf9fc 28480+ return au_sbi(sb)->si_xino_brid;
28481+}
28482+
28483+#endif /* __KERNEL__ */
28484+#endif /* __AUFS_SUPER_H__ */
7f207e10
AM
28485diff -urN /usr/share/empty/fs/aufs/sysaufs.c linux/fs/aufs/sysaufs.c
28486--- /usr/share/empty/fs/aufs/sysaufs.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 28487+++ linux/fs/aufs/sysaufs.c 2016-02-28 11:26:32.573304539 +0100
523b37e3 28488@@ -0,0 +1,104 @@
1facf9fc 28489+/*
8cdd5066 28490+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 28491+ *
28492+ * This program, aufs is free software; you can redistribute it and/or modify
28493+ * it under the terms of the GNU General Public License as published by
28494+ * the Free Software Foundation; either version 2 of the License, or
28495+ * (at your option) any later version.
dece6358
AM
28496+ *
28497+ * This program is distributed in the hope that it will be useful,
28498+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28499+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28500+ * GNU General Public License for more details.
28501+ *
28502+ * You should have received a copy of the GNU General Public License
523b37e3 28503+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 28504+ */
28505+
28506+/*
28507+ * sysfs interface and lifetime management
28508+ * they are necessary regardless of whether sysfs is disabled.
28509+ */
28510+
1facf9fc 28511+#include <linux/random.h>
1facf9fc 28512+#include "aufs.h"
28513+
28514+unsigned long sysaufs_si_mask;
e49829fe 28515+struct kset *sysaufs_kset;
1facf9fc 28516+
28517+#define AuSiAttr(_name) { \
28518+ .attr = { .name = __stringify(_name), .mode = 0444 }, \
28519+ .show = sysaufs_si_##_name, \
28520+}
28521+
28522+static struct sysaufs_si_attr sysaufs_si_attr_xi_path = AuSiAttr(xi_path);
28523+struct attribute *sysaufs_si_attrs[] = {
28524+ &sysaufs_si_attr_xi_path.attr,
28525+ NULL,
28526+};
28527+
4a4d8108 28528+static const struct sysfs_ops au_sbi_ops = {
1facf9fc 28529+ .show = sysaufs_si_show
28530+};
28531+
28532+static struct kobj_type au_sbi_ktype = {
28533+ .release = au_si_free,
28534+ .sysfs_ops = &au_sbi_ops,
28535+ .default_attrs = sysaufs_si_attrs
28536+};
28537+
28538+/* ---------------------------------------------------------------------- */
28539+
28540+int sysaufs_si_init(struct au_sbinfo *sbinfo)
28541+{
28542+ int err;
28543+
e49829fe 28544+ sbinfo->si_kobj.kset = sysaufs_kset;
1facf9fc 28545+ /* cf. sysaufs_name() */
28546+ err = kobject_init_and_add
e49829fe 28547+ (&sbinfo->si_kobj, &au_sbi_ktype, /*&sysaufs_kset->kobj*/NULL,
1facf9fc 28548+ SysaufsSiNamePrefix "%lx", sysaufs_si_id(sbinfo));
28549+
28550+ dbgaufs_si_null(sbinfo);
28551+ if (!err) {
28552+ err = dbgaufs_si_init(sbinfo);
28553+ if (unlikely(err))
28554+ kobject_put(&sbinfo->si_kobj);
28555+ }
28556+ return err;
28557+}
28558+
28559+void sysaufs_fin(void)
28560+{
28561+ dbgaufs_fin();
e49829fe
JR
28562+ sysfs_remove_group(&sysaufs_kset->kobj, sysaufs_attr_group);
28563+ kset_unregister(sysaufs_kset);
1facf9fc 28564+}
28565+
28566+int __init sysaufs_init(void)
28567+{
28568+ int err;
28569+
28570+ do {
28571+ get_random_bytes(&sysaufs_si_mask, sizeof(sysaufs_si_mask));
28572+ } while (!sysaufs_si_mask);
28573+
4a4d8108 28574+ err = -EINVAL;
e49829fe
JR
28575+ sysaufs_kset = kset_create_and_add(AUFS_NAME, NULL, fs_kobj);
28576+ if (unlikely(!sysaufs_kset))
4a4d8108 28577+ goto out;
e49829fe
JR
28578+ err = PTR_ERR(sysaufs_kset);
28579+ if (IS_ERR(sysaufs_kset))
1facf9fc 28580+ goto out;
e49829fe 28581+ err = sysfs_create_group(&sysaufs_kset->kobj, sysaufs_attr_group);
1facf9fc 28582+ if (unlikely(err)) {
e49829fe 28583+ kset_unregister(sysaufs_kset);
1facf9fc 28584+ goto out;
28585+ }
28586+
28587+ err = dbgaufs_init();
28588+ if (unlikely(err))
28589+ sysaufs_fin();
4f0767ce 28590+out:
1facf9fc 28591+ return err;
28592+}
7f207e10
AM
28593diff -urN /usr/share/empty/fs/aufs/sysaufs.h linux/fs/aufs/sysaufs.h
28594--- /usr/share/empty/fs/aufs/sysaufs.h 1970-01-01 01:00:00.000000000 +0100
8cdd5066 28595+++ linux/fs/aufs/sysaufs.h 2016-02-28 11:26:32.573304539 +0100
c1595e42 28596@@ -0,0 +1,101 @@
1facf9fc 28597+/*
8cdd5066 28598+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 28599+ *
28600+ * This program, aufs is free software; you can redistribute it and/or modify
28601+ * it under the terms of the GNU General Public License as published by
28602+ * the Free Software Foundation; either version 2 of the License, or
28603+ * (at your option) any later version.
dece6358
AM
28604+ *
28605+ * This program is distributed in the hope that it will be useful,
28606+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28607+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28608+ * GNU General Public License for more details.
28609+ *
28610+ * You should have received a copy of the GNU General Public License
523b37e3 28611+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 28612+ */
28613+
28614+/*
28615+ * sysfs interface and mount lifetime management
28616+ */
28617+
28618+#ifndef __SYSAUFS_H__
28619+#define __SYSAUFS_H__
28620+
28621+#ifdef __KERNEL__
28622+
1facf9fc 28623+#include <linux/sysfs.h>
1facf9fc 28624+#include "module.h"
28625+
dece6358
AM
28626+struct super_block;
28627+struct au_sbinfo;
28628+
1facf9fc 28629+struct sysaufs_si_attr {
28630+ struct attribute attr;
28631+ int (*show)(struct seq_file *seq, struct super_block *sb);
28632+};
28633+
28634+/* ---------------------------------------------------------------------- */
28635+
28636+/* sysaufs.c */
28637+extern unsigned long sysaufs_si_mask;
e49829fe 28638+extern struct kset *sysaufs_kset;
1facf9fc 28639+extern struct attribute *sysaufs_si_attrs[];
28640+int sysaufs_si_init(struct au_sbinfo *sbinfo);
28641+int __init sysaufs_init(void);
28642+void sysaufs_fin(void);
28643+
28644+/* ---------------------------------------------------------------------- */
28645+
28646+/* some people don't like to show a pointer in kernel */
28647+static inline unsigned long sysaufs_si_id(struct au_sbinfo *sbinfo)
28648+{
28649+ return sysaufs_si_mask ^ (unsigned long)sbinfo;
28650+}
28651+
28652+#define SysaufsSiNamePrefix "si_"
28653+#define SysaufsSiNameLen (sizeof(SysaufsSiNamePrefix) + 16)
28654+static inline void sysaufs_name(struct au_sbinfo *sbinfo, char *name)
28655+{
28656+ snprintf(name, SysaufsSiNameLen, SysaufsSiNamePrefix "%lx",
28657+ sysaufs_si_id(sbinfo));
28658+}
28659+
28660+struct au_branch;
28661+#ifdef CONFIG_SYSFS
28662+/* sysfs.c */
28663+extern struct attribute_group *sysaufs_attr_group;
28664+
28665+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb);
28666+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
28667+ char *buf);
076b876e
AM
28668+long au_brinfo_ioctl(struct file *file, unsigned long arg);
28669+#ifdef CONFIG_COMPAT
28670+long au_brinfo_compat_ioctl(struct file *file, unsigned long arg);
28671+#endif
1facf9fc 28672+
28673+void sysaufs_br_init(struct au_branch *br);
28674+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
28675+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
28676+
28677+#define sysaufs_brs_init() do {} while (0)
28678+
28679+#else
28680+#define sysaufs_attr_group NULL
28681+
4a4d8108 28682+AuStubInt0(sysaufs_si_xi_path, struct seq_file *seq, struct super_block *sb)
c1595e42
JR
28683+AuStub(ssize_t, sysaufs_si_show, return 0, struct kobject *kobj,
28684+ struct attribute *attr, char *buf)
4a4d8108
AM
28685+AuStubVoid(sysaufs_br_init, struct au_branch *br)
28686+AuStubVoid(sysaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
28687+AuStubVoid(sysaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
1facf9fc 28688+
28689+static inline void sysaufs_brs_init(void)
28690+{
28691+ sysaufs_brs = 0;
28692+}
28693+
28694+#endif /* CONFIG_SYSFS */
28695+
28696+#endif /* __KERNEL__ */
28697+#endif /* __SYSAUFS_H__ */
7f207e10
AM
28698diff -urN /usr/share/empty/fs/aufs/sysfs.c linux/fs/aufs/sysfs.c
28699--- /usr/share/empty/fs/aufs/sysfs.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 28700+++ linux/fs/aufs/sysfs.c 2016-02-28 11:26:32.573304539 +0100
79b8bda9 28701@@ -0,0 +1,376 @@
1facf9fc 28702+/*
8cdd5066 28703+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 28704+ *
28705+ * This program, aufs is free software; you can redistribute it and/or modify
28706+ * it under the terms of the GNU General Public License as published by
28707+ * the Free Software Foundation; either version 2 of the License, or
28708+ * (at your option) any later version.
dece6358
AM
28709+ *
28710+ * This program is distributed in the hope that it will be useful,
28711+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28712+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28713+ * GNU General Public License for more details.
28714+ *
28715+ * You should have received a copy of the GNU General Public License
523b37e3 28716+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 28717+ */
28718+
28719+/*
28720+ * sysfs interface
28721+ */
28722+
076b876e 28723+#include <linux/compat.h>
1facf9fc 28724+#include <linux/seq_file.h>
1facf9fc 28725+#include "aufs.h"
28726+
4a4d8108
AM
28727+#ifdef CONFIG_AUFS_FS_MODULE
28728+/* this entry violates the "one line per file" policy of sysfs */
28729+static ssize_t config_show(struct kobject *kobj, struct kobj_attribute *attr,
28730+ char *buf)
28731+{
28732+ ssize_t err;
28733+ static char *conf =
28734+/* this file is generated at compile time */
28735+#include "conf.str"
28736+ ;
28737+
28738+ err = snprintf(buf, PAGE_SIZE, conf);
28739+ if (unlikely(err >= PAGE_SIZE))
28740+ err = -EFBIG;
28741+ return err;
28742+}
28743+
28744+static struct kobj_attribute au_config_attr = __ATTR_RO(config);
28745+#endif
28746+
1facf9fc 28747+static struct attribute *au_attr[] = {
4a4d8108
AM
28748+#ifdef CONFIG_AUFS_FS_MODULE
28749+ &au_config_attr.attr,
28750+#endif
1facf9fc 28751+ NULL, /* need to NULL terminate the list of attributes */
28752+};
28753+
28754+static struct attribute_group sysaufs_attr_group_body = {
28755+ .attrs = au_attr
28756+};
28757+
28758+struct attribute_group *sysaufs_attr_group = &sysaufs_attr_group_body;
28759+
28760+/* ---------------------------------------------------------------------- */
28761+
28762+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb)
28763+{
28764+ int err;
28765+
dece6358
AM
28766+ SiMustAnyLock(sb);
28767+
1facf9fc 28768+ err = 0;
28769+ if (au_opt_test(au_mntflags(sb), XINO)) {
28770+ err = au_xino_path(seq, au_sbi(sb)->si_xib);
28771+ seq_putc(seq, '\n');
28772+ }
28773+ return err;
28774+}
28775+
28776+/*
28777+ * the lifetime of branch is independent from the entry under sysfs.
28778+ * sysfs handles the lifetime of the entry, and never calls ->show() after it is
28779+ * unlinked.
28780+ */
28781+static int sysaufs_si_br(struct seq_file *seq, struct super_block *sb,
392086de 28782+ aufs_bindex_t bindex, int idx)
1facf9fc 28783+{
1e00d052 28784+ int err;
1facf9fc 28785+ struct path path;
28786+ struct dentry *root;
28787+ struct au_branch *br;
076b876e 28788+ au_br_perm_str_t perm;
1facf9fc 28789+
28790+ AuDbg("b%d\n", bindex);
28791+
1e00d052 28792+ err = 0;
1facf9fc 28793+ root = sb->s_root;
28794+ di_read_lock_parent(root, !AuLock_IR);
28795+ br = au_sbr(sb, bindex);
392086de
AM
28796+
28797+ switch (idx) {
28798+ case AuBrSysfs_BR:
28799+ path.mnt = au_br_mnt(br);
28800+ path.dentry = au_h_dptr(root, bindex);
79b8bda9
AM
28801+ err = au_seq_path(seq, &path);
28802+ if (!err) {
28803+ au_optstr_br_perm(&perm, br->br_perm);
28804+ seq_printf(seq, "=%s\n", perm.a);
28805+ }
392086de
AM
28806+ break;
28807+ case AuBrSysfs_BRID:
79b8bda9 28808+ seq_printf(seq, "%d\n", br->br_id);
392086de
AM
28809+ break;
28810+ }
076b876e 28811+ di_read_unlock(root, !AuLock_IR);
79b8bda9 28812+ if (unlikely(err || seq_has_overflowed(seq)))
076b876e 28813+ err = -E2BIG;
392086de 28814+
1e00d052 28815+ return err;
1facf9fc 28816+}
28817+
28818+/* ---------------------------------------------------------------------- */
28819+
28820+static struct seq_file *au_seq(char *p, ssize_t len)
28821+{
28822+ struct seq_file *seq;
28823+
28824+ seq = kzalloc(sizeof(*seq), GFP_NOFS);
28825+ if (seq) {
28826+ /* mutex_init(&seq.lock); */
28827+ seq->buf = p;
28828+ seq->size = len;
28829+ return seq; /* success */
28830+ }
28831+
28832+ seq = ERR_PTR(-ENOMEM);
28833+ return seq;
28834+}
28835+
392086de
AM
28836+#define SysaufsBr_PREFIX "br"
28837+#define SysaufsBrid_PREFIX "brid"
1facf9fc 28838+
28839+/* todo: file size may exceed PAGE_SIZE */
28840+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
1308ab2a 28841+ char *buf)
1facf9fc 28842+{
28843+ ssize_t err;
392086de 28844+ int idx;
1facf9fc 28845+ long l;
28846+ aufs_bindex_t bend;
28847+ struct au_sbinfo *sbinfo;
28848+ struct super_block *sb;
28849+ struct seq_file *seq;
28850+ char *name;
28851+ struct attribute **cattr;
28852+
28853+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
28854+ sb = sbinfo->si_sb;
1308ab2a 28855+
28856+ /*
28857+ * prevent a race condition between sysfs and aufs.
28858+ * for instance, sysfs_file_read() calls sysfs_get_active_two() which
28859+ * prohibits maintaining the sysfs entries.
28860+ * here we acquire read lock after sysfs_get_active_two().
28861+ * on the other hand, the remount process may maintain the sysfs/aufs
28862+ * entries after acquiring write lock.
28863+ * it can cause a deadlock.
28864+ * so we simply give up processing the read here.
28865+ */
28866+ err = -EBUSY;
28867+ if (unlikely(!si_noflush_read_trylock(sb)))
28868+ goto out;
1facf9fc 28869+
28870+ seq = au_seq(buf, PAGE_SIZE);
28871+ err = PTR_ERR(seq);
28872+ if (IS_ERR(seq))
1308ab2a 28873+ goto out_unlock;
1facf9fc 28874+
28875+ name = (void *)attr->name;
28876+ cattr = sysaufs_si_attrs;
28877+ while (*cattr) {
28878+ if (!strcmp(name, (*cattr)->name)) {
28879+ err = container_of(*cattr, struct sysaufs_si_attr, attr)
28880+ ->show(seq, sb);
28881+ goto out_seq;
28882+ }
28883+ cattr++;
28884+ }
28885+
392086de
AM
28886+ if (!strncmp(name, SysaufsBrid_PREFIX,
28887+ sizeof(SysaufsBrid_PREFIX) - 1)) {
28888+ idx = AuBrSysfs_BRID;
28889+ name += sizeof(SysaufsBrid_PREFIX) - 1;
28890+ } else if (!strncmp(name, SysaufsBr_PREFIX,
28891+ sizeof(SysaufsBr_PREFIX) - 1)) {
28892+ idx = AuBrSysfs_BR;
1facf9fc 28893+ name += sizeof(SysaufsBr_PREFIX) - 1;
392086de
AM
28894+ } else
28895+ BUG();
28896+
28897+ err = kstrtol(name, 10, &l);
28898+ if (!err) {
28899+ bend = au_sbend(sb);
28900+ if (l <= bend)
28901+ err = sysaufs_si_br(seq, sb, (aufs_bindex_t)l, idx);
28902+ else
28903+ err = -ENOENT;
1facf9fc 28904+ }
1facf9fc 28905+
4f0767ce 28906+out_seq:
1facf9fc 28907+ if (!err) {
28908+ err = seq->count;
28909+ /* sysfs limit */
28910+ if (unlikely(err == PAGE_SIZE))
28911+ err = -EFBIG;
28912+ }
28913+ kfree(seq);
4f0767ce 28914+out_unlock:
1facf9fc 28915+ si_read_unlock(sb);
4f0767ce 28916+out:
1facf9fc 28917+ return err;
28918+}
28919+
28920+/* ---------------------------------------------------------------------- */
28921+
076b876e
AM
28922+static int au_brinfo(struct super_block *sb, union aufs_brinfo __user *arg)
28923+{
28924+ int err;
28925+ int16_t brid;
28926+ aufs_bindex_t bindex, bend;
28927+ size_t sz;
28928+ char *buf;
28929+ struct seq_file *seq;
28930+ struct au_branch *br;
28931+
28932+ si_read_lock(sb, AuLock_FLUSH);
28933+ bend = au_sbend(sb);
28934+ err = bend + 1;
28935+ if (!arg)
28936+ goto out;
28937+
28938+ err = -ENOMEM;
28939+ buf = (void *)__get_free_page(GFP_NOFS);
28940+ if (unlikely(!buf))
28941+ goto out;
28942+
28943+ seq = au_seq(buf, PAGE_SIZE);
28944+ err = PTR_ERR(seq);
28945+ if (IS_ERR(seq))
28946+ goto out_buf;
28947+
28948+ sz = sizeof(*arg) - offsetof(union aufs_brinfo, path);
28949+ for (bindex = 0; bindex <= bend; bindex++, arg++) {
28950+ err = !access_ok(VERIFY_WRITE, arg, sizeof(*arg));
28951+ if (unlikely(err))
28952+ break;
28953+
28954+ br = au_sbr(sb, bindex);
28955+ brid = br->br_id;
28956+ BUILD_BUG_ON(sizeof(brid) != sizeof(arg->id));
28957+ err = __put_user(brid, &arg->id);
28958+ if (unlikely(err))
28959+ break;
28960+
28961+ BUILD_BUG_ON(sizeof(br->br_perm) != sizeof(arg->perm));
28962+ err = __put_user(br->br_perm, &arg->perm);
28963+ if (unlikely(err))
28964+ break;
28965+
79b8bda9
AM
28966+ err = au_seq_path(seq, &br->br_path);
28967+ if (unlikely(err))
28968+ break;
28969+ seq_putc(seq, '\0');
28970+ if (!seq_has_overflowed(seq)) {
076b876e
AM
28971+ err = copy_to_user(arg->path, seq->buf, seq->count);
28972+ seq->count = 0;
28973+ if (unlikely(err))
28974+ break;
28975+ } else {
28976+ err = -E2BIG;
28977+ goto out_seq;
28978+ }
28979+ }
28980+ if (unlikely(err))
28981+ err = -EFAULT;
28982+
28983+out_seq:
28984+ kfree(seq);
28985+out_buf:
28986+ free_page((unsigned long)buf);
28987+out:
28988+ si_read_unlock(sb);
28989+ return err;
28990+}
28991+
28992+long au_brinfo_ioctl(struct file *file, unsigned long arg)
28993+{
2000de60 28994+ return au_brinfo(file->f_path.dentry->d_sb, (void __user *)arg);
076b876e
AM
28995+}
28996+
28997+#ifdef CONFIG_COMPAT
28998+long au_brinfo_compat_ioctl(struct file *file, unsigned long arg)
28999+{
2000de60 29000+ return au_brinfo(file->f_path.dentry->d_sb, compat_ptr(arg));
076b876e
AM
29001+}
29002+#endif
29003+
29004+/* ---------------------------------------------------------------------- */
29005+
1facf9fc 29006+void sysaufs_br_init(struct au_branch *br)
29007+{
392086de
AM
29008+ int i;
29009+ struct au_brsysfs *br_sysfs;
29010+ struct attribute *attr;
4a4d8108 29011+
392086de
AM
29012+ br_sysfs = br->br_sysfs;
29013+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
29014+ attr = &br_sysfs->attr;
29015+ sysfs_attr_init(attr);
29016+ attr->name = br_sysfs->name;
29017+ attr->mode = S_IRUGO;
29018+ br_sysfs++;
29019+ }
1facf9fc 29020+}
29021+
29022+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
29023+{
29024+ struct au_branch *br;
29025+ struct kobject *kobj;
392086de
AM
29026+ struct au_brsysfs *br_sysfs;
29027+ int i;
1facf9fc 29028+ aufs_bindex_t bend;
29029+
29030+ dbgaufs_brs_del(sb, bindex);
29031+
29032+ if (!sysaufs_brs)
29033+ return;
29034+
29035+ kobj = &au_sbi(sb)->si_kobj;
29036+ bend = au_sbend(sb);
29037+ for (; bindex <= bend; bindex++) {
29038+ br = au_sbr(sb, bindex);
392086de
AM
29039+ br_sysfs = br->br_sysfs;
29040+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
29041+ sysfs_remove_file(kobj, &br_sysfs->attr);
29042+ br_sysfs++;
29043+ }
1facf9fc 29044+ }
29045+}
29046+
29047+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
29048+{
392086de 29049+ int err, i;
1facf9fc 29050+ aufs_bindex_t bend;
29051+ struct kobject *kobj;
29052+ struct au_branch *br;
392086de 29053+ struct au_brsysfs *br_sysfs;
1facf9fc 29054+
29055+ dbgaufs_brs_add(sb, bindex);
29056+
29057+ if (!sysaufs_brs)
29058+ return;
29059+
29060+ kobj = &au_sbi(sb)->si_kobj;
29061+ bend = au_sbend(sb);
29062+ for (; bindex <= bend; bindex++) {
29063+ br = au_sbr(sb, bindex);
392086de
AM
29064+ br_sysfs = br->br_sysfs;
29065+ snprintf(br_sysfs[AuBrSysfs_BR].name, sizeof(br_sysfs->name),
29066+ SysaufsBr_PREFIX "%d", bindex);
29067+ snprintf(br_sysfs[AuBrSysfs_BRID].name, sizeof(br_sysfs->name),
29068+ SysaufsBrid_PREFIX "%d", bindex);
29069+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
29070+ err = sysfs_create_file(kobj, &br_sysfs->attr);
29071+ if (unlikely(err))
29072+ pr_warn("failed %s under sysfs(%d)\n",
29073+ br_sysfs->name, err);
29074+ br_sysfs++;
29075+ }
1facf9fc 29076+ }
29077+}
7f207e10
AM
29078diff -urN /usr/share/empty/fs/aufs/sysrq.c linux/fs/aufs/sysrq.c
29079--- /usr/share/empty/fs/aufs/sysrq.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 29080+++ linux/fs/aufs/sysrq.c 2016-02-28 11:26:32.573304539 +0100
076b876e 29081@@ -0,0 +1,157 @@
1facf9fc 29082+/*
8cdd5066 29083+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 29084+ *
29085+ * This program, aufs is free software; you can redistribute it and/or modify
29086+ * it under the terms of the GNU General Public License as published by
29087+ * the Free Software Foundation; either version 2 of the License, or
29088+ * (at your option) any later version.
dece6358
AM
29089+ *
29090+ * This program is distributed in the hope that it will be useful,
29091+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29092+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29093+ * GNU General Public License for more details.
29094+ *
29095+ * You should have received a copy of the GNU General Public License
523b37e3 29096+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 29097+ */
29098+
29099+/*
29100+ * magic sysrq handler
29101+ */
29102+
1facf9fc 29103+/* #include <linux/sysrq.h> */
027c5e7a 29104+#include <linux/writeback.h>
1facf9fc 29105+#include "aufs.h"
29106+
29107+/* ---------------------------------------------------------------------- */
29108+
29109+static void sysrq_sb(struct super_block *sb)
29110+{
29111+ char *plevel;
29112+ struct au_sbinfo *sbinfo;
29113+ struct file *file;
523b37e3
AM
29114+ struct au_sphlhead *files;
29115+ struct au_finfo *finfo;
1facf9fc 29116+
29117+ plevel = au_plevel;
29118+ au_plevel = KERN_WARNING;
1facf9fc 29119+
4a4d8108 29120+ /* since we define pr_fmt, call printk directly */
c06a8ce3
AM
29121+#define pr(str) printk(KERN_WARNING AUFS_NAME ": " str)
29122+
29123+ sbinfo = au_sbi(sb);
4a4d8108 29124+ printk(KERN_WARNING "si=%lx\n", sysaufs_si_id(sbinfo));
c06a8ce3 29125+ pr("superblock\n");
1facf9fc 29126+ au_dpri_sb(sb);
027c5e7a
AM
29127+
29128+#if 0
c06a8ce3 29129+ pr("root dentry\n");
1facf9fc 29130+ au_dpri_dentry(sb->s_root);
c06a8ce3 29131+ pr("root inode\n");
5527c038 29132+ au_dpri_inode(d_inode(sb->s_root));
027c5e7a
AM
29133+#endif
29134+
1facf9fc 29135+#if 0
027c5e7a
AM
29136+ do {
29137+ int err, i, j, ndentry;
29138+ struct au_dcsub_pages dpages;
29139+ struct au_dpage *dpage;
29140+
29141+ err = au_dpages_init(&dpages, GFP_ATOMIC);
29142+ if (unlikely(err))
29143+ break;
29144+ err = au_dcsub_pages(&dpages, sb->s_root, NULL, NULL);
29145+ if (!err)
29146+ for (i = 0; i < dpages.ndpage; i++) {
29147+ dpage = dpages.dpages + i;
29148+ ndentry = dpage->ndentry;
29149+ for (j = 0; j < ndentry; j++)
29150+ au_dpri_dentry(dpage->dentries[j]);
29151+ }
29152+ au_dpages_free(&dpages);
29153+ } while (0);
29154+#endif
29155+
29156+#if 1
29157+ {
29158+ struct inode *i;
076b876e 29159+
c06a8ce3 29160+ pr("isolated inode\n");
79b8bda9 29161+ spin_lock(&sb->s_inode_list_lock);
2cbb1c4b
JR
29162+ list_for_each_entry(i, &sb->s_inodes, i_sb_list) {
29163+ spin_lock(&i->i_lock);
b4510431 29164+ if (1 || hlist_empty(&i->i_dentry))
027c5e7a 29165+ au_dpri_inode(i);
2cbb1c4b
JR
29166+ spin_unlock(&i->i_lock);
29167+ }
79b8bda9 29168+ spin_unlock(&sb->s_inode_list_lock);
027c5e7a 29169+ }
1facf9fc 29170+#endif
c06a8ce3 29171+ pr("files\n");
523b37e3
AM
29172+ files = &au_sbi(sb)->si_files;
29173+ spin_lock(&files->spin);
29174+ hlist_for_each_entry(finfo, &files->head, fi_hlist) {
4a4d8108 29175+ umode_t mode;
076b876e 29176+
523b37e3 29177+ file = finfo->fi_file;
c06a8ce3 29178+ mode = file_inode(file)->i_mode;
38d290e6 29179+ if (!special_file(mode))
1facf9fc 29180+ au_dpri_file(file);
523b37e3
AM
29181+ }
29182+ spin_unlock(&files->spin);
c06a8ce3 29183+ pr("done\n");
1facf9fc 29184+
c06a8ce3 29185+#undef pr
1facf9fc 29186+ au_plevel = plevel;
1facf9fc 29187+}
29188+
29189+/* ---------------------------------------------------------------------- */
29190+
29191+/* module parameter */
29192+static char *aufs_sysrq_key = "a";
29193+module_param_named(sysrq, aufs_sysrq_key, charp, S_IRUGO);
29194+MODULE_PARM_DESC(sysrq, "MagicSysRq key for " AUFS_NAME);
29195+
0c5527e5 29196+static void au_sysrq(int key __maybe_unused)
1facf9fc 29197+{
1facf9fc 29198+ struct au_sbinfo *sbinfo;
29199+
027c5e7a 29200+ lockdep_off();
53392da6 29201+ au_sbilist_lock();
e49829fe 29202+ list_for_each_entry(sbinfo, &au_sbilist.head, si_list)
1facf9fc 29203+ sysrq_sb(sbinfo->si_sb);
53392da6 29204+ au_sbilist_unlock();
027c5e7a 29205+ lockdep_on();
1facf9fc 29206+}
29207+
29208+static struct sysrq_key_op au_sysrq_op = {
29209+ .handler = au_sysrq,
29210+ .help_msg = "Aufs",
29211+ .action_msg = "Aufs",
29212+ .enable_mask = SYSRQ_ENABLE_DUMP
29213+};
29214+
29215+/* ---------------------------------------------------------------------- */
29216+
29217+int __init au_sysrq_init(void)
29218+{
29219+ int err;
29220+ char key;
29221+
29222+ err = -1;
29223+ key = *aufs_sysrq_key;
29224+ if ('a' <= key && key <= 'z')
29225+ err = register_sysrq_key(key, &au_sysrq_op);
29226+ if (unlikely(err))
4a4d8108 29227+ pr_err("err %d, sysrq=%c\n", err, key);
1facf9fc 29228+ return err;
29229+}
29230+
29231+void au_sysrq_fin(void)
29232+{
29233+ int err;
076b876e 29234+
1facf9fc 29235+ err = unregister_sysrq_key(*aufs_sysrq_key, &au_sysrq_op);
29236+ if (unlikely(err))
4a4d8108 29237+ pr_err("err %d (ignored)\n", err);
1facf9fc 29238+}
7f207e10
AM
29239diff -urN /usr/share/empty/fs/aufs/vdir.c linux/fs/aufs/vdir.c
29240--- /usr/share/empty/fs/aufs/vdir.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 29241+++ linux/fs/aufs/vdir.c 2016-02-28 11:26:32.573304539 +0100
b912730e 29242@@ -0,0 +1,888 @@
1facf9fc 29243+/*
8cdd5066 29244+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 29245+ *
29246+ * This program, aufs is free software; you can redistribute it and/or modify
29247+ * it under the terms of the GNU General Public License as published by
29248+ * the Free Software Foundation; either version 2 of the License, or
29249+ * (at your option) any later version.
dece6358
AM
29250+ *
29251+ * This program is distributed in the hope that it will be useful,
29252+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29253+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29254+ * GNU General Public License for more details.
29255+ *
29256+ * You should have received a copy of the GNU General Public License
523b37e3 29257+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 29258+ */
29259+
29260+/*
29261+ * virtual or vertical directory
29262+ */
29263+
29264+#include "aufs.h"
29265+
dece6358 29266+static unsigned int calc_size(int nlen)
1facf9fc 29267+{
dece6358 29268+ return ALIGN(sizeof(struct au_vdir_de) + nlen, sizeof(ino_t));
1facf9fc 29269+}
29270+
29271+static int set_deblk_end(union au_vdir_deblk_p *p,
29272+ union au_vdir_deblk_p *deblk_end)
29273+{
29274+ if (calc_size(0) <= deblk_end->deblk - p->deblk) {
29275+ p->de->de_str.len = 0;
29276+ /* smp_mb(); */
29277+ return 0;
29278+ }
29279+ return -1; /* error */
29280+}
29281+
29282+/* returns true or false */
29283+static int is_deblk_end(union au_vdir_deblk_p *p,
29284+ union au_vdir_deblk_p *deblk_end)
29285+{
29286+ if (calc_size(0) <= deblk_end->deblk - p->deblk)
29287+ return !p->de->de_str.len;
29288+ return 1;
29289+}
29290+
29291+static unsigned char *last_deblk(struct au_vdir *vdir)
29292+{
29293+ return vdir->vd_deblk[vdir->vd_nblk - 1];
29294+}
29295+
29296+/* ---------------------------------------------------------------------- */
29297+
79b8bda9 29298+/* estimate the appropriate size for name hash table */
1308ab2a 29299+unsigned int au_rdhash_est(loff_t sz)
29300+{
29301+ unsigned int n;
29302+
29303+ n = UINT_MAX;
29304+ sz >>= 10;
29305+ if (sz < n)
29306+ n = sz;
29307+ if (sz < AUFS_RDHASH_DEF)
29308+ n = AUFS_RDHASH_DEF;
4a4d8108 29309+ /* pr_info("n %u\n", n); */
1308ab2a 29310+ return n;
29311+}
29312+
1facf9fc 29313+/*
29314+ * the allocated memory has to be freed by
dece6358 29315+ * au_nhash_wh_free() or au_nhash_de_free().
1facf9fc 29316+ */
dece6358 29317+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp)
1facf9fc 29318+{
1facf9fc 29319+ struct hlist_head *head;
dece6358 29320+ unsigned int u;
076b876e 29321+ size_t sz;
1facf9fc 29322+
076b876e
AM
29323+ sz = sizeof(*nhash->nh_head) * num_hash;
29324+ head = kmalloc(sz, gfp);
dece6358
AM
29325+ if (head) {
29326+ nhash->nh_num = num_hash;
29327+ nhash->nh_head = head;
29328+ for (u = 0; u < num_hash; u++)
1facf9fc 29329+ INIT_HLIST_HEAD(head++);
dece6358 29330+ return 0; /* success */
1facf9fc 29331+ }
1facf9fc 29332+
dece6358 29333+ return -ENOMEM;
1facf9fc 29334+}
29335+
dece6358
AM
29336+static void nhash_count(struct hlist_head *head)
29337+{
29338+#if 0
29339+ unsigned long n;
29340+ struct hlist_node *pos;
29341+
29342+ n = 0;
29343+ hlist_for_each(pos, head)
29344+ n++;
4a4d8108 29345+ pr_info("%lu\n", n);
dece6358
AM
29346+#endif
29347+}
29348+
29349+static void au_nhash_wh_do_free(struct hlist_head *head)
1facf9fc 29350+{
c06a8ce3
AM
29351+ struct au_vdir_wh *pos;
29352+ struct hlist_node *node;
1facf9fc 29353+
c06a8ce3
AM
29354+ hlist_for_each_entry_safe(pos, node, head, wh_hash)
29355+ kfree(pos);
1facf9fc 29356+}
29357+
dece6358 29358+static void au_nhash_de_do_free(struct hlist_head *head)
1facf9fc 29359+{
c06a8ce3
AM
29360+ struct au_vdir_dehstr *pos;
29361+ struct hlist_node *node;
1facf9fc 29362+
c06a8ce3
AM
29363+ hlist_for_each_entry_safe(pos, node, head, hash)
29364+ au_cache_free_vdir_dehstr(pos);
1facf9fc 29365+}
29366+
dece6358
AM
29367+static void au_nhash_do_free(struct au_nhash *nhash,
29368+ void (*free)(struct hlist_head *head))
1facf9fc 29369+{
1308ab2a 29370+ unsigned int n;
1facf9fc 29371+ struct hlist_head *head;
1facf9fc 29372+
dece6358 29373+ n = nhash->nh_num;
1308ab2a 29374+ if (!n)
29375+ return;
29376+
dece6358 29377+ head = nhash->nh_head;
1308ab2a 29378+ while (n-- > 0) {
dece6358
AM
29379+ nhash_count(head);
29380+ free(head++);
1facf9fc 29381+ }
dece6358 29382+ kfree(nhash->nh_head);
1facf9fc 29383+}
29384+
dece6358 29385+void au_nhash_wh_free(struct au_nhash *whlist)
1facf9fc 29386+{
dece6358
AM
29387+ au_nhash_do_free(whlist, au_nhash_wh_do_free);
29388+}
1facf9fc 29389+
dece6358
AM
29390+static void au_nhash_de_free(struct au_nhash *delist)
29391+{
29392+ au_nhash_do_free(delist, au_nhash_de_do_free);
1facf9fc 29393+}
29394+
29395+/* ---------------------------------------------------------------------- */
29396+
29397+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
29398+ int limit)
29399+{
29400+ int num;
29401+ unsigned int u, n;
29402+ struct hlist_head *head;
c06a8ce3 29403+ struct au_vdir_wh *pos;
1facf9fc 29404+
29405+ num = 0;
29406+ n = whlist->nh_num;
29407+ head = whlist->nh_head;
1308ab2a 29408+ for (u = 0; u < n; u++, head++)
c06a8ce3
AM
29409+ hlist_for_each_entry(pos, head, wh_hash)
29410+ if (pos->wh_bindex == btgt && ++num > limit)
1facf9fc 29411+ return 1;
1facf9fc 29412+ return 0;
29413+}
29414+
29415+static struct hlist_head *au_name_hash(struct au_nhash *nhash,
dece6358 29416+ unsigned char *name,
1facf9fc 29417+ unsigned int len)
29418+{
dece6358
AM
29419+ unsigned int v;
29420+ /* const unsigned int magic_bit = 12; */
29421+
1308ab2a 29422+ AuDebugOn(!nhash->nh_num || !nhash->nh_head);
29423+
dece6358
AM
29424+ v = 0;
29425+ while (len--)
29426+ v += *name++;
29427+ /* v = hash_long(v, magic_bit); */
29428+ v %= nhash->nh_num;
29429+ return nhash->nh_head + v;
29430+}
29431+
29432+static int au_nhash_test_name(struct au_vdir_destr *str, const char *name,
29433+ int nlen)
29434+{
29435+ return str->len == nlen && !memcmp(str->name, name, nlen);
1facf9fc 29436+}
29437+
29438+/* returns found or not */
dece6358 29439+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen)
1facf9fc 29440+{
29441+ struct hlist_head *head;
c06a8ce3 29442+ struct au_vdir_wh *pos;
1facf9fc 29443+ struct au_vdir_destr *str;
29444+
dece6358 29445+ head = au_name_hash(whlist, name, nlen);
c06a8ce3
AM
29446+ hlist_for_each_entry(pos, head, wh_hash) {
29447+ str = &pos->wh_str;
1facf9fc 29448+ AuDbg("%.*s\n", str->len, str->name);
dece6358
AM
29449+ if (au_nhash_test_name(str, name, nlen))
29450+ return 1;
29451+ }
29452+ return 0;
29453+}
29454+
29455+/* returns found(true) or not */
29456+static int test_known(struct au_nhash *delist, char *name, int nlen)
29457+{
29458+ struct hlist_head *head;
c06a8ce3 29459+ struct au_vdir_dehstr *pos;
dece6358
AM
29460+ struct au_vdir_destr *str;
29461+
29462+ head = au_name_hash(delist, name, nlen);
c06a8ce3
AM
29463+ hlist_for_each_entry(pos, head, hash) {
29464+ str = pos->str;
dece6358
AM
29465+ AuDbg("%.*s\n", str->len, str->name);
29466+ if (au_nhash_test_name(str, name, nlen))
1facf9fc 29467+ return 1;
29468+ }
29469+ return 0;
29470+}
29471+
dece6358
AM
29472+static void au_shwh_init_wh(struct au_vdir_wh *wh, ino_t ino,
29473+ unsigned char d_type)
29474+{
29475+#ifdef CONFIG_AUFS_SHWH
29476+ wh->wh_ino = ino;
29477+ wh->wh_type = d_type;
29478+#endif
29479+}
29480+
29481+/* ---------------------------------------------------------------------- */
29482+
29483+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
29484+ unsigned int d_type, aufs_bindex_t bindex,
29485+ unsigned char shwh)
1facf9fc 29486+{
29487+ int err;
29488+ struct au_vdir_destr *str;
29489+ struct au_vdir_wh *wh;
29490+
dece6358 29491+ AuDbg("%.*s\n", nlen, name);
1308ab2a 29492+ AuDebugOn(!whlist->nh_num || !whlist->nh_head);
29493+
1facf9fc 29494+ err = -ENOMEM;
dece6358 29495+ wh = kmalloc(sizeof(*wh) + nlen, GFP_NOFS);
1facf9fc 29496+ if (unlikely(!wh))
29497+ goto out;
29498+
29499+ err = 0;
29500+ wh->wh_bindex = bindex;
dece6358
AM
29501+ if (shwh)
29502+ au_shwh_init_wh(wh, ino, d_type);
1facf9fc 29503+ str = &wh->wh_str;
dece6358
AM
29504+ str->len = nlen;
29505+ memcpy(str->name, name, nlen);
29506+ hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, nlen));
1facf9fc 29507+ /* smp_mb(); */
29508+
4f0767ce 29509+out:
1facf9fc 29510+ return err;
29511+}
29512+
1facf9fc 29513+static int append_deblk(struct au_vdir *vdir)
29514+{
29515+ int err;
dece6358 29516+ unsigned long ul;
1facf9fc 29517+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
29518+ union au_vdir_deblk_p p, deblk_end;
29519+ unsigned char **o;
29520+
29521+ err = -ENOMEM;
dece6358
AM
29522+ o = krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1),
29523+ GFP_NOFS);
1facf9fc 29524+ if (unlikely(!o))
29525+ goto out;
29526+
29527+ vdir->vd_deblk = o;
29528+ p.deblk = kmalloc(deblk_sz, GFP_NOFS);
29529+ if (p.deblk) {
29530+ ul = vdir->vd_nblk++;
29531+ vdir->vd_deblk[ul] = p.deblk;
29532+ vdir->vd_last.ul = ul;
29533+ vdir->vd_last.p.deblk = p.deblk;
29534+ deblk_end.deblk = p.deblk + deblk_sz;
29535+ err = set_deblk_end(&p, &deblk_end);
29536+ }
29537+
4f0767ce 29538+out:
1facf9fc 29539+ return err;
29540+}
29541+
dece6358
AM
29542+static int append_de(struct au_vdir *vdir, char *name, int nlen, ino_t ino,
29543+ unsigned int d_type, struct au_nhash *delist)
29544+{
29545+ int err;
29546+ unsigned int sz;
29547+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
29548+ union au_vdir_deblk_p p, *room, deblk_end;
29549+ struct au_vdir_dehstr *dehstr;
29550+
29551+ p.deblk = last_deblk(vdir);
29552+ deblk_end.deblk = p.deblk + deblk_sz;
29553+ room = &vdir->vd_last.p;
29554+ AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk
29555+ || !is_deblk_end(room, &deblk_end));
29556+
29557+ sz = calc_size(nlen);
29558+ if (unlikely(sz > deblk_end.deblk - room->deblk)) {
29559+ err = append_deblk(vdir);
29560+ if (unlikely(err))
29561+ goto out;
29562+
29563+ p.deblk = last_deblk(vdir);
29564+ deblk_end.deblk = p.deblk + deblk_sz;
29565+ /* smp_mb(); */
29566+ AuDebugOn(room->deblk != p.deblk);
29567+ }
29568+
29569+ err = -ENOMEM;
4a4d8108 29570+ dehstr = au_cache_alloc_vdir_dehstr();
dece6358
AM
29571+ if (unlikely(!dehstr))
29572+ goto out;
29573+
29574+ dehstr->str = &room->de->de_str;
29575+ hlist_add_head(&dehstr->hash, au_name_hash(delist, name, nlen));
29576+ room->de->de_ino = ino;
29577+ room->de->de_type = d_type;
29578+ room->de->de_str.len = nlen;
29579+ memcpy(room->de->de_str.name, name, nlen);
29580+
29581+ err = 0;
29582+ room->deblk += sz;
29583+ if (unlikely(set_deblk_end(room, &deblk_end)))
29584+ err = append_deblk(vdir);
29585+ /* smp_mb(); */
29586+
4f0767ce 29587+out:
dece6358
AM
29588+ return err;
29589+}
29590+
29591+/* ---------------------------------------------------------------------- */
29592+
29593+void au_vdir_free(struct au_vdir *vdir)
29594+{
29595+ unsigned char **deblk;
29596+
29597+ deblk = vdir->vd_deblk;
29598+ while (vdir->vd_nblk--)
29599+ kfree(*deblk++);
29600+ kfree(vdir->vd_deblk);
29601+ au_cache_free_vdir(vdir);
29602+}
29603+
1308ab2a 29604+static struct au_vdir *alloc_vdir(struct file *file)
1facf9fc 29605+{
29606+ struct au_vdir *vdir;
1308ab2a 29607+ struct super_block *sb;
1facf9fc 29608+ int err;
29609+
2000de60 29610+ sb = file->f_path.dentry->d_sb;
dece6358
AM
29611+ SiMustAnyLock(sb);
29612+
1facf9fc 29613+ err = -ENOMEM;
29614+ vdir = au_cache_alloc_vdir();
29615+ if (unlikely(!vdir))
29616+ goto out;
29617+
29618+ vdir->vd_deblk = kzalloc(sizeof(*vdir->vd_deblk), GFP_NOFS);
29619+ if (unlikely(!vdir->vd_deblk))
29620+ goto out_free;
29621+
29622+ vdir->vd_deblk_sz = au_sbi(sb)->si_rdblk;
1308ab2a 29623+ if (!vdir->vd_deblk_sz) {
79b8bda9 29624+ /* estimate the appropriate size for deblk */
1308ab2a 29625+ vdir->vd_deblk_sz = au_dir_size(file, /*dentry*/NULL);
4a4d8108 29626+ /* pr_info("vd_deblk_sz %u\n", vdir->vd_deblk_sz); */
1308ab2a 29627+ }
1facf9fc 29628+ vdir->vd_nblk = 0;
29629+ vdir->vd_version = 0;
29630+ vdir->vd_jiffy = 0;
29631+ err = append_deblk(vdir);
29632+ if (!err)
29633+ return vdir; /* success */
29634+
29635+ kfree(vdir->vd_deblk);
29636+
4f0767ce 29637+out_free:
1facf9fc 29638+ au_cache_free_vdir(vdir);
4f0767ce 29639+out:
1facf9fc 29640+ vdir = ERR_PTR(err);
29641+ return vdir;
29642+}
29643+
29644+static int reinit_vdir(struct au_vdir *vdir)
29645+{
29646+ int err;
29647+ union au_vdir_deblk_p p, deblk_end;
29648+
29649+ while (vdir->vd_nblk > 1) {
29650+ kfree(vdir->vd_deblk[vdir->vd_nblk - 1]);
29651+ /* vdir->vd_deblk[vdir->vd_nblk - 1] = NULL; */
29652+ vdir->vd_nblk--;
29653+ }
29654+ p.deblk = vdir->vd_deblk[0];
29655+ deblk_end.deblk = p.deblk + vdir->vd_deblk_sz;
29656+ err = set_deblk_end(&p, &deblk_end);
29657+ /* keep vd_dblk_sz */
29658+ vdir->vd_last.ul = 0;
29659+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
29660+ vdir->vd_version = 0;
29661+ vdir->vd_jiffy = 0;
29662+ /* smp_mb(); */
29663+ return err;
29664+}
29665+
29666+/* ---------------------------------------------------------------------- */
29667+
1facf9fc 29668+#define AuFillVdir_CALLED 1
29669+#define AuFillVdir_WHABLE (1 << 1)
dece6358 29670+#define AuFillVdir_SHWH (1 << 2)
1facf9fc 29671+#define au_ftest_fillvdir(flags, name) ((flags) & AuFillVdir_##name)
7f207e10
AM
29672+#define au_fset_fillvdir(flags, name) \
29673+ do { (flags) |= AuFillVdir_##name; } while (0)
29674+#define au_fclr_fillvdir(flags, name) \
29675+ do { (flags) &= ~AuFillVdir_##name; } while (0)
1facf9fc 29676+
dece6358
AM
29677+#ifndef CONFIG_AUFS_SHWH
29678+#undef AuFillVdir_SHWH
29679+#define AuFillVdir_SHWH 0
29680+#endif
29681+
1facf9fc 29682+struct fillvdir_arg {
392086de 29683+ struct dir_context ctx;
1facf9fc 29684+ struct file *file;
29685+ struct au_vdir *vdir;
dece6358
AM
29686+ struct au_nhash delist;
29687+ struct au_nhash whlist;
1facf9fc 29688+ aufs_bindex_t bindex;
29689+ unsigned int flags;
29690+ int err;
29691+};
29692+
392086de 29693+static int fillvdir(struct dir_context *ctx, const char *__name, int nlen,
1facf9fc 29694+ loff_t offset __maybe_unused, u64 h_ino,
29695+ unsigned int d_type)
29696+{
392086de 29697+ struct fillvdir_arg *arg = container_of(ctx, struct fillvdir_arg, ctx);
1facf9fc 29698+ char *name = (void *)__name;
29699+ struct super_block *sb;
1facf9fc 29700+ ino_t ino;
dece6358 29701+ const unsigned char shwh = !!au_ftest_fillvdir(arg->flags, SHWH);
1facf9fc 29702+
1facf9fc 29703+ arg->err = 0;
2000de60 29704+ sb = arg->file->f_path.dentry->d_sb;
1facf9fc 29705+ au_fset_fillvdir(arg->flags, CALLED);
29706+ /* smp_mb(); */
dece6358 29707+ if (nlen <= AUFS_WH_PFX_LEN
1facf9fc 29708+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
dece6358
AM
29709+ if (test_known(&arg->delist, name, nlen)
29710+ || au_nhash_test_known_wh(&arg->whlist, name, nlen))
29711+ goto out; /* already exists or whiteouted */
1facf9fc 29712+
dece6358 29713+ arg->err = au_ino(sb, arg->bindex, h_ino, d_type, &ino);
4a4d8108
AM
29714+ if (!arg->err) {
29715+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
29716+ d_type = DT_UNKNOWN;
dece6358
AM
29717+ arg->err = append_de(arg->vdir, name, nlen, ino,
29718+ d_type, &arg->delist);
4a4d8108 29719+ }
1facf9fc 29720+ } else if (au_ftest_fillvdir(arg->flags, WHABLE)) {
29721+ name += AUFS_WH_PFX_LEN;
dece6358
AM
29722+ nlen -= AUFS_WH_PFX_LEN;
29723+ if (au_nhash_test_known_wh(&arg->whlist, name, nlen))
29724+ goto out; /* already whiteouted */
1facf9fc 29725+
dece6358
AM
29726+ if (shwh)
29727+ arg->err = au_wh_ino(sb, arg->bindex, h_ino, d_type,
29728+ &ino);
4a4d8108
AM
29729+ if (!arg->err) {
29730+ if (nlen <= AUFS_MAX_NAMELEN + AUFS_WH_PFX_LEN)
29731+ d_type = DT_UNKNOWN;
1facf9fc 29732+ arg->err = au_nhash_append_wh
dece6358
AM
29733+ (&arg->whlist, name, nlen, ino, d_type,
29734+ arg->bindex, shwh);
4a4d8108 29735+ }
1facf9fc 29736+ }
29737+
4f0767ce 29738+out:
1facf9fc 29739+ if (!arg->err)
29740+ arg->vdir->vd_jiffy = jiffies;
29741+ /* smp_mb(); */
29742+ AuTraceErr(arg->err);
29743+ return arg->err;
29744+}
29745+
dece6358
AM
29746+static int au_handle_shwh(struct super_block *sb, struct au_vdir *vdir,
29747+ struct au_nhash *whlist, struct au_nhash *delist)
29748+{
29749+#ifdef CONFIG_AUFS_SHWH
29750+ int err;
29751+ unsigned int nh, u;
29752+ struct hlist_head *head;
c06a8ce3
AM
29753+ struct au_vdir_wh *pos;
29754+ struct hlist_node *n;
dece6358
AM
29755+ char *p, *o;
29756+ struct au_vdir_destr *destr;
29757+
29758+ AuDebugOn(!au_opt_test(au_mntflags(sb), SHWH));
29759+
29760+ err = -ENOMEM;
537831f9 29761+ o = p = (void *)__get_free_page(GFP_NOFS);
dece6358
AM
29762+ if (unlikely(!p))
29763+ goto out;
29764+
29765+ err = 0;
29766+ nh = whlist->nh_num;
29767+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
29768+ p += AUFS_WH_PFX_LEN;
29769+ for (u = 0; u < nh; u++) {
29770+ head = whlist->nh_head + u;
c06a8ce3
AM
29771+ hlist_for_each_entry_safe(pos, n, head, wh_hash) {
29772+ destr = &pos->wh_str;
dece6358
AM
29773+ memcpy(p, destr->name, destr->len);
29774+ err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN,
c06a8ce3 29775+ pos->wh_ino, pos->wh_type, delist);
dece6358
AM
29776+ if (unlikely(err))
29777+ break;
29778+ }
29779+ }
29780+
537831f9 29781+ free_page((unsigned long)o);
dece6358 29782+
4f0767ce 29783+out:
dece6358
AM
29784+ AuTraceErr(err);
29785+ return err;
29786+#else
29787+ return 0;
29788+#endif
29789+}
29790+
1facf9fc 29791+static int au_do_read_vdir(struct fillvdir_arg *arg)
29792+{
29793+ int err;
dece6358 29794+ unsigned int rdhash;
1facf9fc 29795+ loff_t offset;
dece6358
AM
29796+ aufs_bindex_t bend, bindex, bstart;
29797+ unsigned char shwh;
1facf9fc 29798+ struct file *hf, *file;
29799+ struct super_block *sb;
29800+
1facf9fc 29801+ file = arg->file;
2000de60 29802+ sb = file->f_path.dentry->d_sb;
dece6358
AM
29803+ SiMustAnyLock(sb);
29804+
29805+ rdhash = au_sbi(sb)->si_rdhash;
1308ab2a 29806+ if (!rdhash)
29807+ rdhash = au_rdhash_est(au_dir_size(file, /*dentry*/NULL));
dece6358
AM
29808+ err = au_nhash_alloc(&arg->delist, rdhash, GFP_NOFS);
29809+ if (unlikely(err))
1facf9fc 29810+ goto out;
dece6358
AM
29811+ err = au_nhash_alloc(&arg->whlist, rdhash, GFP_NOFS);
29812+ if (unlikely(err))
1facf9fc 29813+ goto out_delist;
29814+
29815+ err = 0;
29816+ arg->flags = 0;
dece6358
AM
29817+ shwh = 0;
29818+ if (au_opt_test(au_mntflags(sb), SHWH)) {
29819+ shwh = 1;
29820+ au_fset_fillvdir(arg->flags, SHWH);
29821+ }
29822+ bstart = au_fbstart(file);
4a4d8108 29823+ bend = au_fbend_dir(file);
dece6358 29824+ for (bindex = bstart; !err && bindex <= bend; bindex++) {
4a4d8108 29825+ hf = au_hf_dir(file, bindex);
1facf9fc 29826+ if (!hf)
29827+ continue;
29828+
29829+ offset = vfsub_llseek(hf, 0, SEEK_SET);
29830+ err = offset;
29831+ if (unlikely(offset))
29832+ break;
29833+
29834+ arg->bindex = bindex;
29835+ au_fclr_fillvdir(arg->flags, WHABLE);
dece6358
AM
29836+ if (shwh
29837+ || (bindex != bend
29838+ && au_br_whable(au_sbr_perm(sb, bindex))))
1facf9fc 29839+ au_fset_fillvdir(arg->flags, WHABLE);
29840+ do {
29841+ arg->err = 0;
29842+ au_fclr_fillvdir(arg->flags, CALLED);
29843+ /* smp_mb(); */
392086de 29844+ err = vfsub_iterate_dir(hf, &arg->ctx);
1facf9fc 29845+ if (err >= 0)
29846+ err = arg->err;
29847+ } while (!err && au_ftest_fillvdir(arg->flags, CALLED));
392086de
AM
29848+
29849+ /*
29850+ * dir_relax() may be good for concurrency, but aufs should not
29851+ * use it since it will cause a lockdep problem.
29852+ */
1facf9fc 29853+ }
dece6358
AM
29854+
29855+ if (!err && shwh)
29856+ err = au_handle_shwh(sb, arg->vdir, &arg->whlist, &arg->delist);
29857+
29858+ au_nhash_wh_free(&arg->whlist);
1facf9fc 29859+
4f0767ce 29860+out_delist:
dece6358 29861+ au_nhash_de_free(&arg->delist);
4f0767ce 29862+out:
1facf9fc 29863+ return err;
29864+}
29865+
29866+static int read_vdir(struct file *file, int may_read)
29867+{
29868+ int err;
29869+ unsigned long expire;
29870+ unsigned char do_read;
392086de
AM
29871+ struct fillvdir_arg arg = {
29872+ .ctx = {
2000de60 29873+ .actor = fillvdir
392086de
AM
29874+ }
29875+ };
1facf9fc 29876+ struct inode *inode;
29877+ struct au_vdir *vdir, *allocated;
29878+
29879+ err = 0;
c06a8ce3 29880+ inode = file_inode(file);
1facf9fc 29881+ IMustLock(inode);
dece6358
AM
29882+ SiMustAnyLock(inode->i_sb);
29883+
1facf9fc 29884+ allocated = NULL;
29885+ do_read = 0;
29886+ expire = au_sbi(inode->i_sb)->si_rdcache;
29887+ vdir = au_ivdir(inode);
29888+ if (!vdir) {
29889+ do_read = 1;
1308ab2a 29890+ vdir = alloc_vdir(file);
1facf9fc 29891+ err = PTR_ERR(vdir);
29892+ if (IS_ERR(vdir))
29893+ goto out;
29894+ err = 0;
29895+ allocated = vdir;
29896+ } else if (may_read
29897+ && (inode->i_version != vdir->vd_version
29898+ || time_after(jiffies, vdir->vd_jiffy + expire))) {
29899+ do_read = 1;
29900+ err = reinit_vdir(vdir);
29901+ if (unlikely(err))
29902+ goto out;
29903+ }
29904+
29905+ if (!do_read)
29906+ return 0; /* success */
29907+
29908+ arg.file = file;
29909+ arg.vdir = vdir;
29910+ err = au_do_read_vdir(&arg);
29911+ if (!err) {
392086de 29912+ /* file->f_pos = 0; */ /* todo: ctx->pos? */
1facf9fc 29913+ vdir->vd_version = inode->i_version;
29914+ vdir->vd_last.ul = 0;
29915+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
29916+ if (allocated)
29917+ au_set_ivdir(inode, allocated);
29918+ } else if (allocated)
29919+ au_vdir_free(allocated);
29920+
4f0767ce 29921+out:
1facf9fc 29922+ return err;
29923+}
29924+
29925+static int copy_vdir(struct au_vdir *tgt, struct au_vdir *src)
29926+{
29927+ int err, rerr;
29928+ unsigned long ul, n;
29929+ const unsigned int deblk_sz = src->vd_deblk_sz;
29930+
29931+ AuDebugOn(tgt->vd_nblk != 1);
29932+
29933+ err = -ENOMEM;
29934+ if (tgt->vd_nblk < src->vd_nblk) {
29935+ unsigned char **p;
29936+
dece6358
AM
29937+ p = krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk,
29938+ GFP_NOFS);
1facf9fc 29939+ if (unlikely(!p))
29940+ goto out;
29941+ tgt->vd_deblk = p;
29942+ }
29943+
1308ab2a 29944+ if (tgt->vd_deblk_sz != deblk_sz) {
29945+ unsigned char *p;
29946+
29947+ tgt->vd_deblk_sz = deblk_sz;
29948+ p = krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS);
29949+ if (unlikely(!p))
29950+ goto out;
29951+ tgt->vd_deblk[0] = p;
29952+ }
1facf9fc 29953+ memcpy(tgt->vd_deblk[0], src->vd_deblk[0], deblk_sz);
1facf9fc 29954+ tgt->vd_version = src->vd_version;
29955+ tgt->vd_jiffy = src->vd_jiffy;
29956+
29957+ n = src->vd_nblk;
29958+ for (ul = 1; ul < n; ul++) {
dece6358
AM
29959+ tgt->vd_deblk[ul] = kmemdup(src->vd_deblk[ul], deblk_sz,
29960+ GFP_NOFS);
29961+ if (unlikely(!tgt->vd_deblk[ul]))
1facf9fc 29962+ goto out;
1308ab2a 29963+ tgt->vd_nblk++;
1facf9fc 29964+ }
1308ab2a 29965+ tgt->vd_nblk = n;
29966+ tgt->vd_last.ul = tgt->vd_last.ul;
29967+ tgt->vd_last.p.deblk = tgt->vd_deblk[tgt->vd_last.ul];
29968+ tgt->vd_last.p.deblk += src->vd_last.p.deblk
29969+ - src->vd_deblk[src->vd_last.ul];
1facf9fc 29970+ /* smp_mb(); */
29971+ return 0; /* success */
29972+
4f0767ce 29973+out:
1facf9fc 29974+ rerr = reinit_vdir(tgt);
29975+ BUG_ON(rerr);
29976+ return err;
29977+}
29978+
29979+int au_vdir_init(struct file *file)
29980+{
29981+ int err;
29982+ struct inode *inode;
29983+ struct au_vdir *vdir_cache, *allocated;
29984+
392086de 29985+ /* test file->f_pos here instead of ctx->pos */
1facf9fc 29986+ err = read_vdir(file, !file->f_pos);
29987+ if (unlikely(err))
29988+ goto out;
29989+
29990+ allocated = NULL;
29991+ vdir_cache = au_fvdir_cache(file);
29992+ if (!vdir_cache) {
1308ab2a 29993+ vdir_cache = alloc_vdir(file);
1facf9fc 29994+ err = PTR_ERR(vdir_cache);
29995+ if (IS_ERR(vdir_cache))
29996+ goto out;
29997+ allocated = vdir_cache;
29998+ } else if (!file->f_pos && vdir_cache->vd_version != file->f_version) {
392086de 29999+ /* test file->f_pos here instead of ctx->pos */
1facf9fc 30000+ err = reinit_vdir(vdir_cache);
30001+ if (unlikely(err))
30002+ goto out;
30003+ } else
30004+ return 0; /* success */
30005+
c06a8ce3 30006+ inode = file_inode(file);
1facf9fc 30007+ err = copy_vdir(vdir_cache, au_ivdir(inode));
30008+ if (!err) {
30009+ file->f_version = inode->i_version;
30010+ if (allocated)
30011+ au_set_fvdir_cache(file, allocated);
30012+ } else if (allocated)
30013+ au_vdir_free(allocated);
30014+
4f0767ce 30015+out:
1facf9fc 30016+ return err;
30017+}
30018+
30019+static loff_t calc_offset(struct au_vdir *vdir)
30020+{
30021+ loff_t offset;
30022+ union au_vdir_deblk_p p;
30023+
30024+ p.deblk = vdir->vd_deblk[vdir->vd_last.ul];
30025+ offset = vdir->vd_last.p.deblk - p.deblk;
30026+ offset += vdir->vd_deblk_sz * vdir->vd_last.ul;
30027+ return offset;
30028+}
30029+
30030+/* returns true or false */
392086de 30031+static int seek_vdir(struct file *file, struct dir_context *ctx)
1facf9fc 30032+{
30033+ int valid;
30034+ unsigned int deblk_sz;
30035+ unsigned long ul, n;
30036+ loff_t offset;
30037+ union au_vdir_deblk_p p, deblk_end;
30038+ struct au_vdir *vdir_cache;
30039+
30040+ valid = 1;
30041+ vdir_cache = au_fvdir_cache(file);
30042+ offset = calc_offset(vdir_cache);
30043+ AuDbg("offset %lld\n", offset);
392086de 30044+ if (ctx->pos == offset)
1facf9fc 30045+ goto out;
30046+
30047+ vdir_cache->vd_last.ul = 0;
30048+ vdir_cache->vd_last.p.deblk = vdir_cache->vd_deblk[0];
392086de 30049+ if (!ctx->pos)
1facf9fc 30050+ goto out;
30051+
30052+ valid = 0;
30053+ deblk_sz = vdir_cache->vd_deblk_sz;
392086de 30054+ ul = div64_u64(ctx->pos, deblk_sz);
1facf9fc 30055+ AuDbg("ul %lu\n", ul);
30056+ if (ul >= vdir_cache->vd_nblk)
30057+ goto out;
30058+
30059+ n = vdir_cache->vd_nblk;
30060+ for (; ul < n; ul++) {
30061+ p.deblk = vdir_cache->vd_deblk[ul];
30062+ deblk_end.deblk = p.deblk + deblk_sz;
30063+ offset = ul;
30064+ offset *= deblk_sz;
392086de 30065+ while (!is_deblk_end(&p, &deblk_end) && offset < ctx->pos) {
1facf9fc 30066+ unsigned int l;
30067+
30068+ l = calc_size(p.de->de_str.len);
30069+ offset += l;
30070+ p.deblk += l;
30071+ }
30072+ if (!is_deblk_end(&p, &deblk_end)) {
30073+ valid = 1;
30074+ vdir_cache->vd_last.ul = ul;
30075+ vdir_cache->vd_last.p = p;
30076+ break;
30077+ }
30078+ }
30079+
4f0767ce 30080+out:
1facf9fc 30081+ /* smp_mb(); */
30082+ AuTraceErr(!valid);
30083+ return valid;
30084+}
30085+
392086de 30086+int au_vdir_fill_de(struct file *file, struct dir_context *ctx)
1facf9fc 30087+{
1facf9fc 30088+ unsigned int l, deblk_sz;
30089+ union au_vdir_deblk_p deblk_end;
30090+ struct au_vdir *vdir_cache;
30091+ struct au_vdir_de *de;
30092+
30093+ vdir_cache = au_fvdir_cache(file);
392086de 30094+ if (!seek_vdir(file, ctx))
1facf9fc 30095+ return 0;
30096+
30097+ deblk_sz = vdir_cache->vd_deblk_sz;
30098+ while (1) {
30099+ deblk_end.deblk = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
30100+ deblk_end.deblk += deblk_sz;
30101+ while (!is_deblk_end(&vdir_cache->vd_last.p, &deblk_end)) {
30102+ de = vdir_cache->vd_last.p.de;
30103+ AuDbg("%.*s, off%lld, i%lu, dt%d\n",
392086de 30104+ de->de_str.len, de->de_str.name, ctx->pos,
1facf9fc 30105+ (unsigned long)de->de_ino, de->de_type);
392086de
AM
30106+ if (unlikely(!dir_emit(ctx, de->de_str.name,
30107+ de->de_str.len, de->de_ino,
30108+ de->de_type))) {
1facf9fc 30109+ /* todo: ignore the error caused by udba? */
30110+ /* return err; */
30111+ return 0;
30112+ }
30113+
30114+ l = calc_size(de->de_str.len);
30115+ vdir_cache->vd_last.p.deblk += l;
392086de 30116+ ctx->pos += l;
1facf9fc 30117+ }
30118+ if (vdir_cache->vd_last.ul < vdir_cache->vd_nblk - 1) {
30119+ vdir_cache->vd_last.ul++;
30120+ vdir_cache->vd_last.p.deblk
30121+ = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
392086de 30122+ ctx->pos = deblk_sz * vdir_cache->vd_last.ul;
1facf9fc 30123+ continue;
30124+ }
30125+ break;
30126+ }
30127+
30128+ /* smp_mb(); */
30129+ return 0;
30130+}
7f207e10
AM
30131diff -urN /usr/share/empty/fs/aufs/vfsub.c linux/fs/aufs/vfsub.c
30132--- /usr/share/empty/fs/aufs/vfsub.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066
JR
30133+++ linux/fs/aufs/vfsub.c 2016-02-28 11:26:32.573304539 +0100
30134@@ -0,0 +1,866 @@
1facf9fc 30135+/*
8cdd5066 30136+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 30137+ *
30138+ * This program, aufs is free software; you can redistribute it and/or modify
30139+ * it under the terms of the GNU General Public License as published by
30140+ * the Free Software Foundation; either version 2 of the License, or
30141+ * (at your option) any later version.
dece6358
AM
30142+ *
30143+ * This program is distributed in the hope that it will be useful,
30144+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
30145+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30146+ * GNU General Public License for more details.
30147+ *
30148+ * You should have received a copy of the GNU General Public License
523b37e3 30149+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 30150+ */
30151+
30152+/*
30153+ * sub-routines for VFS
30154+ */
30155+
dece6358 30156+#include <linux/namei.h>
8cdd5066 30157+#include <linux/nsproxy.h>
dece6358
AM
30158+#include <linux/security.h>
30159+#include <linux/splice.h>
8cdd5066 30160+#include "../fs/mount.h"
1facf9fc 30161+#include "aufs.h"
30162+
8cdd5066
JR
30163+#ifdef CONFIG_AUFS_BR_FUSE
30164+int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb)
30165+{
30166+ struct nsproxy *ns;
30167+
30168+ if (!au_test_fuse(h_sb) || !au_userns)
30169+ return 0;
30170+
30171+ ns = current->nsproxy;
30172+ /* no {get,put}_nsproxy(ns) */
30173+ return real_mount(mnt)->mnt_ns == ns->mnt_ns ? 0 : -EACCES;
30174+}
30175+#endif
30176+
30177+/* ---------------------------------------------------------------------- */
30178+
1facf9fc 30179+int vfsub_update_h_iattr(struct path *h_path, int *did)
30180+{
30181+ int err;
30182+ struct kstat st;
30183+ struct super_block *h_sb;
30184+
30185+ /* for remote fs, leave work for its getattr or d_revalidate */
30186+ /* for bad i_attr fs, handle them in aufs_getattr() */
30187+ /* still some fs may acquire i_mutex. we need to skip them */
30188+ err = 0;
30189+ if (!did)
30190+ did = &err;
30191+ h_sb = h_path->dentry->d_sb;
30192+ *did = (!au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb));
30193+ if (*did)
c06a8ce3 30194+ err = vfs_getattr(h_path, &st);
1facf9fc 30195+
30196+ return err;
30197+}
30198+
30199+/* ---------------------------------------------------------------------- */
30200+
4a4d8108 30201+struct file *vfsub_dentry_open(struct path *path, int flags)
1308ab2a 30202+{
30203+ struct file *file;
30204+
b4510431 30205+ file = dentry_open(path, flags /* | __FMODE_NONOTIFY */,
7f207e10 30206+ current_cred());
2cbb1c4b
JR
30207+ if (!IS_ERR_OR_NULL(file)
30208+ && (file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
5527c038 30209+ i_readcount_inc(d_inode(path->dentry));
4a4d8108 30210+
1308ab2a 30211+ return file;
30212+}
30213+
1facf9fc 30214+struct file *vfsub_filp_open(const char *path, int oflags, int mode)
30215+{
30216+ struct file *file;
30217+
2cbb1c4b 30218+ lockdep_off();
7f207e10 30219+ file = filp_open(path,
2cbb1c4b 30220+ oflags /* | __FMODE_NONOTIFY */,
7f207e10 30221+ mode);
2cbb1c4b 30222+ lockdep_on();
1facf9fc 30223+ if (IS_ERR(file))
30224+ goto out;
30225+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
30226+
4f0767ce 30227+out:
1facf9fc 30228+ return file;
30229+}
30230+
b912730e
AM
30231+/*
30232+ * Ideally this function should call VFS:do_last() in order to keep all its
30233+ * checkings. But it is very hard for aufs to regenerate several VFS internal
30234+ * structure such as nameidata. This is a second (or third) best approach.
30235+ * cf. linux/fs/namei.c:do_last(), lookup_open() and atomic_open().
30236+ */
30237+int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
30238+ struct vfsub_aopen_args *args, struct au_branch *br)
30239+{
30240+ int err;
30241+ struct file *file = args->file;
30242+ /* copied from linux/fs/namei.c:atomic_open() */
30243+ struct dentry *const DENTRY_NOT_SET = (void *)-1UL;
30244+
30245+ IMustLock(dir);
30246+ AuDebugOn(!dir->i_op->atomic_open);
30247+
30248+ err = au_br_test_oflag(args->open_flag, br);
30249+ if (unlikely(err))
30250+ goto out;
30251+
30252+ args->file->f_path.dentry = DENTRY_NOT_SET;
30253+ args->file->f_path.mnt = au_br_mnt(br);
30254+ err = dir->i_op->atomic_open(dir, dentry, file, args->open_flag,
30255+ args->create_mode, args->opened);
30256+ if (err >= 0) {
30257+ /* some filesystems don't set FILE_CREATED while succeeded? */
30258+ if (*args->opened & FILE_CREATED)
30259+ fsnotify_create(dir, dentry);
30260+ } else
30261+ goto out;
30262+
30263+
30264+ if (!err) {
30265+ /* todo: call VFS:may_open() here */
30266+ err = open_check_o_direct(file);
30267+ /* todo: ima_file_check() too? */
30268+ if (!err && (args->open_flag & __FMODE_EXEC))
30269+ err = deny_write_access(file);
30270+ if (unlikely(err))
30271+ /* note that the file is created and still opened */
30272+ goto out;
30273+ }
30274+
30275+ atomic_inc(&br->br_count);
30276+ fsnotify_open(file);
30277+
30278+out:
30279+ return err;
30280+}
30281+
1facf9fc 30282+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path)
30283+{
30284+ int err;
30285+
1facf9fc 30286+ err = kern_path(name, flags, path);
5527c038 30287+ if (!err && d_is_positive(path->dentry))
1facf9fc 30288+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
30289+ return err;
30290+}
30291+
30292+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
30293+ int len)
30294+{
30295+ struct path path = {
30296+ .mnt = NULL
30297+ };
30298+
1308ab2a 30299+ /* VFS checks it too, but by WARN_ON_ONCE() */
5527c038 30300+ IMustLock(d_inode(parent));
1facf9fc 30301+
30302+ path.dentry = lookup_one_len(name, parent, len);
30303+ if (IS_ERR(path.dentry))
30304+ goto out;
5527c038 30305+ if (d_is_positive(path.dentry))
1facf9fc 30306+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
30307+
4f0767ce 30308+out:
4a4d8108 30309+ AuTraceErrPtr(path.dentry);
1facf9fc 30310+ return path.dentry;
30311+}
30312+
b4510431 30313+void vfsub_call_lkup_one(void *args)
2cbb1c4b 30314+{
b4510431
AM
30315+ struct vfsub_lkup_one_args *a = args;
30316+ *a->errp = vfsub_lkup_one(a->name, a->parent);
2cbb1c4b
JR
30317+}
30318+
1facf9fc 30319+/* ---------------------------------------------------------------------- */
30320+
30321+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
30322+ struct dentry *d2, struct au_hinode *hdir2)
30323+{
30324+ struct dentry *d;
30325+
2cbb1c4b 30326+ lockdep_off();
1facf9fc 30327+ d = lock_rename(d1, d2);
2cbb1c4b 30328+ lockdep_on();
4a4d8108 30329+ au_hn_suspend(hdir1);
1facf9fc 30330+ if (hdir1 != hdir2)
4a4d8108 30331+ au_hn_suspend(hdir2);
1facf9fc 30332+
30333+ return d;
30334+}
30335+
30336+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
30337+ struct dentry *d2, struct au_hinode *hdir2)
30338+{
4a4d8108 30339+ au_hn_resume(hdir1);
1facf9fc 30340+ if (hdir1 != hdir2)
4a4d8108 30341+ au_hn_resume(hdir2);
2cbb1c4b 30342+ lockdep_off();
1facf9fc 30343+ unlock_rename(d1, d2);
2cbb1c4b 30344+ lockdep_on();
1facf9fc 30345+}
30346+
30347+/* ---------------------------------------------------------------------- */
30348+
b4510431 30349+int vfsub_create(struct inode *dir, struct path *path, int mode, bool want_excl)
1facf9fc 30350+{
30351+ int err;
30352+ struct dentry *d;
30353+
30354+ IMustLock(dir);
30355+
30356+ d = path->dentry;
30357+ path->dentry = d->d_parent;
b752ccd1 30358+ err = security_path_mknod(path, d, mode, 0);
1facf9fc 30359+ path->dentry = d;
30360+ if (unlikely(err))
30361+ goto out;
30362+
c1595e42 30363+ lockdep_off();
b4510431 30364+ err = vfs_create(dir, path->dentry, mode, want_excl);
c1595e42 30365+ lockdep_on();
1facf9fc 30366+ if (!err) {
30367+ struct path tmp = *path;
30368+ int did;
30369+
30370+ vfsub_update_h_iattr(&tmp, &did);
30371+ if (did) {
30372+ tmp.dentry = path->dentry->d_parent;
30373+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30374+ }
30375+ /*ignore*/
30376+ }
30377+
4f0767ce 30378+out:
1facf9fc 30379+ return err;
30380+}
30381+
30382+int vfsub_symlink(struct inode *dir, struct path *path, const char *symname)
30383+{
30384+ int err;
30385+ struct dentry *d;
30386+
30387+ IMustLock(dir);
30388+
30389+ d = path->dentry;
30390+ path->dentry = d->d_parent;
b752ccd1 30391+ err = security_path_symlink(path, d, symname);
1facf9fc 30392+ path->dentry = d;
30393+ if (unlikely(err))
30394+ goto out;
30395+
c1595e42 30396+ lockdep_off();
1facf9fc 30397+ err = vfs_symlink(dir, path->dentry, symname);
c1595e42 30398+ lockdep_on();
1facf9fc 30399+ if (!err) {
30400+ struct path tmp = *path;
30401+ int did;
30402+
30403+ vfsub_update_h_iattr(&tmp, &did);
30404+ if (did) {
30405+ tmp.dentry = path->dentry->d_parent;
30406+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30407+ }
30408+ /*ignore*/
30409+ }
30410+
4f0767ce 30411+out:
1facf9fc 30412+ return err;
30413+}
30414+
30415+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev)
30416+{
30417+ int err;
30418+ struct dentry *d;
30419+
30420+ IMustLock(dir);
30421+
30422+ d = path->dentry;
30423+ path->dentry = d->d_parent;
027c5e7a 30424+ err = security_path_mknod(path, d, mode, new_encode_dev(dev));
1facf9fc 30425+ path->dentry = d;
30426+ if (unlikely(err))
30427+ goto out;
30428+
c1595e42 30429+ lockdep_off();
1facf9fc 30430+ err = vfs_mknod(dir, path->dentry, mode, dev);
c1595e42 30431+ lockdep_on();
1facf9fc 30432+ if (!err) {
30433+ struct path tmp = *path;
30434+ int did;
30435+
30436+ vfsub_update_h_iattr(&tmp, &did);
30437+ if (did) {
30438+ tmp.dentry = path->dentry->d_parent;
30439+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30440+ }
30441+ /*ignore*/
30442+ }
30443+
4f0767ce 30444+out:
1facf9fc 30445+ return err;
30446+}
30447+
30448+static int au_test_nlink(struct inode *inode)
30449+{
30450+ const unsigned int link_max = UINT_MAX >> 1; /* rough margin */
30451+
30452+ if (!au_test_fs_no_limit_nlink(inode->i_sb)
30453+ || inode->i_nlink < link_max)
30454+ return 0;
30455+ return -EMLINK;
30456+}
30457+
523b37e3
AM
30458+int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path,
30459+ struct inode **delegated_inode)
1facf9fc 30460+{
30461+ int err;
30462+ struct dentry *d;
30463+
30464+ IMustLock(dir);
30465+
5527c038 30466+ err = au_test_nlink(d_inode(src_dentry));
1facf9fc 30467+ if (unlikely(err))
30468+ return err;
30469+
b4510431 30470+ /* we don't call may_linkat() */
1facf9fc 30471+ d = path->dentry;
30472+ path->dentry = d->d_parent;
b752ccd1 30473+ err = security_path_link(src_dentry, path, d);
1facf9fc 30474+ path->dentry = d;
30475+ if (unlikely(err))
30476+ goto out;
30477+
2cbb1c4b 30478+ lockdep_off();
523b37e3 30479+ err = vfs_link(src_dentry, dir, path->dentry, delegated_inode);
2cbb1c4b 30480+ lockdep_on();
1facf9fc 30481+ if (!err) {
30482+ struct path tmp = *path;
30483+ int did;
30484+
30485+ /* fuse has different memory inode for the same inumber */
30486+ vfsub_update_h_iattr(&tmp, &did);
30487+ if (did) {
30488+ tmp.dentry = path->dentry->d_parent;
30489+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30490+ tmp.dentry = src_dentry;
30491+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30492+ }
30493+ /*ignore*/
30494+ }
30495+
4f0767ce 30496+out:
1facf9fc 30497+ return err;
30498+}
30499+
30500+int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry,
523b37e3
AM
30501+ struct inode *dir, struct path *path,
30502+ struct inode **delegated_inode)
1facf9fc 30503+{
30504+ int err;
30505+ struct path tmp = {
30506+ .mnt = path->mnt
30507+ };
30508+ struct dentry *d;
30509+
30510+ IMustLock(dir);
30511+ IMustLock(src_dir);
30512+
30513+ d = path->dentry;
30514+ path->dentry = d->d_parent;
30515+ tmp.dentry = src_dentry->d_parent;
38d290e6 30516+ err = security_path_rename(&tmp, src_dentry, path, d, /*flags*/0);
1facf9fc 30517+ path->dentry = d;
30518+ if (unlikely(err))
30519+ goto out;
30520+
2cbb1c4b 30521+ lockdep_off();
523b37e3 30522+ err = vfs_rename(src_dir, src_dentry, dir, path->dentry,
38d290e6 30523+ delegated_inode, /*flags*/0);
2cbb1c4b 30524+ lockdep_on();
1facf9fc 30525+ if (!err) {
30526+ int did;
30527+
30528+ tmp.dentry = d->d_parent;
30529+ vfsub_update_h_iattr(&tmp, &did);
30530+ if (did) {
30531+ tmp.dentry = src_dentry;
30532+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30533+ tmp.dentry = src_dentry->d_parent;
30534+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30535+ }
30536+ /*ignore*/
30537+ }
30538+
4f0767ce 30539+out:
1facf9fc 30540+ return err;
30541+}
30542+
30543+int vfsub_mkdir(struct inode *dir, struct path *path, int mode)
30544+{
30545+ int err;
30546+ struct dentry *d;
30547+
30548+ IMustLock(dir);
30549+
30550+ d = path->dentry;
30551+ path->dentry = d->d_parent;
b752ccd1 30552+ err = security_path_mkdir(path, d, mode);
1facf9fc 30553+ path->dentry = d;
30554+ if (unlikely(err))
30555+ goto out;
30556+
c1595e42 30557+ lockdep_off();
1facf9fc 30558+ err = vfs_mkdir(dir, path->dentry, mode);
c1595e42 30559+ lockdep_on();
1facf9fc 30560+ if (!err) {
30561+ struct path tmp = *path;
30562+ int did;
30563+
30564+ vfsub_update_h_iattr(&tmp, &did);
30565+ if (did) {
30566+ tmp.dentry = path->dentry->d_parent;
30567+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30568+ }
30569+ /*ignore*/
30570+ }
30571+
4f0767ce 30572+out:
1facf9fc 30573+ return err;
30574+}
30575+
30576+int vfsub_rmdir(struct inode *dir, struct path *path)
30577+{
30578+ int err;
30579+ struct dentry *d;
30580+
30581+ IMustLock(dir);
30582+
30583+ d = path->dentry;
30584+ path->dentry = d->d_parent;
b752ccd1 30585+ err = security_path_rmdir(path, d);
1facf9fc 30586+ path->dentry = d;
30587+ if (unlikely(err))
30588+ goto out;
30589+
2cbb1c4b 30590+ lockdep_off();
1facf9fc 30591+ err = vfs_rmdir(dir, path->dentry);
2cbb1c4b 30592+ lockdep_on();
1facf9fc 30593+ if (!err) {
30594+ struct path tmp = {
30595+ .dentry = path->dentry->d_parent,
30596+ .mnt = path->mnt
30597+ };
30598+
30599+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
30600+ }
30601+
4f0767ce 30602+out:
1facf9fc 30603+ return err;
30604+}
30605+
30606+/* ---------------------------------------------------------------------- */
30607+
9dbd164d 30608+/* todo: support mmap_sem? */
1facf9fc 30609+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
30610+ loff_t *ppos)
30611+{
30612+ ssize_t err;
30613+
2cbb1c4b 30614+ lockdep_off();
1facf9fc 30615+ err = vfs_read(file, ubuf, count, ppos);
2cbb1c4b 30616+ lockdep_on();
1facf9fc 30617+ if (err >= 0)
30618+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
30619+ return err;
30620+}
30621+
30622+/* todo: kernel_read()? */
30623+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
30624+ loff_t *ppos)
30625+{
30626+ ssize_t err;
30627+ mm_segment_t oldfs;
b752ccd1
AM
30628+ union {
30629+ void *k;
30630+ char __user *u;
30631+ } buf;
1facf9fc 30632+
b752ccd1 30633+ buf.k = kbuf;
1facf9fc 30634+ oldfs = get_fs();
30635+ set_fs(KERNEL_DS);
b752ccd1 30636+ err = vfsub_read_u(file, buf.u, count, ppos);
1facf9fc 30637+ set_fs(oldfs);
30638+ return err;
30639+}
30640+
30641+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
30642+ loff_t *ppos)
30643+{
30644+ ssize_t err;
30645+
2cbb1c4b 30646+ lockdep_off();
1facf9fc 30647+ err = vfs_write(file, ubuf, count, ppos);
2cbb1c4b 30648+ lockdep_on();
1facf9fc 30649+ if (err >= 0)
30650+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
30651+ return err;
30652+}
30653+
30654+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos)
30655+{
30656+ ssize_t err;
30657+ mm_segment_t oldfs;
b752ccd1
AM
30658+ union {
30659+ void *k;
30660+ const char __user *u;
30661+ } buf;
1facf9fc 30662+
b752ccd1 30663+ buf.k = kbuf;
1facf9fc 30664+ oldfs = get_fs();
30665+ set_fs(KERNEL_DS);
b752ccd1 30666+ err = vfsub_write_u(file, buf.u, count, ppos);
1facf9fc 30667+ set_fs(oldfs);
30668+ return err;
30669+}
30670+
4a4d8108
AM
30671+int vfsub_flush(struct file *file, fl_owner_t id)
30672+{
30673+ int err;
30674+
30675+ err = 0;
523b37e3 30676+ if (file->f_op->flush) {
2000de60 30677+ if (!au_test_nfs(file->f_path.dentry->d_sb))
2cbb1c4b
JR
30678+ err = file->f_op->flush(file, id);
30679+ else {
30680+ lockdep_off();
30681+ err = file->f_op->flush(file, id);
30682+ lockdep_on();
30683+ }
4a4d8108
AM
30684+ if (!err)
30685+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL);
30686+ /*ignore*/
30687+ }
30688+ return err;
30689+}
30690+
392086de 30691+int vfsub_iterate_dir(struct file *file, struct dir_context *ctx)
1facf9fc 30692+{
30693+ int err;
30694+
523b37e3 30695+ AuDbg("%pD, ctx{%pf, %llu}\n", file, ctx->actor, ctx->pos);
392086de 30696+
2cbb1c4b 30697+ lockdep_off();
392086de 30698+ err = iterate_dir(file, ctx);
2cbb1c4b 30699+ lockdep_on();
1facf9fc 30700+ if (err >= 0)
30701+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
30702+ return err;
30703+}
30704+
30705+long vfsub_splice_to(struct file *in, loff_t *ppos,
30706+ struct pipe_inode_info *pipe, size_t len,
30707+ unsigned int flags)
30708+{
30709+ long err;
30710+
2cbb1c4b 30711+ lockdep_off();
0fc653ad 30712+ err = do_splice_to(in, ppos, pipe, len, flags);
2cbb1c4b 30713+ lockdep_on();
4a4d8108 30714+ file_accessed(in);
1facf9fc 30715+ if (err >= 0)
30716+ vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/
30717+ return err;
30718+}
30719+
30720+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
30721+ loff_t *ppos, size_t len, unsigned int flags)
30722+{
30723+ long err;
30724+
2cbb1c4b 30725+ lockdep_off();
0fc653ad 30726+ err = do_splice_from(pipe, out, ppos, len, flags);
2cbb1c4b 30727+ lockdep_on();
1facf9fc 30728+ if (err >= 0)
30729+ vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/
30730+ return err;
30731+}
30732+
53392da6
AM
30733+int vfsub_fsync(struct file *file, struct path *path, int datasync)
30734+{
30735+ int err;
30736+
30737+ /* file can be NULL */
30738+ lockdep_off();
30739+ err = vfs_fsync(file, datasync);
30740+ lockdep_on();
30741+ if (!err) {
30742+ if (!path) {
30743+ AuDebugOn(!file);
30744+ path = &file->f_path;
30745+ }
30746+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
30747+ }
30748+ return err;
30749+}
30750+
1facf9fc 30751+/* cf. open.c:do_sys_truncate() and do_sys_ftruncate() */
30752+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
30753+ struct file *h_file)
30754+{
30755+ int err;
30756+ struct inode *h_inode;
c06a8ce3 30757+ struct super_block *h_sb;
1facf9fc 30758+
1facf9fc 30759+ if (!h_file) {
c06a8ce3
AM
30760+ err = vfsub_truncate(h_path, length);
30761+ goto out;
1facf9fc 30762+ }
30763+
5527c038 30764+ h_inode = d_inode(h_path->dentry);
c06a8ce3
AM
30765+ h_sb = h_inode->i_sb;
30766+ lockdep_off();
30767+ sb_start_write(h_sb);
30768+ lockdep_on();
1facf9fc 30769+ err = locks_verify_truncate(h_inode, h_file, length);
30770+ if (!err)
953406b4 30771+ err = security_path_truncate(h_path);
2cbb1c4b
JR
30772+ if (!err) {
30773+ lockdep_off();
1facf9fc 30774+ err = do_truncate(h_path->dentry, length, attr, h_file);
2cbb1c4b
JR
30775+ lockdep_on();
30776+ }
c06a8ce3
AM
30777+ lockdep_off();
30778+ sb_end_write(h_sb);
30779+ lockdep_on();
1facf9fc 30780+
4f0767ce 30781+out:
1facf9fc 30782+ return err;
30783+}
30784+
30785+/* ---------------------------------------------------------------------- */
30786+
30787+struct au_vfsub_mkdir_args {
30788+ int *errp;
30789+ struct inode *dir;
30790+ struct path *path;
30791+ int mode;
30792+};
30793+
30794+static void au_call_vfsub_mkdir(void *args)
30795+{
30796+ struct au_vfsub_mkdir_args *a = args;
30797+ *a->errp = vfsub_mkdir(a->dir, a->path, a->mode);
30798+}
30799+
30800+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode)
30801+{
30802+ int err, do_sio, wkq_err;
30803+
30804+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
c1595e42
JR
30805+ if (!do_sio) {
30806+ lockdep_off();
1facf9fc 30807+ err = vfsub_mkdir(dir, path, mode);
c1595e42
JR
30808+ lockdep_on();
30809+ } else {
1facf9fc 30810+ struct au_vfsub_mkdir_args args = {
30811+ .errp = &err,
30812+ .dir = dir,
30813+ .path = path,
30814+ .mode = mode
30815+ };
30816+ wkq_err = au_wkq_wait(au_call_vfsub_mkdir, &args);
30817+ if (unlikely(wkq_err))
30818+ err = wkq_err;
30819+ }
30820+
30821+ return err;
30822+}
30823+
30824+struct au_vfsub_rmdir_args {
30825+ int *errp;
30826+ struct inode *dir;
30827+ struct path *path;
30828+};
30829+
30830+static void au_call_vfsub_rmdir(void *args)
30831+{
30832+ struct au_vfsub_rmdir_args *a = args;
30833+ *a->errp = vfsub_rmdir(a->dir, a->path);
30834+}
30835+
30836+int vfsub_sio_rmdir(struct inode *dir, struct path *path)
30837+{
30838+ int err, do_sio, wkq_err;
30839+
30840+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
c1595e42
JR
30841+ if (!do_sio) {
30842+ lockdep_off();
1facf9fc 30843+ err = vfsub_rmdir(dir, path);
c1595e42
JR
30844+ lockdep_on();
30845+ } else {
1facf9fc 30846+ struct au_vfsub_rmdir_args args = {
30847+ .errp = &err,
30848+ .dir = dir,
30849+ .path = path
30850+ };
30851+ wkq_err = au_wkq_wait(au_call_vfsub_rmdir, &args);
30852+ if (unlikely(wkq_err))
30853+ err = wkq_err;
30854+ }
30855+
30856+ return err;
30857+}
30858+
30859+/* ---------------------------------------------------------------------- */
30860+
30861+struct notify_change_args {
30862+ int *errp;
30863+ struct path *path;
30864+ struct iattr *ia;
523b37e3 30865+ struct inode **delegated_inode;
1facf9fc 30866+};
30867+
30868+static void call_notify_change(void *args)
30869+{
30870+ struct notify_change_args *a = args;
30871+ struct inode *h_inode;
30872+
5527c038 30873+ h_inode = d_inode(a->path->dentry);
1facf9fc 30874+ IMustLock(h_inode);
30875+
30876+ *a->errp = -EPERM;
30877+ if (!IS_IMMUTABLE(h_inode) && !IS_APPEND(h_inode)) {
c1595e42 30878+ lockdep_off();
523b37e3
AM
30879+ *a->errp = notify_change(a->path->dentry, a->ia,
30880+ a->delegated_inode);
c1595e42 30881+ lockdep_on();
1facf9fc 30882+ if (!*a->errp)
30883+ vfsub_update_h_iattr(a->path, /*did*/NULL); /*ignore*/
30884+ }
30885+ AuTraceErr(*a->errp);
30886+}
30887+
523b37e3
AM
30888+int vfsub_notify_change(struct path *path, struct iattr *ia,
30889+ struct inode **delegated_inode)
1facf9fc 30890+{
30891+ int err;
30892+ struct notify_change_args args = {
523b37e3
AM
30893+ .errp = &err,
30894+ .path = path,
30895+ .ia = ia,
30896+ .delegated_inode = delegated_inode
1facf9fc 30897+ };
30898+
30899+ call_notify_change(&args);
30900+
30901+ return err;
30902+}
30903+
523b37e3
AM
30904+int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
30905+ struct inode **delegated_inode)
1facf9fc 30906+{
30907+ int err, wkq_err;
30908+ struct notify_change_args args = {
523b37e3
AM
30909+ .errp = &err,
30910+ .path = path,
30911+ .ia = ia,
30912+ .delegated_inode = delegated_inode
1facf9fc 30913+ };
30914+
30915+ wkq_err = au_wkq_wait(call_notify_change, &args);
30916+ if (unlikely(wkq_err))
30917+ err = wkq_err;
30918+
30919+ return err;
30920+}
30921+
30922+/* ---------------------------------------------------------------------- */
30923+
30924+struct unlink_args {
30925+ int *errp;
30926+ struct inode *dir;
30927+ struct path *path;
523b37e3 30928+ struct inode **delegated_inode;
1facf9fc 30929+};
30930+
30931+static void call_unlink(void *args)
30932+{
30933+ struct unlink_args *a = args;
30934+ struct dentry *d = a->path->dentry;
30935+ struct inode *h_inode;
30936+ const int stop_sillyrename = (au_test_nfs(d->d_sb)
c1595e42 30937+ && au_dcount(d) == 1);
1facf9fc 30938+
30939+ IMustLock(a->dir);
30940+
30941+ a->path->dentry = d->d_parent;
30942+ *a->errp = security_path_unlink(a->path, d);
30943+ a->path->dentry = d;
30944+ if (unlikely(*a->errp))
30945+ return;
30946+
30947+ if (!stop_sillyrename)
30948+ dget(d);
5527c038
JR
30949+ h_inode = NULL;
30950+ if (d_is_positive(d)) {
30951+ h_inode = d_inode(d);
027c5e7a 30952+ ihold(h_inode);
5527c038 30953+ }
1facf9fc 30954+
2cbb1c4b 30955+ lockdep_off();
523b37e3 30956+ *a->errp = vfs_unlink(a->dir, d, a->delegated_inode);
2cbb1c4b 30957+ lockdep_on();
1facf9fc 30958+ if (!*a->errp) {
30959+ struct path tmp = {
30960+ .dentry = d->d_parent,
30961+ .mnt = a->path->mnt
30962+ };
30963+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
30964+ }
30965+
30966+ if (!stop_sillyrename)
30967+ dput(d);
30968+ if (h_inode)
30969+ iput(h_inode);
30970+
30971+ AuTraceErr(*a->errp);
30972+}
30973+
30974+/*
30975+ * @dir: must be locked.
30976+ * @dentry: target dentry.
30977+ */
523b37e3
AM
30978+int vfsub_unlink(struct inode *dir, struct path *path,
30979+ struct inode **delegated_inode, int force)
1facf9fc 30980+{
30981+ int err;
30982+ struct unlink_args args = {
523b37e3
AM
30983+ .errp = &err,
30984+ .dir = dir,
30985+ .path = path,
30986+ .delegated_inode = delegated_inode
1facf9fc 30987+ };
30988+
30989+ if (!force)
30990+ call_unlink(&args);
30991+ else {
30992+ int wkq_err;
30993+
30994+ wkq_err = au_wkq_wait(call_unlink, &args);
30995+ if (unlikely(wkq_err))
30996+ err = wkq_err;
30997+ }
30998+
30999+ return err;
31000+}
7f207e10
AM
31001diff -urN /usr/share/empty/fs/aufs/vfsub.h linux/fs/aufs/vfsub.h
31002--- /usr/share/empty/fs/aufs/vfsub.h 1970-01-01 01:00:00.000000000 +0100
8cdd5066
JR
31003+++ linux/fs/aufs/vfsub.h 2016-02-28 11:26:32.576637942 +0100
31004@@ -0,0 +1,308 @@
1facf9fc 31005+/*
8cdd5066 31006+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 31007+ *
31008+ * This program, aufs is free software; you can redistribute it and/or modify
31009+ * it under the terms of the GNU General Public License as published by
31010+ * the Free Software Foundation; either version 2 of the License, or
31011+ * (at your option) any later version.
dece6358
AM
31012+ *
31013+ * This program is distributed in the hope that it will be useful,
31014+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
31015+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31016+ * GNU General Public License for more details.
31017+ *
31018+ * You should have received a copy of the GNU General Public License
523b37e3 31019+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 31020+ */
31021+
31022+/*
31023+ * sub-routines for VFS
31024+ */
31025+
31026+#ifndef __AUFS_VFSUB_H__
31027+#define __AUFS_VFSUB_H__
31028+
31029+#ifdef __KERNEL__
31030+
31031+#include <linux/fs.h>
b4510431 31032+#include <linux/mount.h>
8cdd5066 31033+#include <linux/posix_acl.h>
c1595e42 31034+#include <linux/xattr.h>
7f207e10 31035+#include "debug.h"
1facf9fc 31036+
7f207e10 31037+/* copied from linux/fs/internal.h */
2cbb1c4b 31038+/* todo: BAD approach!! */
c06a8ce3 31039+extern void __mnt_drop_write(struct vfsmount *);
b912730e 31040+extern int open_check_o_direct(struct file *f);
7f207e10
AM
31041+
31042+/* ---------------------------------------------------------------------- */
1facf9fc 31043+
31044+/* lock subclass for lower inode */
31045+/* default MAX_LOCKDEP_SUBCLASSES(8) is not enough */
31046+/* reduce? gave up. */
31047+enum {
c1595e42 31048+ AuLsc_I_Begin = I_MUTEX_PARENT2, /* 5 */
1facf9fc 31049+ AuLsc_I_PARENT, /* lower inode, parent first */
31050+ AuLsc_I_PARENT2, /* copyup dirs */
dece6358 31051+ AuLsc_I_PARENT3, /* copyup wh */
1facf9fc 31052+ AuLsc_I_CHILD,
31053+ AuLsc_I_CHILD2,
31054+ AuLsc_I_End
31055+};
31056+
31057+/* to debug easier, do not make them inlined functions */
31058+#define MtxMustLock(mtx) AuDebugOn(!mutex_is_locked(mtx))
31059+#define IMustLock(i) MtxMustLock(&(i)->i_mutex)
31060+
31061+/* ---------------------------------------------------------------------- */
31062+
7f207e10
AM
31063+static inline void vfsub_drop_nlink(struct inode *inode)
31064+{
31065+ AuDebugOn(!inode->i_nlink);
31066+ drop_nlink(inode);
31067+}
31068+
027c5e7a
AM
31069+static inline void vfsub_dead_dir(struct inode *inode)
31070+{
31071+ AuDebugOn(!S_ISDIR(inode->i_mode));
31072+ inode->i_flags |= S_DEAD;
31073+ clear_nlink(inode);
31074+}
31075+
392086de
AM
31076+static inline int vfsub_native_ro(struct inode *inode)
31077+{
31078+ return (inode->i_sb->s_flags & MS_RDONLY)
31079+ || IS_RDONLY(inode)
31080+ /* || IS_APPEND(inode) */
31081+ || IS_IMMUTABLE(inode);
31082+}
31083+
8cdd5066
JR
31084+#ifdef CONFIG_AUFS_BR_FUSE
31085+int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb);
31086+#else
31087+AuStubInt0(vfsub_test_mntns, struct vfsmount *mnt, struct super_block *h_sb);
31088+#endif
31089+
7f207e10
AM
31090+/* ---------------------------------------------------------------------- */
31091+
31092+int vfsub_update_h_iattr(struct path *h_path, int *did);
31093+struct file *vfsub_dentry_open(struct path *path, int flags);
31094+struct file *vfsub_filp_open(const char *path, int oflags, int mode);
b912730e
AM
31095+struct vfsub_aopen_args {
31096+ struct file *file;
31097+ unsigned int open_flag;
31098+ umode_t create_mode;
31099+ int *opened;
31100+};
31101+struct au_branch;
31102+int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
31103+ struct vfsub_aopen_args *args, struct au_branch *br);
1facf9fc 31104+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path);
b4510431 31105+
1facf9fc 31106+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
31107+ int len);
b4510431
AM
31108+
31109+struct vfsub_lkup_one_args {
31110+ struct dentry **errp;
31111+ struct qstr *name;
31112+ struct dentry *parent;
31113+};
31114+
31115+static inline struct dentry *vfsub_lkup_one(struct qstr *name,
31116+ struct dentry *parent)
31117+{
31118+ return vfsub_lookup_one_len(name->name, parent, name->len);
31119+}
31120+
31121+void vfsub_call_lkup_one(void *args);
31122+
31123+/* ---------------------------------------------------------------------- */
31124+
31125+static inline int vfsub_mnt_want_write(struct vfsmount *mnt)
31126+{
31127+ int err;
076b876e 31128+
b4510431
AM
31129+ lockdep_off();
31130+ err = mnt_want_write(mnt);
31131+ lockdep_on();
31132+ return err;
31133+}
31134+
31135+static inline void vfsub_mnt_drop_write(struct vfsmount *mnt)
31136+{
31137+ lockdep_off();
31138+ mnt_drop_write(mnt);
31139+ lockdep_on();
31140+}
1facf9fc 31141+
7e9cd9fe 31142+#if 0 /* reserved */
c06a8ce3
AM
31143+static inline void vfsub_mnt_drop_write_file(struct file *file)
31144+{
31145+ lockdep_off();
31146+ mnt_drop_write_file(file);
31147+ lockdep_on();
31148+}
7e9cd9fe 31149+#endif
c06a8ce3 31150+
1facf9fc 31151+/* ---------------------------------------------------------------------- */
31152+
31153+struct au_hinode;
31154+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
31155+ struct dentry *d2, struct au_hinode *hdir2);
31156+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
31157+ struct dentry *d2, struct au_hinode *hdir2);
31158+
537831f9
AM
31159+int vfsub_create(struct inode *dir, struct path *path, int mode,
31160+ bool want_excl);
1facf9fc 31161+int vfsub_symlink(struct inode *dir, struct path *path,
31162+ const char *symname);
31163+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev);
31164+int vfsub_link(struct dentry *src_dentry, struct inode *dir,
523b37e3 31165+ struct path *path, struct inode **delegated_inode);
1facf9fc 31166+int vfsub_rename(struct inode *src_hdir, struct dentry *src_dentry,
523b37e3
AM
31167+ struct inode *hdir, struct path *path,
31168+ struct inode **delegated_inode);
1facf9fc 31169+int vfsub_mkdir(struct inode *dir, struct path *path, int mode);
31170+int vfsub_rmdir(struct inode *dir, struct path *path);
31171+
31172+/* ---------------------------------------------------------------------- */
31173+
31174+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
31175+ loff_t *ppos);
31176+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
31177+ loff_t *ppos);
31178+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
31179+ loff_t *ppos);
31180+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count,
31181+ loff_t *ppos);
4a4d8108 31182+int vfsub_flush(struct file *file, fl_owner_t id);
392086de
AM
31183+int vfsub_iterate_dir(struct file *file, struct dir_context *ctx);
31184+
c06a8ce3
AM
31185+static inline loff_t vfsub_f_size_read(struct file *file)
31186+{
31187+ return i_size_read(file_inode(file));
31188+}
31189+
4a4d8108
AM
31190+static inline unsigned int vfsub_file_flags(struct file *file)
31191+{
31192+ unsigned int flags;
31193+
31194+ spin_lock(&file->f_lock);
31195+ flags = file->f_flags;
31196+ spin_unlock(&file->f_lock);
31197+
31198+ return flags;
31199+}
1308ab2a 31200+
7e9cd9fe 31201+#if 0 /* reserved */
1facf9fc 31202+static inline void vfsub_file_accessed(struct file *h_file)
31203+{
31204+ file_accessed(h_file);
31205+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); /*ignore*/
31206+}
7e9cd9fe 31207+#endif
1facf9fc 31208+
79b8bda9 31209+#if 0 /* reserved */
1facf9fc 31210+static inline void vfsub_touch_atime(struct vfsmount *h_mnt,
31211+ struct dentry *h_dentry)
31212+{
31213+ struct path h_path = {
31214+ .dentry = h_dentry,
31215+ .mnt = h_mnt
31216+ };
92d182d2 31217+ touch_atime(&h_path);
1facf9fc 31218+ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
31219+}
79b8bda9 31220+#endif
1facf9fc 31221+
0c3ec466
AM
31222+static inline int vfsub_update_time(struct inode *h_inode, struct timespec *ts,
31223+ int flags)
31224+{
7e9cd9fe 31225+ return generic_update_time(h_inode, ts, flags);
0c3ec466
AM
31226+ /* no vfsub_update_h_iattr() since we don't have struct path */
31227+}
31228+
8cdd5066
JR
31229+#ifdef CONFIG_FS_POSIX_ACL
31230+static inline int vfsub_acl_chmod(struct inode *h_inode, umode_t h_mode)
31231+{
31232+ int err;
31233+
31234+ err = posix_acl_chmod(h_inode, h_mode);
31235+ if (err == -EOPNOTSUPP)
31236+ err = 0;
31237+ return err;
31238+}
31239+#else
31240+AuStubInt0(vfsub_acl_chmod, struct inode *h_inode, umode_t h_mode);
31241+#endif
31242+
4a4d8108
AM
31243+long vfsub_splice_to(struct file *in, loff_t *ppos,
31244+ struct pipe_inode_info *pipe, size_t len,
31245+ unsigned int flags);
31246+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
31247+ loff_t *ppos, size_t len, unsigned int flags);
c06a8ce3
AM
31248+
31249+static inline long vfsub_truncate(struct path *path, loff_t length)
31250+{
31251+ long err;
076b876e 31252+
c06a8ce3
AM
31253+ lockdep_off();
31254+ err = vfs_truncate(path, length);
31255+ lockdep_on();
31256+ return err;
31257+}
31258+
4a4d8108
AM
31259+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
31260+ struct file *h_file);
53392da6 31261+int vfsub_fsync(struct file *file, struct path *path, int datasync);
4a4d8108 31262+
1facf9fc 31263+/* ---------------------------------------------------------------------- */
31264+
31265+static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin)
31266+{
31267+ loff_t err;
31268+
2cbb1c4b 31269+ lockdep_off();
1facf9fc 31270+ err = vfs_llseek(file, offset, origin);
2cbb1c4b 31271+ lockdep_on();
1facf9fc 31272+ return err;
31273+}
31274+
31275+/* ---------------------------------------------------------------------- */
31276+
4a4d8108
AM
31277+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode);
31278+int vfsub_sio_rmdir(struct inode *dir, struct path *path);
523b37e3
AM
31279+int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
31280+ struct inode **delegated_inode);
31281+int vfsub_notify_change(struct path *path, struct iattr *ia,
31282+ struct inode **delegated_inode);
31283+int vfsub_unlink(struct inode *dir, struct path *path,
31284+ struct inode **delegated_inode, int force);
4a4d8108 31285+
c1595e42
JR
31286+/* ---------------------------------------------------------------------- */
31287+
31288+static inline int vfsub_setxattr(struct dentry *dentry, const char *name,
31289+ const void *value, size_t size, int flags)
31290+{
31291+ int err;
31292+
31293+ lockdep_off();
31294+ err = vfs_setxattr(dentry, name, value, size, flags);
31295+ lockdep_on();
31296+
31297+ return err;
31298+}
31299+
31300+static inline int vfsub_removexattr(struct dentry *dentry, const char *name)
31301+{
31302+ int err;
31303+
31304+ lockdep_off();
31305+ err = vfs_removexattr(dentry, name);
31306+ lockdep_on();
31307+
31308+ return err;
31309+}
31310+
1facf9fc 31311+#endif /* __KERNEL__ */
31312+#endif /* __AUFS_VFSUB_H__ */
7f207e10
AM
31313diff -urN /usr/share/empty/fs/aufs/wbr_policy.c linux/fs/aufs/wbr_policy.c
31314--- /usr/share/empty/fs/aufs/wbr_policy.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 31315+++ linux/fs/aufs/wbr_policy.c 2016-02-28 11:26:32.576637942 +0100
076b876e 31316@@ -0,0 +1,765 @@
1facf9fc 31317+/*
8cdd5066 31318+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 31319+ *
31320+ * This program, aufs is free software; you can redistribute it and/or modify
31321+ * it under the terms of the GNU General Public License as published by
31322+ * the Free Software Foundation; either version 2 of the License, or
31323+ * (at your option) any later version.
dece6358
AM
31324+ *
31325+ * This program is distributed in the hope that it will be useful,
31326+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
31327+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31328+ * GNU General Public License for more details.
31329+ *
31330+ * You should have received a copy of the GNU General Public License
523b37e3 31331+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 31332+ */
31333+
31334+/*
31335+ * policies for selecting one among multiple writable branches
31336+ */
31337+
31338+#include <linux/statfs.h>
31339+#include "aufs.h"
31340+
31341+/* subset of cpup_attr() */
31342+static noinline_for_stack
31343+int au_cpdown_attr(struct path *h_path, struct dentry *h_src)
31344+{
31345+ int err, sbits;
31346+ struct iattr ia;
31347+ struct inode *h_isrc;
31348+
5527c038 31349+ h_isrc = d_inode(h_src);
1facf9fc 31350+ ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID;
31351+ ia.ia_mode = h_isrc->i_mode;
31352+ ia.ia_uid = h_isrc->i_uid;
31353+ ia.ia_gid = h_isrc->i_gid;
31354+ sbits = !!(ia.ia_mode & (S_ISUID | S_ISGID));
5527c038 31355+ au_cpup_attr_flags(d_inode(h_path->dentry), h_isrc->i_flags);
523b37e3
AM
31356+ /* no delegation since it is just created */
31357+ err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
1facf9fc 31358+
31359+ /* is this nfs only? */
31360+ if (!err && sbits && au_test_nfs(h_path->dentry->d_sb)) {
31361+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
31362+ ia.ia_mode = h_isrc->i_mode;
523b37e3 31363+ err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
1facf9fc 31364+ }
31365+
31366+ return err;
31367+}
31368+
31369+#define AuCpdown_PARENT_OPQ 1
31370+#define AuCpdown_WHED (1 << 1)
31371+#define AuCpdown_MADE_DIR (1 << 2)
31372+#define AuCpdown_DIROPQ (1 << 3)
31373+#define au_ftest_cpdown(flags, name) ((flags) & AuCpdown_##name)
7f207e10
AM
31374+#define au_fset_cpdown(flags, name) \
31375+ do { (flags) |= AuCpdown_##name; } while (0)
31376+#define au_fclr_cpdown(flags, name) \
31377+ do { (flags) &= ~AuCpdown_##name; } while (0)
1facf9fc 31378+
1facf9fc 31379+static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst,
c2b27bf2 31380+ unsigned int *flags)
1facf9fc 31381+{
31382+ int err;
31383+ struct dentry *opq_dentry;
31384+
31385+ opq_dentry = au_diropq_create(dentry, bdst);
31386+ err = PTR_ERR(opq_dentry);
31387+ if (IS_ERR(opq_dentry))
31388+ goto out;
31389+ dput(opq_dentry);
c2b27bf2 31390+ au_fset_cpdown(*flags, DIROPQ);
1facf9fc 31391+
4f0767ce 31392+out:
1facf9fc 31393+ return err;
31394+}
31395+
31396+static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent,
31397+ struct inode *dir, aufs_bindex_t bdst)
31398+{
31399+ int err;
31400+ struct path h_path;
31401+ struct au_branch *br;
31402+
31403+ br = au_sbr(dentry->d_sb, bdst);
31404+ h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
31405+ err = PTR_ERR(h_path.dentry);
31406+ if (IS_ERR(h_path.dentry))
31407+ goto out;
31408+
31409+ err = 0;
5527c038 31410+ if (d_is_positive(h_path.dentry)) {
86dc4139 31411+ h_path.mnt = au_br_mnt(br);
1facf9fc 31412+ err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path,
31413+ dentry);
31414+ }
31415+ dput(h_path.dentry);
31416+
4f0767ce 31417+out:
1facf9fc 31418+ return err;
31419+}
31420+
31421+static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 31422+ struct au_pin *pin,
1facf9fc 31423+ struct dentry *h_parent, void *arg)
31424+{
31425+ int err, rerr;
4a4d8108 31426+ aufs_bindex_t bopq, bstart;
1facf9fc 31427+ struct path h_path;
31428+ struct dentry *parent;
31429+ struct inode *h_dir, *h_inode, *inode, *dir;
c2b27bf2 31430+ unsigned int *flags = arg;
1facf9fc 31431+
31432+ bstart = au_dbstart(dentry);
31433+ /* dentry is di-locked */
31434+ parent = dget_parent(dentry);
5527c038
JR
31435+ dir = d_inode(parent);
31436+ h_dir = d_inode(h_parent);
1facf9fc 31437+ AuDebugOn(h_dir != au_h_iptr(dir, bdst));
31438+ IMustLock(h_dir);
31439+
86dc4139 31440+ err = au_lkup_neg(dentry, bdst, /*wh*/0);
1facf9fc 31441+ if (unlikely(err < 0))
31442+ goto out;
31443+ h_path.dentry = au_h_dptr(dentry, bdst);
31444+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst);
31445+ err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path,
31446+ S_IRWXU | S_IRUGO | S_IXUGO);
31447+ if (unlikely(err))
31448+ goto out_put;
c2b27bf2 31449+ au_fset_cpdown(*flags, MADE_DIR);
1facf9fc 31450+
1facf9fc 31451+ bopq = au_dbdiropq(dentry);
c2b27bf2
AM
31452+ au_fclr_cpdown(*flags, WHED);
31453+ au_fclr_cpdown(*flags, DIROPQ);
1facf9fc 31454+ if (au_dbwh(dentry) == bdst)
c2b27bf2
AM
31455+ au_fset_cpdown(*flags, WHED);
31456+ if (!au_ftest_cpdown(*flags, PARENT_OPQ) && bopq <= bdst)
31457+ au_fset_cpdown(*flags, PARENT_OPQ);
5527c038 31458+ h_inode = d_inode(h_path.dentry);
1facf9fc 31459+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
c2b27bf2
AM
31460+ if (au_ftest_cpdown(*flags, WHED)) {
31461+ err = au_cpdown_dir_opq(dentry, bdst, flags);
1facf9fc 31462+ if (unlikely(err)) {
31463+ mutex_unlock(&h_inode->i_mutex);
31464+ goto out_dir;
31465+ }
31466+ }
31467+
31468+ err = au_cpdown_attr(&h_path, au_h_dptr(dentry, bstart));
31469+ mutex_unlock(&h_inode->i_mutex);
31470+ if (unlikely(err))
31471+ goto out_opq;
31472+
c2b27bf2 31473+ if (au_ftest_cpdown(*flags, WHED)) {
1facf9fc 31474+ err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst);
31475+ if (unlikely(err))
31476+ goto out_opq;
31477+ }
31478+
5527c038 31479+ inode = d_inode(dentry);
1facf9fc 31480+ if (au_ibend(inode) < bdst)
31481+ au_set_ibend(inode, bdst);
31482+ au_set_h_iptr(inode, bdst, au_igrab(h_inode),
31483+ au_hi_flags(inode, /*isdir*/1));
076b876e 31484+ au_fhsm_wrote(dentry->d_sb, bdst, /*force*/0);
1facf9fc 31485+ goto out; /* success */
31486+
31487+ /* revert */
4f0767ce 31488+out_opq:
c2b27bf2 31489+ if (au_ftest_cpdown(*flags, DIROPQ)) {
1facf9fc 31490+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
31491+ rerr = au_diropq_remove(dentry, bdst);
31492+ mutex_unlock(&h_inode->i_mutex);
31493+ if (unlikely(rerr)) {
523b37e3
AM
31494+ AuIOErr("failed removing diropq for %pd b%d (%d)\n",
31495+ dentry, bdst, rerr);
1facf9fc 31496+ err = -EIO;
31497+ goto out;
31498+ }
31499+ }
4f0767ce 31500+out_dir:
c2b27bf2 31501+ if (au_ftest_cpdown(*flags, MADE_DIR)) {
1facf9fc 31502+ rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path);
31503+ if (unlikely(rerr)) {
523b37e3
AM
31504+ AuIOErr("failed removing %pd b%d (%d)\n",
31505+ dentry, bdst, rerr);
1facf9fc 31506+ err = -EIO;
31507+ }
31508+ }
4f0767ce 31509+out_put:
1facf9fc 31510+ au_set_h_dptr(dentry, bdst, NULL);
31511+ if (au_dbend(dentry) == bdst)
31512+ au_update_dbend(dentry);
4f0767ce 31513+out:
1facf9fc 31514+ dput(parent);
31515+ return err;
31516+}
31517+
31518+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst)
31519+{
31520+ int err;
c2b27bf2 31521+ unsigned int flags;
1facf9fc 31522+
c2b27bf2
AM
31523+ flags = 0;
31524+ err = au_cp_dirs(dentry, bdst, au_cpdown_dir, &flags);
1facf9fc 31525+
31526+ return err;
31527+}
31528+
31529+/* ---------------------------------------------------------------------- */
31530+
31531+/* policies for create */
31532+
c2b27bf2 31533+int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex)
4a4d8108
AM
31534+{
31535+ int err, i, j, ndentry;
31536+ aufs_bindex_t bopq;
31537+ struct au_dcsub_pages dpages;
31538+ struct au_dpage *dpage;
31539+ struct dentry **dentries, *parent, *d;
31540+
31541+ err = au_dpages_init(&dpages, GFP_NOFS);
31542+ if (unlikely(err))
31543+ goto out;
31544+ parent = dget_parent(dentry);
027c5e7a 31545+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/0);
4a4d8108
AM
31546+ if (unlikely(err))
31547+ goto out_free;
31548+
31549+ err = bindex;
31550+ for (i = 0; i < dpages.ndpage; i++) {
31551+ dpage = dpages.dpages + i;
31552+ dentries = dpage->dentries;
31553+ ndentry = dpage->ndentry;
31554+ for (j = 0; j < ndentry; j++) {
31555+ d = dentries[j];
31556+ di_read_lock_parent2(d, !AuLock_IR);
31557+ bopq = au_dbdiropq(d);
31558+ di_read_unlock(d, !AuLock_IR);
31559+ if (bopq >= 0 && bopq < err)
31560+ err = bopq;
31561+ }
31562+ }
31563+
31564+out_free:
31565+ dput(parent);
31566+ au_dpages_free(&dpages);
31567+out:
31568+ return err;
31569+}
31570+
1facf9fc 31571+static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex)
31572+{
31573+ for (; bindex >= 0; bindex--)
31574+ if (!au_br_rdonly(au_sbr(sb, bindex)))
31575+ return bindex;
31576+ return -EROFS;
31577+}
31578+
31579+/* top down parent */
392086de
AM
31580+static int au_wbr_create_tdp(struct dentry *dentry,
31581+ unsigned int flags __maybe_unused)
1facf9fc 31582+{
31583+ int err;
31584+ aufs_bindex_t bstart, bindex;
31585+ struct super_block *sb;
31586+ struct dentry *parent, *h_parent;
31587+
31588+ sb = dentry->d_sb;
31589+ bstart = au_dbstart(dentry);
31590+ err = bstart;
31591+ if (!au_br_rdonly(au_sbr(sb, bstart)))
31592+ goto out;
31593+
31594+ err = -EROFS;
31595+ parent = dget_parent(dentry);
31596+ for (bindex = au_dbstart(parent); bindex < bstart; bindex++) {
31597+ h_parent = au_h_dptr(parent, bindex);
5527c038 31598+ if (!h_parent || d_is_negative(h_parent))
1facf9fc 31599+ continue;
31600+
31601+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
31602+ err = bindex;
31603+ break;
31604+ }
31605+ }
31606+ dput(parent);
31607+
31608+ /* bottom up here */
4a4d8108 31609+ if (unlikely(err < 0)) {
1facf9fc 31610+ err = au_wbr_bu(sb, bstart - 1);
4a4d8108
AM
31611+ if (err >= 0)
31612+ err = au_wbr_nonopq(dentry, err);
31613+ }
1facf9fc 31614+
4f0767ce 31615+out:
1facf9fc 31616+ AuDbg("b%d\n", err);
31617+ return err;
31618+}
31619+
31620+/* ---------------------------------------------------------------------- */
31621+
31622+/* an exception for the policies other than tdp */
31623+static int au_wbr_create_exp(struct dentry *dentry)
31624+{
31625+ int err;
31626+ aufs_bindex_t bwh, bdiropq;
31627+ struct dentry *parent;
31628+
31629+ err = -1;
31630+ bwh = au_dbwh(dentry);
31631+ parent = dget_parent(dentry);
31632+ bdiropq = au_dbdiropq(parent);
31633+ if (bwh >= 0) {
31634+ if (bdiropq >= 0)
31635+ err = min(bdiropq, bwh);
31636+ else
31637+ err = bwh;
31638+ AuDbg("%d\n", err);
31639+ } else if (bdiropq >= 0) {
31640+ err = bdiropq;
31641+ AuDbg("%d\n", err);
31642+ }
31643+ dput(parent);
31644+
4a4d8108
AM
31645+ if (err >= 0)
31646+ err = au_wbr_nonopq(dentry, err);
31647+
1facf9fc 31648+ if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err)))
31649+ err = -1;
31650+
31651+ AuDbg("%d\n", err);
31652+ return err;
31653+}
31654+
31655+/* ---------------------------------------------------------------------- */
31656+
31657+/* round robin */
31658+static int au_wbr_create_init_rr(struct super_block *sb)
31659+{
31660+ int err;
31661+
31662+ err = au_wbr_bu(sb, au_sbend(sb));
31663+ atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */
dece6358 31664+ /* smp_mb(); */
1facf9fc 31665+
31666+ AuDbg("b%d\n", err);
31667+ return err;
31668+}
31669+
392086de 31670+static int au_wbr_create_rr(struct dentry *dentry, unsigned int flags)
1facf9fc 31671+{
31672+ int err, nbr;
31673+ unsigned int u;
31674+ aufs_bindex_t bindex, bend;
31675+ struct super_block *sb;
31676+ atomic_t *next;
31677+
31678+ err = au_wbr_create_exp(dentry);
31679+ if (err >= 0)
31680+ goto out;
31681+
31682+ sb = dentry->d_sb;
31683+ next = &au_sbi(sb)->si_wbr_rr_next;
31684+ bend = au_sbend(sb);
31685+ nbr = bend + 1;
31686+ for (bindex = 0; bindex <= bend; bindex++) {
392086de 31687+ if (!au_ftest_wbr(flags, DIR)) {
1facf9fc 31688+ err = atomic_dec_return(next) + 1;
31689+ /* modulo for 0 is meaningless */
31690+ if (unlikely(!err))
31691+ err = atomic_dec_return(next) + 1;
31692+ } else
31693+ err = atomic_read(next);
31694+ AuDbg("%d\n", err);
31695+ u = err;
31696+ err = u % nbr;
31697+ AuDbg("%d\n", err);
31698+ if (!au_br_rdonly(au_sbr(sb, err)))
31699+ break;
31700+ err = -EROFS;
31701+ }
31702+
4a4d8108
AM
31703+ if (err >= 0)
31704+ err = au_wbr_nonopq(dentry, err);
31705+
4f0767ce 31706+out:
1facf9fc 31707+ AuDbg("%d\n", err);
31708+ return err;
31709+}
31710+
31711+/* ---------------------------------------------------------------------- */
31712+
31713+/* most free space */
392086de 31714+static void au_mfs(struct dentry *dentry, struct dentry *parent)
1facf9fc 31715+{
31716+ struct super_block *sb;
31717+ struct au_branch *br;
31718+ struct au_wbr_mfs *mfs;
392086de 31719+ struct dentry *h_parent;
1facf9fc 31720+ aufs_bindex_t bindex, bend;
31721+ int err;
31722+ unsigned long long b, bavail;
7f207e10 31723+ struct path h_path;
1facf9fc 31724+ /* reduce the stack usage */
31725+ struct kstatfs *st;
31726+
31727+ st = kmalloc(sizeof(*st), GFP_NOFS);
31728+ if (unlikely(!st)) {
31729+ AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM);
31730+ return;
31731+ }
31732+
31733+ bavail = 0;
31734+ sb = dentry->d_sb;
31735+ mfs = &au_sbi(sb)->si_wbr_mfs;
dece6358 31736+ MtxMustLock(&mfs->mfs_lock);
1facf9fc 31737+ mfs->mfs_bindex = -EROFS;
31738+ mfs->mfsrr_bytes = 0;
392086de
AM
31739+ if (!parent) {
31740+ bindex = 0;
31741+ bend = au_sbend(sb);
31742+ } else {
31743+ bindex = au_dbstart(parent);
31744+ bend = au_dbtaildir(parent);
31745+ }
31746+
31747+ for (; bindex <= bend; bindex++) {
31748+ if (parent) {
31749+ h_parent = au_h_dptr(parent, bindex);
5527c038 31750+ if (!h_parent || d_is_negative(h_parent))
392086de
AM
31751+ continue;
31752+ }
1facf9fc 31753+ br = au_sbr(sb, bindex);
31754+ if (au_br_rdonly(br))
31755+ continue;
31756+
31757+ /* sb->s_root for NFS is unreliable */
86dc4139 31758+ h_path.mnt = au_br_mnt(br);
7f207e10
AM
31759+ h_path.dentry = h_path.mnt->mnt_root;
31760+ err = vfs_statfs(&h_path, st);
1facf9fc 31761+ if (unlikely(err)) {
31762+ AuWarn1("failed statfs, b%d, %d\n", bindex, err);
31763+ continue;
31764+ }
31765+
31766+ /* when the available size is equal, select the lower one */
31767+ BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail)
31768+ || sizeof(b) < sizeof(st->f_bsize));
31769+ b = st->f_bavail * st->f_bsize;
31770+ br->br_wbr->wbr_bytes = b;
31771+ if (b >= bavail) {
31772+ bavail = b;
31773+ mfs->mfs_bindex = bindex;
31774+ mfs->mfs_jiffy = jiffies;
31775+ }
31776+ }
31777+
31778+ mfs->mfsrr_bytes = bavail;
31779+ AuDbg("b%d\n", mfs->mfs_bindex);
31780+ kfree(st);
31781+}
31782+
392086de 31783+static int au_wbr_create_mfs(struct dentry *dentry, unsigned int flags)
1facf9fc 31784+{
31785+ int err;
392086de 31786+ struct dentry *parent;
1facf9fc 31787+ struct super_block *sb;
31788+ struct au_wbr_mfs *mfs;
31789+
31790+ err = au_wbr_create_exp(dentry);
31791+ if (err >= 0)
31792+ goto out;
31793+
31794+ sb = dentry->d_sb;
392086de
AM
31795+ parent = NULL;
31796+ if (au_ftest_wbr(flags, PARENT))
31797+ parent = dget_parent(dentry);
1facf9fc 31798+ mfs = &au_sbi(sb)->si_wbr_mfs;
31799+ mutex_lock(&mfs->mfs_lock);
31800+ if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
31801+ || mfs->mfs_bindex < 0
31802+ || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex)))
392086de 31803+ au_mfs(dentry, parent);
1facf9fc 31804+ mutex_unlock(&mfs->mfs_lock);
31805+ err = mfs->mfs_bindex;
392086de 31806+ dput(parent);
1facf9fc 31807+
4a4d8108
AM
31808+ if (err >= 0)
31809+ err = au_wbr_nonopq(dentry, err);
31810+
4f0767ce 31811+out:
1facf9fc 31812+ AuDbg("b%d\n", err);
31813+ return err;
31814+}
31815+
31816+static int au_wbr_create_init_mfs(struct super_block *sb)
31817+{
31818+ struct au_wbr_mfs *mfs;
31819+
31820+ mfs = &au_sbi(sb)->si_wbr_mfs;
31821+ mutex_init(&mfs->mfs_lock);
31822+ mfs->mfs_jiffy = 0;
31823+ mfs->mfs_bindex = -EROFS;
31824+
31825+ return 0;
31826+}
31827+
31828+static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused)
31829+{
31830+ mutex_destroy(&au_sbi(sb)->si_wbr_mfs.mfs_lock);
31831+ return 0;
31832+}
31833+
31834+/* ---------------------------------------------------------------------- */
31835+
31836+/* most free space and then round robin */
392086de 31837+static int au_wbr_create_mfsrr(struct dentry *dentry, unsigned int flags)
1facf9fc 31838+{
31839+ int err;
31840+ struct au_wbr_mfs *mfs;
31841+
392086de 31842+ err = au_wbr_create_mfs(dentry, flags);
1facf9fc 31843+ if (err >= 0) {
31844+ mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs;
dece6358 31845+ mutex_lock(&mfs->mfs_lock);
1facf9fc 31846+ if (mfs->mfsrr_bytes < mfs->mfsrr_watermark)
392086de 31847+ err = au_wbr_create_rr(dentry, flags);
dece6358 31848+ mutex_unlock(&mfs->mfs_lock);
1facf9fc 31849+ }
31850+
31851+ AuDbg("b%d\n", err);
31852+ return err;
31853+}
31854+
31855+static int au_wbr_create_init_mfsrr(struct super_block *sb)
31856+{
31857+ int err;
31858+
31859+ au_wbr_create_init_mfs(sb); /* ignore */
31860+ err = au_wbr_create_init_rr(sb);
31861+
31862+ return err;
31863+}
31864+
31865+/* ---------------------------------------------------------------------- */
31866+
31867+/* top down parent and most free space */
392086de 31868+static int au_wbr_create_pmfs(struct dentry *dentry, unsigned int flags)
1facf9fc 31869+{
31870+ int err, e2;
31871+ unsigned long long b;
31872+ aufs_bindex_t bindex, bstart, bend;
31873+ struct super_block *sb;
31874+ struct dentry *parent, *h_parent;
31875+ struct au_branch *br;
31876+
392086de 31877+ err = au_wbr_create_tdp(dentry, flags);
1facf9fc 31878+ if (unlikely(err < 0))
31879+ goto out;
31880+ parent = dget_parent(dentry);
31881+ bstart = au_dbstart(parent);
31882+ bend = au_dbtaildir(parent);
31883+ if (bstart == bend)
31884+ goto out_parent; /* success */
31885+
392086de 31886+ e2 = au_wbr_create_mfs(dentry, flags);
1facf9fc 31887+ if (e2 < 0)
31888+ goto out_parent; /* success */
31889+
31890+ /* when the available size is equal, select upper one */
31891+ sb = dentry->d_sb;
31892+ br = au_sbr(sb, err);
31893+ b = br->br_wbr->wbr_bytes;
31894+ AuDbg("b%d, %llu\n", err, b);
31895+
31896+ for (bindex = bstart; bindex <= bend; bindex++) {
31897+ h_parent = au_h_dptr(parent, bindex);
5527c038 31898+ if (!h_parent || d_is_negative(h_parent))
1facf9fc 31899+ continue;
31900+
31901+ br = au_sbr(sb, bindex);
31902+ if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) {
31903+ b = br->br_wbr->wbr_bytes;
31904+ err = bindex;
31905+ AuDbg("b%d, %llu\n", err, b);
31906+ }
31907+ }
31908+
4a4d8108
AM
31909+ if (err >= 0)
31910+ err = au_wbr_nonopq(dentry, err);
31911+
4f0767ce 31912+out_parent:
1facf9fc 31913+ dput(parent);
4f0767ce 31914+out:
1facf9fc 31915+ AuDbg("b%d\n", err);
31916+ return err;
31917+}
31918+
31919+/* ---------------------------------------------------------------------- */
31920+
392086de
AM
31921+/*
31922+ * - top down parent
31923+ * - most free space with parent
31924+ * - most free space round-robin regardless of parent
31925+ */
31926+static int au_wbr_create_pmfsrr(struct dentry *dentry, unsigned int flags)
31927+{
31928+ int err;
31929+ unsigned long long watermark;
31930+ struct super_block *sb;
31931+ struct au_branch *br;
31932+ struct au_wbr_mfs *mfs;
31933+
31934+ err = au_wbr_create_pmfs(dentry, flags | AuWbr_PARENT);
31935+ if (unlikely(err < 0))
31936+ goto out;
31937+
31938+ sb = dentry->d_sb;
31939+ br = au_sbr(sb, err);
31940+ mfs = &au_sbi(sb)->si_wbr_mfs;
31941+ mutex_lock(&mfs->mfs_lock);
31942+ watermark = mfs->mfsrr_watermark;
31943+ mutex_unlock(&mfs->mfs_lock);
31944+ if (br->br_wbr->wbr_bytes < watermark)
31945+ /* regardless of the parent dir */
31946+ err = au_wbr_create_mfsrr(dentry, flags);
31947+
31948+out:
31949+ AuDbg("b%d\n", err);
31950+ return err;
31951+}
31952+
31953+/* ---------------------------------------------------------------------- */
31954+
1facf9fc 31955+/* policies for copyup */
31956+
31957+/* top down parent */
31958+static int au_wbr_copyup_tdp(struct dentry *dentry)
31959+{
392086de 31960+ return au_wbr_create_tdp(dentry, /*flags, anything is ok*/0);
1facf9fc 31961+}
31962+
31963+/* bottom up parent */
31964+static int au_wbr_copyup_bup(struct dentry *dentry)
31965+{
31966+ int err;
31967+ aufs_bindex_t bindex, bstart;
31968+ struct dentry *parent, *h_parent;
31969+ struct super_block *sb;
31970+
31971+ err = -EROFS;
31972+ sb = dentry->d_sb;
31973+ parent = dget_parent(dentry);
31974+ bstart = au_dbstart(parent);
31975+ for (bindex = au_dbstart(dentry); bindex >= bstart; bindex--) {
31976+ h_parent = au_h_dptr(parent, bindex);
5527c038 31977+ if (!h_parent || d_is_negative(h_parent))
1facf9fc 31978+ continue;
31979+
31980+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
31981+ err = bindex;
31982+ break;
31983+ }
31984+ }
31985+ dput(parent);
31986+
31987+ /* bottom up here */
31988+ if (unlikely(err < 0))
31989+ err = au_wbr_bu(sb, bstart - 1);
31990+
31991+ AuDbg("b%d\n", err);
31992+ return err;
31993+}
31994+
31995+/* bottom up */
076b876e 31996+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t bstart)
1facf9fc 31997+{
31998+ int err;
31999+
4a4d8108
AM
32000+ err = au_wbr_bu(dentry->d_sb, bstart);
32001+ AuDbg("b%d\n", err);
32002+ if (err > bstart)
32003+ err = au_wbr_nonopq(dentry, err);
1facf9fc 32004+
32005+ AuDbg("b%d\n", err);
32006+ return err;
32007+}
32008+
076b876e
AM
32009+static int au_wbr_copyup_bu(struct dentry *dentry)
32010+{
32011+ int err;
32012+ aufs_bindex_t bstart;
32013+
32014+ bstart = au_dbstart(dentry);
32015+ err = au_wbr_do_copyup_bu(dentry, bstart);
32016+ return err;
32017+}
32018+
1facf9fc 32019+/* ---------------------------------------------------------------------- */
32020+
32021+struct au_wbr_copyup_operations au_wbr_copyup_ops[] = {
32022+ [AuWbrCopyup_TDP] = {
32023+ .copyup = au_wbr_copyup_tdp
32024+ },
32025+ [AuWbrCopyup_BUP] = {
32026+ .copyup = au_wbr_copyup_bup
32027+ },
32028+ [AuWbrCopyup_BU] = {
32029+ .copyup = au_wbr_copyup_bu
32030+ }
32031+};
32032+
32033+struct au_wbr_create_operations au_wbr_create_ops[] = {
32034+ [AuWbrCreate_TDP] = {
32035+ .create = au_wbr_create_tdp
32036+ },
32037+ [AuWbrCreate_RR] = {
32038+ .create = au_wbr_create_rr,
32039+ .init = au_wbr_create_init_rr
32040+ },
32041+ [AuWbrCreate_MFS] = {
32042+ .create = au_wbr_create_mfs,
32043+ .init = au_wbr_create_init_mfs,
32044+ .fin = au_wbr_create_fin_mfs
32045+ },
32046+ [AuWbrCreate_MFSV] = {
32047+ .create = au_wbr_create_mfs,
32048+ .init = au_wbr_create_init_mfs,
32049+ .fin = au_wbr_create_fin_mfs
32050+ },
32051+ [AuWbrCreate_MFSRR] = {
32052+ .create = au_wbr_create_mfsrr,
32053+ .init = au_wbr_create_init_mfsrr,
32054+ .fin = au_wbr_create_fin_mfs
32055+ },
32056+ [AuWbrCreate_MFSRRV] = {
32057+ .create = au_wbr_create_mfsrr,
32058+ .init = au_wbr_create_init_mfsrr,
32059+ .fin = au_wbr_create_fin_mfs
32060+ },
32061+ [AuWbrCreate_PMFS] = {
32062+ .create = au_wbr_create_pmfs,
32063+ .init = au_wbr_create_init_mfs,
32064+ .fin = au_wbr_create_fin_mfs
32065+ },
32066+ [AuWbrCreate_PMFSV] = {
32067+ .create = au_wbr_create_pmfs,
32068+ .init = au_wbr_create_init_mfs,
32069+ .fin = au_wbr_create_fin_mfs
392086de
AM
32070+ },
32071+ [AuWbrCreate_PMFSRR] = {
32072+ .create = au_wbr_create_pmfsrr,
32073+ .init = au_wbr_create_init_mfsrr,
32074+ .fin = au_wbr_create_fin_mfs
32075+ },
32076+ [AuWbrCreate_PMFSRRV] = {
32077+ .create = au_wbr_create_pmfsrr,
32078+ .init = au_wbr_create_init_mfsrr,
32079+ .fin = au_wbr_create_fin_mfs
1facf9fc 32080+ }
32081+};
7f207e10
AM
32082diff -urN /usr/share/empty/fs/aufs/whout.c linux/fs/aufs/whout.c
32083--- /usr/share/empty/fs/aufs/whout.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 32084+++ linux/fs/aufs/whout.c 2016-02-28 11:26:32.576637942 +0100
be52b249 32085@@ -0,0 +1,1060 @@
1facf9fc 32086+/*
8cdd5066 32087+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 32088+ *
32089+ * This program, aufs is free software; you can redistribute it and/or modify
32090+ * it under the terms of the GNU General Public License as published by
32091+ * the Free Software Foundation; either version 2 of the License, or
32092+ * (at your option) any later version.
dece6358
AM
32093+ *
32094+ * This program is distributed in the hope that it will be useful,
32095+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
32096+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32097+ * GNU General Public License for more details.
32098+ *
32099+ * You should have received a copy of the GNU General Public License
523b37e3 32100+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 32101+ */
32102+
32103+/*
32104+ * whiteout for logical deletion and opaque directory
32105+ */
32106+
1facf9fc 32107+#include "aufs.h"
32108+
32109+#define WH_MASK S_IRUGO
32110+
32111+/*
32112+ * If a directory contains this file, then it is opaque. We start with the
32113+ * .wh. flag so that it is blocked by lookup.
32114+ */
0c3ec466
AM
32115+static struct qstr diropq_name = QSTR_INIT(AUFS_WH_DIROPQ,
32116+ sizeof(AUFS_WH_DIROPQ) - 1);
1facf9fc 32117+
32118+/*
32119+ * generate whiteout name, which is NOT NUL-terminated.
32120+ * @name: original d_name.name
32121+ * @len: original d_name.len
32122+ * @wh: whiteout qstr
32123+ * returns zero on success, otherwise an error.
32124+ * on success, wh->name is allocated and should be freed by kfree().
32125+ */
32126+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name)
32127+{
32128+ char *p;
32129+
32130+ if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN))
32131+ return -ENAMETOOLONG;
32132+
32133+ wh->len = name->len + AUFS_WH_PFX_LEN;
32134+ p = kmalloc(wh->len, GFP_NOFS);
32135+ wh->name = p;
32136+ if (p) {
32137+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
32138+ memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len);
32139+ /* smp_mb(); */
32140+ return 0;
32141+ }
32142+ return -ENOMEM;
32143+}
32144+
32145+/* ---------------------------------------------------------------------- */
32146+
32147+/*
32148+ * test if the @wh_name exists under @h_parent.
32149+ * @try_sio specifies the necessity of super-io.
32150+ */
076b876e 32151+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio)
1facf9fc 32152+{
32153+ int err;
32154+ struct dentry *wh_dentry;
1facf9fc 32155+
1facf9fc 32156+ if (!try_sio)
b4510431 32157+ wh_dentry = vfsub_lkup_one(wh_name, h_parent);
1facf9fc 32158+ else
076b876e 32159+ wh_dentry = au_sio_lkup_one(wh_name, h_parent);
1facf9fc 32160+ err = PTR_ERR(wh_dentry);
2000de60
JR
32161+ if (IS_ERR(wh_dentry)) {
32162+ if (err == -ENAMETOOLONG)
32163+ err = 0;
1facf9fc 32164+ goto out;
2000de60 32165+ }
1facf9fc 32166+
32167+ err = 0;
5527c038 32168+ if (d_is_negative(wh_dentry))
1facf9fc 32169+ goto out_wh; /* success */
32170+
32171+ err = 1;
7e9cd9fe 32172+ if (d_is_reg(wh_dentry))
1facf9fc 32173+ goto out_wh; /* success */
32174+
32175+ err = -EIO;
523b37e3 32176+ AuIOErr("%pd Invalid whiteout entry type 0%o.\n",
5527c038 32177+ wh_dentry, d_inode(wh_dentry)->i_mode);
1facf9fc 32178+
4f0767ce 32179+out_wh:
1facf9fc 32180+ dput(wh_dentry);
4f0767ce 32181+out:
1facf9fc 32182+ return err;
32183+}
32184+
32185+/*
32186+ * test whether @h_dentry is marked opaque or not.
32187+ */
076b876e 32188+int au_diropq_test(struct dentry *h_dentry)
1facf9fc 32189+{
32190+ int err;
32191+ struct inode *h_dir;
32192+
5527c038 32193+ h_dir = d_inode(h_dentry);
076b876e 32194+ err = au_wh_test(h_dentry, &diropq_name,
1facf9fc 32195+ au_test_h_perm_sio(h_dir, MAY_EXEC));
32196+ return err;
32197+}
32198+
32199+/*
32200+ * returns a negative dentry whose name is unique and temporary.
32201+ */
32202+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
32203+ struct qstr *prefix)
32204+{
1facf9fc 32205+ struct dentry *dentry;
32206+ int i;
027c5e7a 32207+ char defname[NAME_MAX - AUFS_MAX_NAMELEN + DNAME_INLINE_LEN + 1],
4a4d8108 32208+ *name, *p;
027c5e7a 32209+ /* strict atomic_t is unnecessary here */
1facf9fc 32210+ static unsigned short cnt;
32211+ struct qstr qs;
32212+
4a4d8108
AM
32213+ BUILD_BUG_ON(sizeof(cnt) * 2 > AUFS_WH_TMP_LEN);
32214+
1facf9fc 32215+ name = defname;
027c5e7a
AM
32216+ qs.len = sizeof(defname) - DNAME_INLINE_LEN + prefix->len - 1;
32217+ if (unlikely(prefix->len > DNAME_INLINE_LEN)) {
1facf9fc 32218+ dentry = ERR_PTR(-ENAMETOOLONG);
4a4d8108 32219+ if (unlikely(qs.len > NAME_MAX))
1facf9fc 32220+ goto out;
32221+ dentry = ERR_PTR(-ENOMEM);
32222+ name = kmalloc(qs.len + 1, GFP_NOFS);
32223+ if (unlikely(!name))
32224+ goto out;
32225+ }
32226+
32227+ /* doubly whiteout-ed */
32228+ memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2);
32229+ p = name + AUFS_WH_PFX_LEN * 2;
32230+ memcpy(p, prefix->name, prefix->len);
32231+ p += prefix->len;
32232+ *p++ = '.';
4a4d8108 32233+ AuDebugOn(name + qs.len + 1 - p <= AUFS_WH_TMP_LEN);
1facf9fc 32234+
32235+ qs.name = name;
32236+ for (i = 0; i < 3; i++) {
b752ccd1 32237+ sprintf(p, "%.*x", AUFS_WH_TMP_LEN, cnt++);
076b876e 32238+ dentry = au_sio_lkup_one(&qs, h_parent);
5527c038 32239+ if (IS_ERR(dentry) || d_is_negative(dentry))
1facf9fc 32240+ goto out_name;
32241+ dput(dentry);
32242+ }
0c3ec466 32243+ /* pr_warn("could not get random name\n"); */
1facf9fc 32244+ dentry = ERR_PTR(-EEXIST);
32245+ AuDbg("%.*s\n", AuLNPair(&qs));
32246+ BUG();
32247+
4f0767ce 32248+out_name:
1facf9fc 32249+ if (name != defname)
32250+ kfree(name);
4f0767ce 32251+out:
4a4d8108 32252+ AuTraceErrPtr(dentry);
1facf9fc 32253+ return dentry;
1facf9fc 32254+}
32255+
32256+/*
32257+ * rename the @h_dentry on @br to the whiteouted temporary name.
32258+ */
32259+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br)
32260+{
32261+ int err;
32262+ struct path h_path = {
86dc4139 32263+ .mnt = au_br_mnt(br)
1facf9fc 32264+ };
523b37e3 32265+ struct inode *h_dir, *delegated;
1facf9fc 32266+ struct dentry *h_parent;
32267+
32268+ h_parent = h_dentry->d_parent; /* dir inode is locked */
5527c038 32269+ h_dir = d_inode(h_parent);
1facf9fc 32270+ IMustLock(h_dir);
32271+
32272+ h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name);
32273+ err = PTR_ERR(h_path.dentry);
32274+ if (IS_ERR(h_path.dentry))
32275+ goto out;
32276+
32277+ /* under the same dir, no need to lock_rename() */
523b37e3
AM
32278+ delegated = NULL;
32279+ err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path, &delegated);
1facf9fc 32280+ AuTraceErr(err);
523b37e3
AM
32281+ if (unlikely(err == -EWOULDBLOCK)) {
32282+ pr_warn("cannot retry for NFSv4 delegation"
32283+ " for an internal rename\n");
32284+ iput(delegated);
32285+ }
1facf9fc 32286+ dput(h_path.dentry);
32287+
4f0767ce 32288+out:
4a4d8108 32289+ AuTraceErr(err);
1facf9fc 32290+ return err;
32291+}
32292+
32293+/* ---------------------------------------------------------------------- */
32294+/*
32295+ * functions for removing a whiteout
32296+ */
32297+
32298+static int do_unlink_wh(struct inode *h_dir, struct path *h_path)
32299+{
523b37e3
AM
32300+ int err, force;
32301+ struct inode *delegated;
1facf9fc 32302+
32303+ /*
32304+ * forces superio when the dir has a sticky bit.
32305+ * this may be a violation of unix fs semantics.
32306+ */
32307+ force = (h_dir->i_mode & S_ISVTX)
5527c038 32308+ && !uid_eq(current_fsuid(), d_inode(h_path->dentry)->i_uid);
523b37e3
AM
32309+ delegated = NULL;
32310+ err = vfsub_unlink(h_dir, h_path, &delegated, force);
32311+ if (unlikely(err == -EWOULDBLOCK)) {
32312+ pr_warn("cannot retry for NFSv4 delegation"
32313+ " for an internal unlink\n");
32314+ iput(delegated);
32315+ }
32316+ return err;
1facf9fc 32317+}
32318+
32319+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
32320+ struct dentry *dentry)
32321+{
32322+ int err;
32323+
32324+ err = do_unlink_wh(h_dir, h_path);
32325+ if (!err && dentry)
32326+ au_set_dbwh(dentry, -1);
32327+
32328+ return err;
32329+}
32330+
32331+static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh,
32332+ struct au_branch *br)
32333+{
32334+ int err;
32335+ struct path h_path = {
86dc4139 32336+ .mnt = au_br_mnt(br)
1facf9fc 32337+ };
32338+
32339+ err = 0;
b4510431 32340+ h_path.dentry = vfsub_lkup_one(wh, h_parent);
1facf9fc 32341+ if (IS_ERR(h_path.dentry))
32342+ err = PTR_ERR(h_path.dentry);
32343+ else {
5527c038
JR
32344+ if (d_is_reg(h_path.dentry))
32345+ err = do_unlink_wh(d_inode(h_parent), &h_path);
1facf9fc 32346+ dput(h_path.dentry);
32347+ }
32348+
32349+ return err;
32350+}
32351+
32352+/* ---------------------------------------------------------------------- */
32353+/*
32354+ * initialize/clean whiteout for a branch
32355+ */
32356+
32357+static void au_wh_clean(struct inode *h_dir, struct path *whpath,
32358+ const int isdir)
32359+{
32360+ int err;
523b37e3 32361+ struct inode *delegated;
1facf9fc 32362+
5527c038 32363+ if (d_is_negative(whpath->dentry))
1facf9fc 32364+ return;
32365+
86dc4139
AM
32366+ if (isdir)
32367+ err = vfsub_rmdir(h_dir, whpath);
523b37e3
AM
32368+ else {
32369+ delegated = NULL;
32370+ err = vfsub_unlink(h_dir, whpath, &delegated, /*force*/0);
32371+ if (unlikely(err == -EWOULDBLOCK)) {
32372+ pr_warn("cannot retry for NFSv4 delegation"
32373+ " for an internal unlink\n");
32374+ iput(delegated);
32375+ }
32376+ }
1facf9fc 32377+ if (unlikely(err))
523b37e3
AM
32378+ pr_warn("failed removing %pd (%d), ignored.\n",
32379+ whpath->dentry, err);
1facf9fc 32380+}
32381+
32382+static int test_linkable(struct dentry *h_root)
32383+{
5527c038 32384+ struct inode *h_dir = d_inode(h_root);
1facf9fc 32385+
32386+ if (h_dir->i_op->link)
32387+ return 0;
32388+
523b37e3
AM
32389+ pr_err("%pd (%s) doesn't support link(2), use noplink and rw+nolwh\n",
32390+ h_root, au_sbtype(h_root->d_sb));
1facf9fc 32391+ return -ENOSYS;
32392+}
32393+
32394+/* todo: should this mkdir be done in /sbin/mount.aufs helper? */
32395+static int au_whdir(struct inode *h_dir, struct path *path)
32396+{
32397+ int err;
32398+
32399+ err = -EEXIST;
5527c038 32400+ if (d_is_negative(path->dentry)) {
1facf9fc 32401+ int mode = S_IRWXU;
32402+
32403+ if (au_test_nfs(path->dentry->d_sb))
32404+ mode |= S_IXUGO;
86dc4139 32405+ err = vfsub_mkdir(h_dir, path, mode);
2000de60 32406+ } else if (d_is_dir(path->dentry))
1facf9fc 32407+ err = 0;
32408+ else
523b37e3 32409+ pr_err("unknown %pd exists\n", path->dentry);
1facf9fc 32410+
32411+ return err;
32412+}
32413+
32414+struct au_wh_base {
32415+ const struct qstr *name;
32416+ struct dentry *dentry;
32417+};
32418+
32419+static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[],
32420+ struct path *h_path)
32421+{
32422+ h_path->dentry = base[AuBrWh_BASE].dentry;
32423+ au_wh_clean(h_dir, h_path, /*isdir*/0);
32424+ h_path->dentry = base[AuBrWh_PLINK].dentry;
32425+ au_wh_clean(h_dir, h_path, /*isdir*/1);
32426+ h_path->dentry = base[AuBrWh_ORPH].dentry;
32427+ au_wh_clean(h_dir, h_path, /*isdir*/1);
32428+}
32429+
32430+/*
32431+ * returns tri-state,
c1595e42 32432+ * minus: error, caller should print the message
1facf9fc 32433+ * zero: success
c1595e42 32434+ * plus: error, caller should NOT print the message
1facf9fc 32435+ */
32436+static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr,
32437+ int do_plink, struct au_wh_base base[],
32438+ struct path *h_path)
32439+{
32440+ int err;
32441+ struct inode *h_dir;
32442+
5527c038 32443+ h_dir = d_inode(h_root);
1facf9fc 32444+ h_path->dentry = base[AuBrWh_BASE].dentry;
32445+ au_wh_clean(h_dir, h_path, /*isdir*/0);
32446+ h_path->dentry = base[AuBrWh_PLINK].dentry;
32447+ if (do_plink) {
32448+ err = test_linkable(h_root);
32449+ if (unlikely(err)) {
32450+ err = 1;
32451+ goto out;
32452+ }
32453+
32454+ err = au_whdir(h_dir, h_path);
32455+ if (unlikely(err))
32456+ goto out;
32457+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
32458+ } else
32459+ au_wh_clean(h_dir, h_path, /*isdir*/1);
32460+ h_path->dentry = base[AuBrWh_ORPH].dentry;
32461+ err = au_whdir(h_dir, h_path);
32462+ if (unlikely(err))
32463+ goto out;
32464+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
32465+
4f0767ce 32466+out:
1facf9fc 32467+ return err;
32468+}
32469+
32470+/*
32471+ * for the moment, aufs supports the branch filesystem which does not support
32472+ * link(2). testing on FAT which does not support i_op->setattr() fully either,
32473+ * copyup failed. finally, such filesystem will not be used as the writable
32474+ * branch.
32475+ *
32476+ * returns tri-state, see above.
32477+ */
32478+static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr,
32479+ int do_plink, struct au_wh_base base[],
32480+ struct path *h_path)
32481+{
32482+ int err;
32483+ struct inode *h_dir;
32484+
1308ab2a 32485+ WbrWhMustWriteLock(wbr);
32486+
1facf9fc 32487+ err = test_linkable(h_root);
32488+ if (unlikely(err)) {
32489+ err = 1;
32490+ goto out;
32491+ }
32492+
32493+ /*
32494+ * todo: should this create be done in /sbin/mount.aufs helper?
32495+ */
32496+ err = -EEXIST;
5527c038
JR
32497+ h_dir = d_inode(h_root);
32498+ if (d_is_negative(base[AuBrWh_BASE].dentry)) {
86dc4139
AM
32499+ h_path->dentry = base[AuBrWh_BASE].dentry;
32500+ err = vfsub_create(h_dir, h_path, WH_MASK, /*want_excl*/true);
7e9cd9fe 32501+ } else if (d_is_reg(base[AuBrWh_BASE].dentry))
1facf9fc 32502+ err = 0;
32503+ else
523b37e3 32504+ pr_err("unknown %pd2 exists\n", base[AuBrWh_BASE].dentry);
1facf9fc 32505+ if (unlikely(err))
32506+ goto out;
32507+
32508+ h_path->dentry = base[AuBrWh_PLINK].dentry;
32509+ if (do_plink) {
32510+ err = au_whdir(h_dir, h_path);
32511+ if (unlikely(err))
32512+ goto out;
32513+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
32514+ } else
32515+ au_wh_clean(h_dir, h_path, /*isdir*/1);
32516+ wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry);
32517+
32518+ h_path->dentry = base[AuBrWh_ORPH].dentry;
32519+ err = au_whdir(h_dir, h_path);
32520+ if (unlikely(err))
32521+ goto out;
32522+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
32523+
4f0767ce 32524+out:
1facf9fc 32525+ return err;
32526+}
32527+
32528+/*
32529+ * initialize the whiteout base file/dir for @br.
32530+ */
86dc4139 32531+int au_wh_init(struct au_branch *br, struct super_block *sb)
1facf9fc 32532+{
32533+ int err, i;
32534+ const unsigned char do_plink
32535+ = !!au_opt_test(au_mntflags(sb), PLINK);
1facf9fc 32536+ struct inode *h_dir;
86dc4139
AM
32537+ struct path path = br->br_path;
32538+ struct dentry *h_root = path.dentry;
1facf9fc 32539+ struct au_wbr *wbr = br->br_wbr;
32540+ static const struct qstr base_name[] = {
0c3ec466
AM
32541+ [AuBrWh_BASE] = QSTR_INIT(AUFS_BASE_NAME,
32542+ sizeof(AUFS_BASE_NAME) - 1),
32543+ [AuBrWh_PLINK] = QSTR_INIT(AUFS_PLINKDIR_NAME,
32544+ sizeof(AUFS_PLINKDIR_NAME) - 1),
32545+ [AuBrWh_ORPH] = QSTR_INIT(AUFS_ORPHDIR_NAME,
32546+ sizeof(AUFS_ORPHDIR_NAME) - 1)
1facf9fc 32547+ };
32548+ struct au_wh_base base[] = {
32549+ [AuBrWh_BASE] = {
32550+ .name = base_name + AuBrWh_BASE,
32551+ .dentry = NULL
32552+ },
32553+ [AuBrWh_PLINK] = {
32554+ .name = base_name + AuBrWh_PLINK,
32555+ .dentry = NULL
32556+ },
32557+ [AuBrWh_ORPH] = {
32558+ .name = base_name + AuBrWh_ORPH,
32559+ .dentry = NULL
32560+ }
32561+ };
32562+
1308ab2a 32563+ if (wbr)
32564+ WbrWhMustWriteLock(wbr);
1facf9fc 32565+
1facf9fc 32566+ for (i = 0; i < AuBrWh_Last; i++) {
32567+ /* doubly whiteouted */
32568+ struct dentry *d;
32569+
32570+ d = au_wh_lkup(h_root, (void *)base[i].name, br);
32571+ err = PTR_ERR(d);
32572+ if (IS_ERR(d))
32573+ goto out;
32574+
32575+ base[i].dentry = d;
32576+ AuDebugOn(wbr
32577+ && wbr->wbr_wh[i]
32578+ && wbr->wbr_wh[i] != base[i].dentry);
32579+ }
32580+
32581+ if (wbr)
32582+ for (i = 0; i < AuBrWh_Last; i++) {
32583+ dput(wbr->wbr_wh[i]);
32584+ wbr->wbr_wh[i] = NULL;
32585+ }
32586+
32587+ err = 0;
1e00d052 32588+ if (!au_br_writable(br->br_perm)) {
5527c038 32589+ h_dir = d_inode(h_root);
1facf9fc 32590+ au_wh_init_ro(h_dir, base, &path);
1e00d052 32591+ } else if (!au_br_wh_linkable(br->br_perm)) {
1facf9fc 32592+ err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path);
32593+ if (err > 0)
32594+ goto out;
32595+ else if (err)
32596+ goto out_err;
1e00d052 32597+ } else {
1facf9fc 32598+ err = au_wh_init_rw(h_root, wbr, do_plink, base, &path);
32599+ if (err > 0)
32600+ goto out;
32601+ else if (err)
32602+ goto out_err;
1facf9fc 32603+ }
32604+ goto out; /* success */
32605+
4f0767ce 32606+out_err:
523b37e3
AM
32607+ pr_err("an error(%d) on the writable branch %pd(%s)\n",
32608+ err, h_root, au_sbtype(h_root->d_sb));
4f0767ce 32609+out:
1facf9fc 32610+ for (i = 0; i < AuBrWh_Last; i++)
32611+ dput(base[i].dentry);
32612+ return err;
32613+}
32614+
32615+/* ---------------------------------------------------------------------- */
32616+/*
32617+ * whiteouts are all hard-linked usually.
32618+ * when its link count reaches a ceiling, we create a new whiteout base
32619+ * asynchronously.
32620+ */
32621+
32622+struct reinit_br_wh {
32623+ struct super_block *sb;
32624+ struct au_branch *br;
32625+};
32626+
32627+static void reinit_br_wh(void *arg)
32628+{
32629+ int err;
32630+ aufs_bindex_t bindex;
32631+ struct path h_path;
32632+ struct reinit_br_wh *a = arg;
32633+ struct au_wbr *wbr;
523b37e3 32634+ struct inode *dir, *delegated;
1facf9fc 32635+ struct dentry *h_root;
32636+ struct au_hinode *hdir;
32637+
32638+ err = 0;
32639+ wbr = a->br->br_wbr;
32640+ /* big aufs lock */
32641+ si_noflush_write_lock(a->sb);
32642+ if (!au_br_writable(a->br->br_perm))
32643+ goto out;
32644+ bindex = au_br_index(a->sb, a->br->br_id);
32645+ if (unlikely(bindex < 0))
32646+ goto out;
32647+
1308ab2a 32648+ di_read_lock_parent(a->sb->s_root, AuLock_IR);
5527c038 32649+ dir = d_inode(a->sb->s_root);
1facf9fc 32650+ hdir = au_hi(dir, bindex);
32651+ h_root = au_h_dptr(a->sb->s_root, bindex);
86dc4139 32652+ AuDebugOn(h_root != au_br_dentry(a->br));
1facf9fc 32653+
4a4d8108 32654+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 32655+ wbr_wh_write_lock(wbr);
32656+ err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode,
32657+ h_root, a->br);
32658+ if (!err) {
86dc4139
AM
32659+ h_path.dentry = wbr->wbr_whbase;
32660+ h_path.mnt = au_br_mnt(a->br);
523b37e3
AM
32661+ delegated = NULL;
32662+ err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated,
32663+ /*force*/0);
32664+ if (unlikely(err == -EWOULDBLOCK)) {
32665+ pr_warn("cannot retry for NFSv4 delegation"
32666+ " for an internal unlink\n");
32667+ iput(delegated);
32668+ }
1facf9fc 32669+ } else {
523b37e3 32670+ pr_warn("%pd is moved, ignored\n", wbr->wbr_whbase);
1facf9fc 32671+ err = 0;
32672+ }
32673+ dput(wbr->wbr_whbase);
32674+ wbr->wbr_whbase = NULL;
32675+ if (!err)
86dc4139 32676+ err = au_wh_init(a->br, a->sb);
1facf9fc 32677+ wbr_wh_write_unlock(wbr);
4a4d8108 32678+ au_hn_imtx_unlock(hdir);
1308ab2a 32679+ di_read_unlock(a->sb->s_root, AuLock_IR);
076b876e
AM
32680+ if (!err)
32681+ au_fhsm_wrote(a->sb, bindex, /*force*/0);
1facf9fc 32682+
4f0767ce 32683+out:
1facf9fc 32684+ if (wbr)
32685+ atomic_dec(&wbr->wbr_wh_running);
32686+ atomic_dec(&a->br->br_count);
1facf9fc 32687+ si_write_unlock(a->sb);
027c5e7a 32688+ au_nwt_done(&au_sbi(a->sb)->si_nowait);
1facf9fc 32689+ kfree(arg);
32690+ if (unlikely(err))
32691+ AuIOErr("err %d\n", err);
32692+}
32693+
32694+static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br)
32695+{
32696+ int do_dec, wkq_err;
32697+ struct reinit_br_wh *arg;
32698+
32699+ do_dec = 1;
32700+ if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1)
32701+ goto out;
32702+
32703+ /* ignore ENOMEM */
32704+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
32705+ if (arg) {
32706+ /*
32707+ * dec(wh_running), kfree(arg) and dec(br_count)
32708+ * in reinit function
32709+ */
32710+ arg->sb = sb;
32711+ arg->br = br;
32712+ atomic_inc(&br->br_count);
53392da6 32713+ wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb, /*flags*/0);
1facf9fc 32714+ if (unlikely(wkq_err)) {
32715+ atomic_dec(&br->br_wbr->wbr_wh_running);
32716+ atomic_dec(&br->br_count);
32717+ kfree(arg);
32718+ }
32719+ do_dec = 0;
32720+ }
32721+
4f0767ce 32722+out:
1facf9fc 32723+ if (do_dec)
32724+ atomic_dec(&br->br_wbr->wbr_wh_running);
32725+}
32726+
32727+/* ---------------------------------------------------------------------- */
32728+
32729+/*
32730+ * create the whiteout @wh.
32731+ */
32732+static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex,
32733+ struct dentry *wh)
32734+{
32735+ int err;
32736+ struct path h_path = {
32737+ .dentry = wh
32738+ };
32739+ struct au_branch *br;
32740+ struct au_wbr *wbr;
32741+ struct dentry *h_parent;
523b37e3 32742+ struct inode *h_dir, *delegated;
1facf9fc 32743+
32744+ h_parent = wh->d_parent; /* dir inode is locked */
5527c038 32745+ h_dir = d_inode(h_parent);
1facf9fc 32746+ IMustLock(h_dir);
32747+
32748+ br = au_sbr(sb, bindex);
86dc4139 32749+ h_path.mnt = au_br_mnt(br);
1facf9fc 32750+ wbr = br->br_wbr;
32751+ wbr_wh_read_lock(wbr);
32752+ if (wbr->wbr_whbase) {
523b37e3
AM
32753+ delegated = NULL;
32754+ err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path, &delegated);
32755+ if (unlikely(err == -EWOULDBLOCK)) {
32756+ pr_warn("cannot retry for NFSv4 delegation"
32757+ " for an internal link\n");
32758+ iput(delegated);
32759+ }
1facf9fc 32760+ if (!err || err != -EMLINK)
32761+ goto out;
32762+
32763+ /* link count full. re-initialize br_whbase. */
32764+ kick_reinit_br_wh(sb, br);
32765+ }
32766+
32767+ /* return this error in this context */
b4510431 32768+ err = vfsub_create(h_dir, &h_path, WH_MASK, /*want_excl*/true);
076b876e
AM
32769+ if (!err)
32770+ au_fhsm_wrote(sb, bindex, /*force*/0);
1facf9fc 32771+
4f0767ce 32772+out:
1facf9fc 32773+ wbr_wh_read_unlock(wbr);
32774+ return err;
32775+}
32776+
32777+/* ---------------------------------------------------------------------- */
32778+
32779+/*
32780+ * create or remove the diropq.
32781+ */
32782+static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex,
32783+ unsigned int flags)
32784+{
32785+ struct dentry *opq_dentry, *h_dentry;
32786+ struct super_block *sb;
32787+ struct au_branch *br;
32788+ int err;
32789+
32790+ sb = dentry->d_sb;
32791+ br = au_sbr(sb, bindex);
32792+ h_dentry = au_h_dptr(dentry, bindex);
b4510431 32793+ opq_dentry = vfsub_lkup_one(&diropq_name, h_dentry);
1facf9fc 32794+ if (IS_ERR(opq_dentry))
32795+ goto out;
32796+
32797+ if (au_ftest_diropq(flags, CREATE)) {
32798+ err = link_or_create_wh(sb, bindex, opq_dentry);
32799+ if (!err) {
32800+ au_set_dbdiropq(dentry, bindex);
32801+ goto out; /* success */
32802+ }
32803+ } else {
32804+ struct path tmp = {
32805+ .dentry = opq_dentry,
86dc4139 32806+ .mnt = au_br_mnt(br)
1facf9fc 32807+ };
5527c038 32808+ err = do_unlink_wh(au_h_iptr(d_inode(dentry), bindex), &tmp);
1facf9fc 32809+ if (!err)
32810+ au_set_dbdiropq(dentry, -1);
32811+ }
32812+ dput(opq_dentry);
32813+ opq_dentry = ERR_PTR(err);
32814+
4f0767ce 32815+out:
1facf9fc 32816+ return opq_dentry;
32817+}
32818+
32819+struct do_diropq_args {
32820+ struct dentry **errp;
32821+ struct dentry *dentry;
32822+ aufs_bindex_t bindex;
32823+ unsigned int flags;
32824+};
32825+
32826+static void call_do_diropq(void *args)
32827+{
32828+ struct do_diropq_args *a = args;
32829+ *a->errp = do_diropq(a->dentry, a->bindex, a->flags);
32830+}
32831+
32832+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
32833+ unsigned int flags)
32834+{
32835+ struct dentry *diropq, *h_dentry;
32836+
32837+ h_dentry = au_h_dptr(dentry, bindex);
5527c038 32838+ if (!au_test_h_perm_sio(d_inode(h_dentry), MAY_EXEC | MAY_WRITE))
1facf9fc 32839+ diropq = do_diropq(dentry, bindex, flags);
32840+ else {
32841+ int wkq_err;
32842+ struct do_diropq_args args = {
32843+ .errp = &diropq,
32844+ .dentry = dentry,
32845+ .bindex = bindex,
32846+ .flags = flags
32847+ };
32848+
32849+ wkq_err = au_wkq_wait(call_do_diropq, &args);
32850+ if (unlikely(wkq_err))
32851+ diropq = ERR_PTR(wkq_err);
32852+ }
32853+
32854+ return diropq;
32855+}
32856+
32857+/* ---------------------------------------------------------------------- */
32858+
32859+/*
32860+ * lookup whiteout dentry.
32861+ * @h_parent: lower parent dentry which must exist and be locked
32862+ * @base_name: name of dentry which will be whiteouted
32863+ * returns dentry for whiteout.
32864+ */
32865+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
32866+ struct au_branch *br)
32867+{
32868+ int err;
32869+ struct qstr wh_name;
32870+ struct dentry *wh_dentry;
32871+
32872+ err = au_wh_name_alloc(&wh_name, base_name);
32873+ wh_dentry = ERR_PTR(err);
32874+ if (!err) {
b4510431 32875+ wh_dentry = vfsub_lkup_one(&wh_name, h_parent);
1facf9fc 32876+ kfree(wh_name.name);
32877+ }
32878+ return wh_dentry;
32879+}
32880+
32881+/*
32882+ * link/create a whiteout for @dentry on @bindex.
32883+ */
32884+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
32885+ struct dentry *h_parent)
32886+{
32887+ struct dentry *wh_dentry;
32888+ struct super_block *sb;
32889+ int err;
32890+
32891+ sb = dentry->d_sb;
32892+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex));
5527c038 32893+ if (!IS_ERR(wh_dentry) && d_is_negative(wh_dentry)) {
1facf9fc 32894+ err = link_or_create_wh(sb, bindex, wh_dentry);
076b876e 32895+ if (!err) {
1facf9fc 32896+ au_set_dbwh(dentry, bindex);
076b876e
AM
32897+ au_fhsm_wrote(sb, bindex, /*force*/0);
32898+ } else {
1facf9fc 32899+ dput(wh_dentry);
32900+ wh_dentry = ERR_PTR(err);
32901+ }
32902+ }
32903+
32904+ return wh_dentry;
32905+}
32906+
32907+/* ---------------------------------------------------------------------- */
32908+
32909+/* Delete all whiteouts in this directory on branch bindex. */
32910+static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist,
32911+ aufs_bindex_t bindex, struct au_branch *br)
32912+{
32913+ int err;
32914+ unsigned long ul, n;
32915+ struct qstr wh_name;
32916+ char *p;
32917+ struct hlist_head *head;
c06a8ce3 32918+ struct au_vdir_wh *pos;
1facf9fc 32919+ struct au_vdir_destr *str;
32920+
32921+ err = -ENOMEM;
537831f9 32922+ p = (void *)__get_free_page(GFP_NOFS);
1facf9fc 32923+ wh_name.name = p;
32924+ if (unlikely(!wh_name.name))
32925+ goto out;
32926+
32927+ err = 0;
32928+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
32929+ p += AUFS_WH_PFX_LEN;
32930+ n = whlist->nh_num;
32931+ head = whlist->nh_head;
32932+ for (ul = 0; !err && ul < n; ul++, head++) {
c06a8ce3
AM
32933+ hlist_for_each_entry(pos, head, wh_hash) {
32934+ if (pos->wh_bindex != bindex)
1facf9fc 32935+ continue;
32936+
c06a8ce3 32937+ str = &pos->wh_str;
1facf9fc 32938+ if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) {
32939+ memcpy(p, str->name, str->len);
32940+ wh_name.len = AUFS_WH_PFX_LEN + str->len;
32941+ err = unlink_wh_name(h_dentry, &wh_name, br);
32942+ if (!err)
32943+ continue;
32944+ break;
32945+ }
32946+ AuIOErr("whiteout name too long %.*s\n",
32947+ str->len, str->name);
32948+ err = -EIO;
32949+ break;
32950+ }
32951+ }
537831f9 32952+ free_page((unsigned long)wh_name.name);
1facf9fc 32953+
4f0767ce 32954+out:
1facf9fc 32955+ return err;
32956+}
32957+
32958+struct del_wh_children_args {
32959+ int *errp;
32960+ struct dentry *h_dentry;
1308ab2a 32961+ struct au_nhash *whlist;
1facf9fc 32962+ aufs_bindex_t bindex;
32963+ struct au_branch *br;
32964+};
32965+
32966+static void call_del_wh_children(void *args)
32967+{
32968+ struct del_wh_children_args *a = args;
1308ab2a 32969+ *a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br);
1facf9fc 32970+}
32971+
32972+/* ---------------------------------------------------------------------- */
32973+
32974+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp)
32975+{
32976+ struct au_whtmp_rmdir *whtmp;
dece6358 32977+ int err;
1308ab2a 32978+ unsigned int rdhash;
dece6358
AM
32979+
32980+ SiMustAnyLock(sb);
1facf9fc 32981+
be52b249 32982+ whtmp = kzalloc(sizeof(*whtmp), gfp);
dece6358
AM
32983+ if (unlikely(!whtmp)) {
32984+ whtmp = ERR_PTR(-ENOMEM);
1facf9fc 32985+ goto out;
dece6358 32986+ }
1facf9fc 32987+
1308ab2a 32988+ /* no estimation for dir size */
32989+ rdhash = au_sbi(sb)->si_rdhash;
32990+ if (!rdhash)
32991+ rdhash = AUFS_RDHASH_DEF;
32992+ err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp);
32993+ if (unlikely(err)) {
32994+ kfree(whtmp);
32995+ whtmp = ERR_PTR(err);
32996+ }
dece6358 32997+
4f0767ce 32998+out:
dece6358 32999+ return whtmp;
1facf9fc 33000+}
33001+
33002+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp)
33003+{
027c5e7a
AM
33004+ if (whtmp->br)
33005+ atomic_dec(&whtmp->br->br_count);
1facf9fc 33006+ dput(whtmp->wh_dentry);
33007+ iput(whtmp->dir);
dece6358 33008+ au_nhash_wh_free(&whtmp->whlist);
1facf9fc 33009+ kfree(whtmp);
33010+}
33011+
33012+/*
33013+ * rmdir the whiteouted temporary named dir @h_dentry.
33014+ * @whlist: whiteouted children.
33015+ */
33016+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
33017+ struct dentry *wh_dentry, struct au_nhash *whlist)
33018+{
33019+ int err;
2000de60 33020+ unsigned int h_nlink;
1facf9fc 33021+ struct path h_tmp;
33022+ struct inode *wh_inode, *h_dir;
33023+ struct au_branch *br;
33024+
5527c038 33025+ h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */
1facf9fc 33026+ IMustLock(h_dir);
33027+
33028+ br = au_sbr(dir->i_sb, bindex);
5527c038 33029+ wh_inode = d_inode(wh_dentry);
1facf9fc 33030+ mutex_lock_nested(&wh_inode->i_mutex, AuLsc_I_CHILD);
33031+
33032+ /*
33033+ * someone else might change some whiteouts while we were sleeping.
33034+ * it means this whlist may have an obsoleted entry.
33035+ */
33036+ if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE))
33037+ err = del_wh_children(wh_dentry, whlist, bindex, br);
33038+ else {
33039+ int wkq_err;
33040+ struct del_wh_children_args args = {
33041+ .errp = &err,
33042+ .h_dentry = wh_dentry,
1308ab2a 33043+ .whlist = whlist,
1facf9fc 33044+ .bindex = bindex,
33045+ .br = br
33046+ };
33047+
33048+ wkq_err = au_wkq_wait(call_del_wh_children, &args);
33049+ if (unlikely(wkq_err))
33050+ err = wkq_err;
33051+ }
33052+ mutex_unlock(&wh_inode->i_mutex);
33053+
33054+ if (!err) {
33055+ h_tmp.dentry = wh_dentry;
86dc4139 33056+ h_tmp.mnt = au_br_mnt(br);
2000de60 33057+ h_nlink = h_dir->i_nlink;
1facf9fc 33058+ err = vfsub_rmdir(h_dir, &h_tmp);
2000de60
JR
33059+ /* some fs doesn't change the parent nlink in some cases */
33060+ h_nlink -= h_dir->i_nlink;
1facf9fc 33061+ }
33062+
33063+ if (!err) {
33064+ if (au_ibstart(dir) == bindex) {
7f207e10 33065+ /* todo: dir->i_mutex is necessary */
1facf9fc 33066+ au_cpup_attr_timesizes(dir);
2000de60
JR
33067+ if (h_nlink)
33068+ vfsub_drop_nlink(dir);
1facf9fc 33069+ }
33070+ return 0; /* success */
33071+ }
33072+
523b37e3 33073+ pr_warn("failed removing %pd(%d), ignored\n", wh_dentry, err);
1facf9fc 33074+ return err;
33075+}
33076+
33077+static void call_rmdir_whtmp(void *args)
33078+{
33079+ int err;
e49829fe 33080+ aufs_bindex_t bindex;
1facf9fc 33081+ struct au_whtmp_rmdir *a = args;
33082+ struct super_block *sb;
33083+ struct dentry *h_parent;
33084+ struct inode *h_dir;
1facf9fc 33085+ struct au_hinode *hdir;
33086+
33087+ /* rmdir by nfsd may cause deadlock with this i_mutex */
33088+ /* mutex_lock(&a->dir->i_mutex); */
e49829fe 33089+ err = -EROFS;
1facf9fc 33090+ sb = a->dir->i_sb;
e49829fe
JR
33091+ si_read_lock(sb, !AuLock_FLUSH);
33092+ if (!au_br_writable(a->br->br_perm))
33093+ goto out;
33094+ bindex = au_br_index(sb, a->br->br_id);
33095+ if (unlikely(bindex < 0))
1facf9fc 33096+ goto out;
33097+
33098+ err = -EIO;
1facf9fc 33099+ ii_write_lock_parent(a->dir);
33100+ h_parent = dget_parent(a->wh_dentry);
5527c038 33101+ h_dir = d_inode(h_parent);
e49829fe 33102+ hdir = au_hi(a->dir, bindex);
86dc4139
AM
33103+ err = vfsub_mnt_want_write(au_br_mnt(a->br));
33104+ if (unlikely(err))
33105+ goto out_mnt;
4a4d8108 33106+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
e49829fe
JR
33107+ err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent,
33108+ a->br);
86dc4139
AM
33109+ if (!err)
33110+ err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry, &a->whlist);
4a4d8108 33111+ au_hn_imtx_unlock(hdir);
86dc4139
AM
33112+ vfsub_mnt_drop_write(au_br_mnt(a->br));
33113+
33114+out_mnt:
1facf9fc 33115+ dput(h_parent);
33116+ ii_write_unlock(a->dir);
4f0767ce 33117+out:
1facf9fc 33118+ /* mutex_unlock(&a->dir->i_mutex); */
1facf9fc 33119+ au_whtmp_rmdir_free(a);
027c5e7a
AM
33120+ si_read_unlock(sb);
33121+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 33122+ if (unlikely(err))
33123+ AuIOErr("err %d\n", err);
33124+}
33125+
33126+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
33127+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args)
33128+{
33129+ int wkq_err;
e49829fe 33130+ struct super_block *sb;
1facf9fc 33131+
33132+ IMustLock(dir);
33133+
33134+ /* all post-process will be done in call_rmdir_whtmp(). */
e49829fe 33135+ sb = dir->i_sb;
1facf9fc 33136+ args->dir = au_igrab(dir);
e49829fe
JR
33137+ args->br = au_sbr(sb, bindex);
33138+ atomic_inc(&args->br->br_count);
1facf9fc 33139+ args->wh_dentry = dget(wh_dentry);
53392da6 33140+ wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, sb, /*flags*/0);
1facf9fc 33141+ if (unlikely(wkq_err)) {
523b37e3 33142+ pr_warn("rmdir error %pd (%d), ignored\n", wh_dentry, wkq_err);
1facf9fc 33143+ au_whtmp_rmdir_free(args);
33144+ }
33145+}
7f207e10
AM
33146diff -urN /usr/share/empty/fs/aufs/whout.h linux/fs/aufs/whout.h
33147--- /usr/share/empty/fs/aufs/whout.h 1970-01-01 01:00:00.000000000 +0100
8cdd5066 33148+++ linux/fs/aufs/whout.h 2016-02-28 11:26:32.576637942 +0100
076b876e 33149@@ -0,0 +1,85 @@
1facf9fc 33150+/*
8cdd5066 33151+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 33152+ *
33153+ * This program, aufs is free software; you can redistribute it and/or modify
33154+ * it under the terms of the GNU General Public License as published by
33155+ * the Free Software Foundation; either version 2 of the License, or
33156+ * (at your option) any later version.
dece6358
AM
33157+ *
33158+ * This program is distributed in the hope that it will be useful,
33159+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33160+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33161+ * GNU General Public License for more details.
33162+ *
33163+ * You should have received a copy of the GNU General Public License
523b37e3 33164+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 33165+ */
33166+
33167+/*
33168+ * whiteout for logical deletion and opaque directory
33169+ */
33170+
33171+#ifndef __AUFS_WHOUT_H__
33172+#define __AUFS_WHOUT_H__
33173+
33174+#ifdef __KERNEL__
33175+
1facf9fc 33176+#include "dir.h"
33177+
33178+/* whout.c */
33179+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name);
076b876e
AM
33180+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio);
33181+int au_diropq_test(struct dentry *h_dentry);
7e9cd9fe 33182+struct au_branch;
1facf9fc 33183+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
33184+ struct qstr *prefix);
33185+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br);
33186+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
33187+ struct dentry *dentry);
86dc4139 33188+int au_wh_init(struct au_branch *br, struct super_block *sb);
1facf9fc 33189+
33190+/* diropq flags */
33191+#define AuDiropq_CREATE 1
33192+#define au_ftest_diropq(flags, name) ((flags) & AuDiropq_##name)
7f207e10
AM
33193+#define au_fset_diropq(flags, name) \
33194+ do { (flags) |= AuDiropq_##name; } while (0)
33195+#define au_fclr_diropq(flags, name) \
33196+ do { (flags) &= ~AuDiropq_##name; } while (0)
1facf9fc 33197+
33198+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
33199+ unsigned int flags);
33200+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
33201+ struct au_branch *br);
33202+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
33203+ struct dentry *h_parent);
33204+
33205+/* real rmdir for the whiteout-ed dir */
33206+struct au_whtmp_rmdir {
33207+ struct inode *dir;
e49829fe 33208+ struct au_branch *br;
1facf9fc 33209+ struct dentry *wh_dentry;
dece6358 33210+ struct au_nhash whlist;
1facf9fc 33211+};
33212+
33213+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp);
33214+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp);
33215+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
33216+ struct dentry *wh_dentry, struct au_nhash *whlist);
33217+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
33218+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args);
33219+
33220+/* ---------------------------------------------------------------------- */
33221+
33222+static inline struct dentry *au_diropq_create(struct dentry *dentry,
33223+ aufs_bindex_t bindex)
33224+{
33225+ return au_diropq_sio(dentry, bindex, AuDiropq_CREATE);
33226+}
33227+
33228+static inline int au_diropq_remove(struct dentry *dentry, aufs_bindex_t bindex)
33229+{
33230+ return PTR_ERR(au_diropq_sio(dentry, bindex, !AuDiropq_CREATE));
33231+}
33232+
33233+#endif /* __KERNEL__ */
33234+#endif /* __AUFS_WHOUT_H__ */
7f207e10
AM
33235diff -urN /usr/share/empty/fs/aufs/wkq.c linux/fs/aufs/wkq.c
33236--- /usr/share/empty/fs/aufs/wkq.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 33237+++ linux/fs/aufs/wkq.c 2016-02-28 11:26:32.576637942 +0100
38d290e6 33238@@ -0,0 +1,213 @@
1facf9fc 33239+/*
8cdd5066 33240+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 33241+ *
33242+ * This program, aufs is free software; you can redistribute it and/or modify
33243+ * it under the terms of the GNU General Public License as published by
33244+ * the Free Software Foundation; either version 2 of the License, or
33245+ * (at your option) any later version.
dece6358
AM
33246+ *
33247+ * This program is distributed in the hope that it will be useful,
33248+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33249+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33250+ * GNU General Public License for more details.
33251+ *
33252+ * You should have received a copy of the GNU General Public License
523b37e3 33253+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 33254+ */
33255+
33256+/*
33257+ * workqueue for asynchronous/super-io operations
33258+ * todo: try new credential scheme
33259+ */
33260+
dece6358 33261+#include <linux/module.h>
1facf9fc 33262+#include "aufs.h"
33263+
9dbd164d 33264+/* internal workqueue named AUFS_WKQ_NAME */
b752ccd1 33265+
9dbd164d 33266+static struct workqueue_struct *au_wkq;
1facf9fc 33267+
33268+struct au_wkinfo {
33269+ struct work_struct wk;
7f207e10 33270+ struct kobject *kobj;
1facf9fc 33271+
33272+ unsigned int flags; /* see wkq.h */
33273+
33274+ au_wkq_func_t func;
33275+ void *args;
33276+
1facf9fc 33277+ struct completion *comp;
33278+};
33279+
33280+/* ---------------------------------------------------------------------- */
33281+
1facf9fc 33282+static void wkq_func(struct work_struct *wk)
33283+{
33284+ struct au_wkinfo *wkinfo = container_of(wk, struct au_wkinfo, wk);
33285+
2dfbb274 33286+ AuDebugOn(!uid_eq(current_fsuid(), GLOBAL_ROOT_UID));
7f207e10
AM
33287+ AuDebugOn(rlimit(RLIMIT_FSIZE) != RLIM_INFINITY);
33288+
1facf9fc 33289+ wkinfo->func(wkinfo->args);
1facf9fc 33290+ if (au_ftest_wkq(wkinfo->flags, WAIT))
33291+ complete(wkinfo->comp);
33292+ else {
7f207e10 33293+ kobject_put(wkinfo->kobj);
9dbd164d 33294+ module_put(THIS_MODULE); /* todo: ?? */
1facf9fc 33295+ kfree(wkinfo);
33296+ }
33297+}
33298+
33299+/*
33300+ * Since struct completion is large, try allocating it dynamically.
33301+ */
c2b27bf2 33302+#if 1 /* defined(CONFIG_4KSTACKS) || defined(AuTest4KSTACKS) */
1facf9fc 33303+#define AuWkqCompDeclare(name) struct completion *comp = NULL
33304+
33305+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
33306+{
33307+ *comp = kmalloc(sizeof(**comp), GFP_NOFS);
33308+ if (*comp) {
33309+ init_completion(*comp);
33310+ wkinfo->comp = *comp;
33311+ return 0;
33312+ }
33313+ return -ENOMEM;
33314+}
33315+
33316+static void au_wkq_comp_free(struct completion *comp)
33317+{
33318+ kfree(comp);
33319+}
33320+
33321+#else
33322+
33323+/* no braces */
33324+#define AuWkqCompDeclare(name) \
33325+ DECLARE_COMPLETION_ONSTACK(_ ## name); \
33326+ struct completion *comp = &_ ## name
33327+
33328+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
33329+{
33330+ wkinfo->comp = *comp;
33331+ return 0;
33332+}
33333+
33334+static void au_wkq_comp_free(struct completion *comp __maybe_unused)
33335+{
33336+ /* empty */
33337+}
33338+#endif /* 4KSTACKS */
33339+
53392da6 33340+static void au_wkq_run(struct au_wkinfo *wkinfo)
1facf9fc 33341+{
53392da6
AM
33342+ if (au_ftest_wkq(wkinfo->flags, NEST)) {
33343+ if (au_wkq_test()) {
38d290e6
JR
33344+ AuWarn1("wkq from wkq, unless silly-rename on NFS,"
33345+ " due to a dead dir by UDBA?\n");
53392da6
AM
33346+ AuDebugOn(au_ftest_wkq(wkinfo->flags, WAIT));
33347+ }
33348+ } else
33349+ au_dbg_verify_kthread();
33350+
33351+ if (au_ftest_wkq(wkinfo->flags, WAIT)) {
a1f66529 33352+ INIT_WORK_ONSTACK(&wkinfo->wk, wkq_func);
9dbd164d 33353+ queue_work(au_wkq, &wkinfo->wk);
4a4d8108
AM
33354+ } else {
33355+ INIT_WORK(&wkinfo->wk, wkq_func);
33356+ schedule_work(&wkinfo->wk);
33357+ }
1facf9fc 33358+}
33359+
7f207e10
AM
33360+/*
33361+ * Be careful. It is easy to make deadlock happen.
33362+ * processA: lock, wkq and wait
33363+ * processB: wkq and wait, lock in wkq
33364+ * --> deadlock
33365+ */
b752ccd1 33366+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args)
1facf9fc 33367+{
33368+ int err;
33369+ AuWkqCompDeclare(comp);
33370+ struct au_wkinfo wkinfo = {
b752ccd1 33371+ .flags = flags,
1facf9fc 33372+ .func = func,
33373+ .args = args
33374+ };
33375+
33376+ err = au_wkq_comp_alloc(&wkinfo, &comp);
33377+ if (!err) {
53392da6 33378+ au_wkq_run(&wkinfo);
1facf9fc 33379+ /* no timeout, no interrupt */
33380+ wait_for_completion(wkinfo.comp);
33381+ au_wkq_comp_free(comp);
4a4d8108 33382+ destroy_work_on_stack(&wkinfo.wk);
1facf9fc 33383+ }
33384+
33385+ return err;
33386+
33387+}
33388+
027c5e7a
AM
33389+/*
33390+ * Note: dget/dput() in func for aufs dentries are not supported. It will be a
33391+ * problem in a concurrent umounting.
33392+ */
53392da6
AM
33393+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
33394+ unsigned int flags)
1facf9fc 33395+{
33396+ int err;
33397+ struct au_wkinfo *wkinfo;
33398+
33399+ atomic_inc(&au_sbi(sb)->si_nowait.nw_len);
33400+
33401+ /*
33402+ * wkq_func() must free this wkinfo.
33403+ * it highly depends upon the implementation of workqueue.
33404+ */
33405+ err = 0;
33406+ wkinfo = kmalloc(sizeof(*wkinfo), GFP_NOFS);
33407+ if (wkinfo) {
7f207e10 33408+ wkinfo->kobj = &au_sbi(sb)->si_kobj;
53392da6 33409+ wkinfo->flags = flags & ~AuWkq_WAIT;
1facf9fc 33410+ wkinfo->func = func;
33411+ wkinfo->args = args;
33412+ wkinfo->comp = NULL;
7f207e10 33413+ kobject_get(wkinfo->kobj);
9dbd164d 33414+ __module_get(THIS_MODULE); /* todo: ?? */
1facf9fc 33415+
53392da6 33416+ au_wkq_run(wkinfo);
1facf9fc 33417+ } else {
33418+ err = -ENOMEM;
e49829fe 33419+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 33420+ }
33421+
33422+ return err;
33423+}
33424+
33425+/* ---------------------------------------------------------------------- */
33426+
33427+void au_nwt_init(struct au_nowait_tasks *nwt)
33428+{
33429+ atomic_set(&nwt->nw_len, 0);
4a4d8108 33430+ /* smp_mb(); */ /* atomic_set */
1facf9fc 33431+ init_waitqueue_head(&nwt->nw_wq);
33432+}
33433+
33434+void au_wkq_fin(void)
33435+{
9dbd164d 33436+ destroy_workqueue(au_wkq);
1facf9fc 33437+}
33438+
33439+int __init au_wkq_init(void)
33440+{
9dbd164d 33441+ int err;
b752ccd1
AM
33442+
33443+ err = 0;
86dc4139 33444+ au_wkq = alloc_workqueue(AUFS_WKQ_NAME, 0, WQ_DFL_ACTIVE);
9dbd164d
AM
33445+ if (IS_ERR(au_wkq))
33446+ err = PTR_ERR(au_wkq);
33447+ else if (!au_wkq)
33448+ err = -ENOMEM;
b752ccd1
AM
33449+
33450+ return err;
1facf9fc 33451+}
7f207e10
AM
33452diff -urN /usr/share/empty/fs/aufs/wkq.h linux/fs/aufs/wkq.h
33453--- /usr/share/empty/fs/aufs/wkq.h 1970-01-01 01:00:00.000000000 +0100
8cdd5066 33454+++ linux/fs/aufs/wkq.h 2016-02-28 11:26:32.576637942 +0100
523b37e3 33455@@ -0,0 +1,91 @@
1facf9fc 33456+/*
8cdd5066 33457+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 33458+ *
33459+ * This program, aufs is free software; you can redistribute it and/or modify
33460+ * it under the terms of the GNU General Public License as published by
33461+ * the Free Software Foundation; either version 2 of the License, or
33462+ * (at your option) any later version.
dece6358
AM
33463+ *
33464+ * This program is distributed in the hope that it will be useful,
33465+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33466+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33467+ * GNU General Public License for more details.
33468+ *
33469+ * You should have received a copy of the GNU General Public License
523b37e3 33470+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 33471+ */
33472+
33473+/*
33474+ * workqueue for asynchronous/super-io operations
33475+ * todo: try new credentials management scheme
33476+ */
33477+
33478+#ifndef __AUFS_WKQ_H__
33479+#define __AUFS_WKQ_H__
33480+
33481+#ifdef __KERNEL__
33482+
dece6358
AM
33483+struct super_block;
33484+
1facf9fc 33485+/* ---------------------------------------------------------------------- */
33486+
33487+/*
33488+ * in the next operation, wait for the 'nowait' tasks in system-wide workqueue
33489+ */
33490+struct au_nowait_tasks {
33491+ atomic_t nw_len;
33492+ wait_queue_head_t nw_wq;
33493+};
33494+
33495+/* ---------------------------------------------------------------------- */
33496+
33497+typedef void (*au_wkq_func_t)(void *args);
33498+
33499+/* wkq flags */
33500+#define AuWkq_WAIT 1
9dbd164d 33501+#define AuWkq_NEST (1 << 1)
1facf9fc 33502+#define au_ftest_wkq(flags, name) ((flags) & AuWkq_##name)
7f207e10
AM
33503+#define au_fset_wkq(flags, name) \
33504+ do { (flags) |= AuWkq_##name; } while (0)
33505+#define au_fclr_wkq(flags, name) \
33506+ do { (flags) &= ~AuWkq_##name; } while (0)
1facf9fc 33507+
9dbd164d
AM
33508+#ifndef CONFIG_AUFS_HNOTIFY
33509+#undef AuWkq_NEST
33510+#define AuWkq_NEST 0
33511+#endif
33512+
1facf9fc 33513+/* wkq.c */
b752ccd1 33514+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args);
53392da6
AM
33515+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
33516+ unsigned int flags);
1facf9fc 33517+void au_nwt_init(struct au_nowait_tasks *nwt);
33518+int __init au_wkq_init(void);
33519+void au_wkq_fin(void);
33520+
33521+/* ---------------------------------------------------------------------- */
33522+
53392da6
AM
33523+static inline int au_wkq_test(void)
33524+{
33525+ return current->flags & PF_WQ_WORKER;
33526+}
33527+
b752ccd1 33528+static inline int au_wkq_wait(au_wkq_func_t func, void *args)
1facf9fc 33529+{
b752ccd1 33530+ return au_wkq_do_wait(AuWkq_WAIT, func, args);
1facf9fc 33531+}
33532+
33533+static inline void au_nwt_done(struct au_nowait_tasks *nwt)
33534+{
e49829fe 33535+ if (atomic_dec_and_test(&nwt->nw_len))
1facf9fc 33536+ wake_up_all(&nwt->nw_wq);
33537+}
33538+
33539+static inline int au_nwt_flush(struct au_nowait_tasks *nwt)
33540+{
33541+ wait_event(nwt->nw_wq, !atomic_read(&nwt->nw_len));
33542+ return 0;
33543+}
33544+
33545+#endif /* __KERNEL__ */
33546+#endif /* __AUFS_WKQ_H__ */
c1595e42
JR
33547diff -urN /usr/share/empty/fs/aufs/xattr.c linux/fs/aufs/xattr.c
33548--- /usr/share/empty/fs/aufs/xattr.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 33549+++ linux/fs/aufs/xattr.c 2016-02-28 11:26:32.576637942 +0100
b912730e 33550@@ -0,0 +1,344 @@
c1595e42 33551+/*
8cdd5066 33552+ * Copyright (C) 2014-2016 Junjiro R. Okajima
c1595e42
JR
33553+ *
33554+ * This program, aufs is free software; you can redistribute it and/or modify
33555+ * it under the terms of the GNU General Public License as published by
33556+ * the Free Software Foundation; either version 2 of the License, or
33557+ * (at your option) any later version.
33558+ *
33559+ * This program is distributed in the hope that it will be useful,
33560+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33561+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33562+ * GNU General Public License for more details.
33563+ *
33564+ * You should have received a copy of the GNU General Public License
33565+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
33566+ */
33567+
33568+/*
33569+ * handling xattr functions
33570+ */
33571+
33572+#include <linux/xattr.h>
33573+#include "aufs.h"
33574+
33575+static int au_xattr_ignore(int err, char *name, unsigned int ignore_flags)
33576+{
33577+ if (!ignore_flags)
33578+ goto out;
33579+ switch (err) {
33580+ case -ENOMEM:
33581+ case -EDQUOT:
33582+ goto out;
33583+ }
33584+
33585+ if ((ignore_flags & AuBrAttr_ICEX) == AuBrAttr_ICEX) {
33586+ err = 0;
33587+ goto out;
33588+ }
33589+
33590+#define cmp(brattr, prefix) do { \
33591+ if (!strncmp(name, XATTR_##prefix##_PREFIX, \
33592+ XATTR_##prefix##_PREFIX_LEN)) { \
33593+ if (ignore_flags & AuBrAttr_ICEX_##brattr) \
33594+ err = 0; \
33595+ goto out; \
33596+ } \
33597+ } while (0)
33598+
33599+ cmp(SEC, SECURITY);
33600+ cmp(SYS, SYSTEM);
33601+ cmp(TR, TRUSTED);
33602+ cmp(USR, USER);
33603+#undef cmp
33604+
33605+ if (ignore_flags & AuBrAttr_ICEX_OTH)
33606+ err = 0;
33607+
33608+out:
33609+ return err;
33610+}
33611+
33612+static const int au_xattr_out_of_list = AuBrAttr_ICEX_OTH << 1;
33613+
33614+static int au_do_cpup_xattr(struct dentry *h_dst, struct dentry *h_src,
7e9cd9fe
AM
33615+ char *name, char **buf, unsigned int ignore_flags,
33616+ unsigned int verbose)
c1595e42
JR
33617+{
33618+ int err;
33619+ ssize_t ssz;
33620+ struct inode *h_idst;
33621+
33622+ ssz = vfs_getxattr_alloc(h_src, name, buf, 0, GFP_NOFS);
33623+ err = ssz;
33624+ if (unlikely(err <= 0)) {
c1595e42
JR
33625+ if (err == -ENODATA
33626+ || (err == -EOPNOTSUPP
b912730e 33627+ && ((ignore_flags & au_xattr_out_of_list)
5527c038 33628+ || (au_test_nfs_noacl(d_inode(h_src))
b912730e
AM
33629+ && (!strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS)
33630+ || !strcmp(name,
33631+ XATTR_NAME_POSIX_ACL_DEFAULT))))
33632+ ))
c1595e42 33633+ err = 0;
b912730e
AM
33634+ if (err && (verbose || au_debug_test()))
33635+ pr_err("%s, err %d\n", name, err);
c1595e42
JR
33636+ goto out;
33637+ }
33638+
33639+ /* unlock it temporary */
5527c038 33640+ h_idst = d_inode(h_dst);
c1595e42
JR
33641+ mutex_unlock(&h_idst->i_mutex);
33642+ err = vfsub_setxattr(h_dst, name, *buf, ssz, /*flags*/0);
33643+ mutex_lock_nested(&h_idst->i_mutex, AuLsc_I_CHILD2);
33644+ if (unlikely(err)) {
7e9cd9fe
AM
33645+ if (verbose || au_debug_test())
33646+ pr_err("%s, err %d\n", name, err);
c1595e42
JR
33647+ err = au_xattr_ignore(err, name, ignore_flags);
33648+ }
33649+
33650+out:
33651+ return err;
33652+}
33653+
7e9cd9fe
AM
33654+int au_cpup_xattr(struct dentry *h_dst, struct dentry *h_src, int ignore_flags,
33655+ unsigned int verbose)
c1595e42
JR
33656+{
33657+ int err, unlocked, acl_access, acl_default;
33658+ ssize_t ssz;
33659+ struct inode *h_isrc, *h_idst;
33660+ char *value, *p, *o, *e;
33661+
33662+ /* try stopping to update the source inode while we are referencing */
7e9cd9fe 33663+ /* there should not be the parent-child relationship between them */
5527c038
JR
33664+ h_isrc = d_inode(h_src);
33665+ h_idst = d_inode(h_dst);
c1595e42
JR
33666+ mutex_unlock(&h_idst->i_mutex);
33667+ mutex_lock_nested(&h_isrc->i_mutex, AuLsc_I_CHILD);
33668+ mutex_lock_nested(&h_idst->i_mutex, AuLsc_I_CHILD2);
33669+ unlocked = 0;
33670+
33671+ /* some filesystems don't list POSIX ACL, for example tmpfs */
33672+ ssz = vfs_listxattr(h_src, NULL, 0);
33673+ err = ssz;
33674+ if (unlikely(err < 0)) {
33675+ AuTraceErr(err);
33676+ if (err == -ENODATA
33677+ || err == -EOPNOTSUPP)
33678+ err = 0; /* ignore */
33679+ goto out;
33680+ }
33681+
33682+ err = 0;
33683+ p = NULL;
33684+ o = NULL;
33685+ if (ssz) {
33686+ err = -ENOMEM;
33687+ p = kmalloc(ssz, GFP_NOFS);
33688+ o = p;
33689+ if (unlikely(!p))
33690+ goto out;
33691+ err = vfs_listxattr(h_src, p, ssz);
33692+ }
33693+ mutex_unlock(&h_isrc->i_mutex);
33694+ unlocked = 1;
33695+ AuDbg("err %d, ssz %zd\n", err, ssz);
33696+ if (unlikely(err < 0))
33697+ goto out_free;
33698+
33699+ err = 0;
33700+ e = p + ssz;
33701+ value = NULL;
33702+ acl_access = 0;
33703+ acl_default = 0;
33704+ while (!err && p < e) {
33705+ acl_access |= !strncmp(p, XATTR_NAME_POSIX_ACL_ACCESS,
33706+ sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1);
33707+ acl_default |= !strncmp(p, XATTR_NAME_POSIX_ACL_DEFAULT,
33708+ sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)
33709+ - 1);
7e9cd9fe
AM
33710+ err = au_do_cpup_xattr(h_dst, h_src, p, &value, ignore_flags,
33711+ verbose);
c1595e42
JR
33712+ p += strlen(p) + 1;
33713+ }
33714+ AuTraceErr(err);
33715+ ignore_flags |= au_xattr_out_of_list;
33716+ if (!err && !acl_access) {
33717+ err = au_do_cpup_xattr(h_dst, h_src,
33718+ XATTR_NAME_POSIX_ACL_ACCESS, &value,
7e9cd9fe 33719+ ignore_flags, verbose);
c1595e42
JR
33720+ AuTraceErr(err);
33721+ }
33722+ if (!err && !acl_default) {
33723+ err = au_do_cpup_xattr(h_dst, h_src,
33724+ XATTR_NAME_POSIX_ACL_DEFAULT, &value,
7e9cd9fe 33725+ ignore_flags, verbose);
c1595e42
JR
33726+ AuTraceErr(err);
33727+ }
33728+
33729+ kfree(value);
33730+
33731+out_free:
33732+ kfree(o);
33733+out:
33734+ if (!unlocked)
33735+ mutex_unlock(&h_isrc->i_mutex);
33736+ AuTraceErr(err);
33737+ return err;
33738+}
33739+
33740+/* ---------------------------------------------------------------------- */
33741+
33742+enum {
33743+ AU_XATTR_LIST,
33744+ AU_XATTR_GET
33745+};
33746+
33747+struct au_lgxattr {
33748+ int type;
33749+ union {
33750+ struct {
33751+ char *list;
33752+ size_t size;
33753+ } list;
33754+ struct {
33755+ const char *name;
33756+ void *value;
33757+ size_t size;
33758+ } get;
33759+ } u;
33760+};
33761+
33762+static ssize_t au_lgxattr(struct dentry *dentry, struct au_lgxattr *arg)
33763+{
33764+ ssize_t err;
33765+ struct path h_path;
33766+ struct super_block *sb;
33767+
33768+ sb = dentry->d_sb;
33769+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
33770+ if (unlikely(err))
33771+ goto out;
33772+ err = au_h_path_getattr(dentry, /*force*/1, &h_path);
33773+ if (unlikely(err))
33774+ goto out_si;
33775+ if (unlikely(!h_path.dentry))
33776+ /* illegally overlapped or something */
33777+ goto out_di; /* pretending success */
33778+
33779+ /* always topmost entry only */
33780+ switch (arg->type) {
33781+ case AU_XATTR_LIST:
33782+ err = vfs_listxattr(h_path.dentry,
33783+ arg->u.list.list, arg->u.list.size);
33784+ break;
33785+ case AU_XATTR_GET:
33786+ err = vfs_getxattr(h_path.dentry,
33787+ arg->u.get.name, arg->u.get.value,
33788+ arg->u.get.size);
33789+ break;
33790+ }
33791+
33792+out_di:
33793+ di_read_unlock(dentry, AuLock_IR);
33794+out_si:
33795+ si_read_unlock(sb);
33796+out:
33797+ AuTraceErr(err);
33798+ return err;
33799+}
33800+
33801+ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size)
33802+{
33803+ struct au_lgxattr arg = {
33804+ .type = AU_XATTR_LIST,
33805+ .u.list = {
33806+ .list = list,
33807+ .size = size
33808+ },
33809+ };
33810+
33811+ return au_lgxattr(dentry, &arg);
33812+}
33813+
33814+ssize_t aufs_getxattr(struct dentry *dentry, const char *name, void *value,
33815+ size_t size)
33816+{
33817+ struct au_lgxattr arg = {
33818+ .type = AU_XATTR_GET,
33819+ .u.get = {
33820+ .name = name,
33821+ .value = value,
33822+ .size = size
33823+ },
33824+ };
33825+
33826+ return au_lgxattr(dentry, &arg);
33827+}
33828+
33829+int aufs_setxattr(struct dentry *dentry, const char *name, const void *value,
33830+ size_t size, int flags)
33831+{
33832+ struct au_srxattr arg = {
33833+ .type = AU_XATTR_SET,
33834+ .u.set = {
33835+ .name = name,
33836+ .value = value,
33837+ .size = size,
33838+ .flags = flags
33839+ },
33840+ };
33841+
33842+ return au_srxattr(dentry, &arg);
33843+}
33844+
33845+int aufs_removexattr(struct dentry *dentry, const char *name)
33846+{
33847+ struct au_srxattr arg = {
33848+ .type = AU_XATTR_REMOVE,
33849+ .u.remove = {
33850+ .name = name
33851+ },
33852+ };
33853+
33854+ return au_srxattr(dentry, &arg);
33855+}
33856+
33857+/* ---------------------------------------------------------------------- */
33858+
33859+#if 0
33860+static size_t au_xattr_list(struct dentry *dentry, char *list, size_t list_size,
33861+ const char *name, size_t name_len, int type)
33862+{
33863+ return aufs_listxattr(dentry, list, list_size);
33864+}
33865+
33866+static int au_xattr_get(struct dentry *dentry, const char *name, void *buffer,
33867+ size_t size, int type)
33868+{
33869+ return aufs_getxattr(dentry, name, buffer, size);
33870+}
33871+
33872+static int au_xattr_set(struct dentry *dentry, const char *name,
33873+ const void *value, size_t size, int flags, int type)
33874+{
33875+ return aufs_setxattr(dentry, name, value, size, flags);
33876+}
33877+
33878+static const struct xattr_handler au_xattr_handler = {
33879+ /* no prefix, no flags */
33880+ .list = au_xattr_list,
33881+ .get = au_xattr_get,
33882+ .set = au_xattr_set
33883+ /* why no remove? */
33884+};
33885+
33886+static const struct xattr_handler *au_xattr_handlers[] = {
33887+ &au_xattr_handler
33888+};
33889+
33890+void au_xattr_init(struct super_block *sb)
33891+{
33892+ /* sb->s_xattr = au_xattr_handlers; */
33893+}
33894+#endif
7f207e10
AM
33895diff -urN /usr/share/empty/fs/aufs/xino.c linux/fs/aufs/xino.c
33896--- /usr/share/empty/fs/aufs/xino.c 1970-01-01 01:00:00.000000000 +0100
8cdd5066 33897+++ linux/fs/aufs/xino.c 2016-02-28 11:26:32.576637942 +0100
be52b249 33898@@ -0,0 +1,1318 @@
1facf9fc 33899+/*
8cdd5066 33900+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 33901+ *
33902+ * This program, aufs is free software; you can redistribute it and/or modify
33903+ * it under the terms of the GNU General Public License as published by
33904+ * the Free Software Foundation; either version 2 of the License, or
33905+ * (at your option) any later version.
dece6358
AM
33906+ *
33907+ * This program is distributed in the hope that it will be useful,
33908+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33909+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33910+ * GNU General Public License for more details.
33911+ *
33912+ * You should have received a copy of the GNU General Public License
523b37e3 33913+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 33914+ */
33915+
33916+/*
33917+ * external inode number translation table and bitmap
33918+ */
33919+
33920+#include <linux/seq_file.h>
392086de 33921+#include <linux/statfs.h>
1facf9fc 33922+#include "aufs.h"
33923+
9dbd164d 33924+/* todo: unnecessary to support mmap_sem since kernel-space? */
5527c038 33925+ssize_t xino_fread(vfs_readf_t func, struct file *file, void *kbuf, size_t size,
1facf9fc 33926+ loff_t *pos)
33927+{
33928+ ssize_t err;
33929+ mm_segment_t oldfs;
b752ccd1
AM
33930+ union {
33931+ void *k;
33932+ char __user *u;
33933+ } buf;
1facf9fc 33934+
b752ccd1 33935+ buf.k = kbuf;
1facf9fc 33936+ oldfs = get_fs();
33937+ set_fs(KERNEL_DS);
33938+ do {
33939+ /* todo: signal_pending? */
b752ccd1 33940+ err = func(file, buf.u, size, pos);
1facf9fc 33941+ } while (err == -EAGAIN || err == -EINTR);
33942+ set_fs(oldfs);
33943+
33944+#if 0 /* reserved for future use */
33945+ if (err > 0)
2000de60 33946+ fsnotify_access(file->f_path.dentry);
1facf9fc 33947+#endif
33948+
33949+ return err;
33950+}
33951+
33952+/* ---------------------------------------------------------------------- */
33953+
be52b249
AM
33954+static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
33955+ size_t size, loff_t *pos);
33956+
5527c038 33957+static ssize_t do_xino_fwrite(vfs_writef_t func, struct file *file, void *kbuf,
1facf9fc 33958+ size_t size, loff_t *pos)
33959+{
33960+ ssize_t err;
33961+ mm_segment_t oldfs;
b752ccd1
AM
33962+ union {
33963+ void *k;
33964+ const char __user *u;
33965+ } buf;
be52b249
AM
33966+ int i;
33967+ const int prevent_endless = 10;
1facf9fc 33968+
be52b249 33969+ i = 0;
b752ccd1 33970+ buf.k = kbuf;
1facf9fc 33971+ oldfs = get_fs();
33972+ set_fs(KERNEL_DS);
1facf9fc 33973+ do {
b752ccd1 33974+ err = func(file, buf.u, size, pos);
be52b249
AM
33975+ if (err == -EINTR
33976+ && !au_wkq_test()
33977+ && fatal_signal_pending(current)) {
33978+ set_fs(oldfs);
33979+ err = xino_fwrite_wkq(func, file, kbuf, size, pos);
33980+ BUG_ON(err == -EINTR);
33981+ oldfs = get_fs();
33982+ set_fs(KERNEL_DS);
33983+ }
33984+ } while (i++ < prevent_endless
33985+ && (err == -EAGAIN || err == -EINTR));
1facf9fc 33986+ set_fs(oldfs);
33987+
33988+#if 0 /* reserved for future use */
33989+ if (err > 0)
2000de60 33990+ fsnotify_modify(file->f_path.dentry);
1facf9fc 33991+#endif
33992+
33993+ return err;
33994+}
33995+
33996+struct do_xino_fwrite_args {
33997+ ssize_t *errp;
5527c038 33998+ vfs_writef_t func;
1facf9fc 33999+ struct file *file;
34000+ void *buf;
34001+ size_t size;
34002+ loff_t *pos;
34003+};
34004+
34005+static void call_do_xino_fwrite(void *args)
34006+{
34007+ struct do_xino_fwrite_args *a = args;
34008+ *a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos);
34009+}
34010+
be52b249
AM
34011+static ssize_t xino_fwrite_wkq(vfs_writef_t func, struct file *file, void *buf,
34012+ size_t size, loff_t *pos)
34013+{
34014+ ssize_t err;
34015+ int wkq_err;
34016+ struct do_xino_fwrite_args args = {
34017+ .errp = &err,
34018+ .func = func,
34019+ .file = file,
34020+ .buf = buf,
34021+ .size = size,
34022+ .pos = pos
34023+ };
34024+
34025+ /*
34026+ * it breaks RLIMIT_FSIZE and normal user's limit,
34027+ * users should care about quota and real 'filesystem full.'
34028+ */
34029+ wkq_err = au_wkq_wait(call_do_xino_fwrite, &args);
34030+ if (unlikely(wkq_err))
34031+ err = wkq_err;
34032+
34033+ return err;
34034+}
34035+
5527c038
JR
34036+ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
34037+ size_t size, loff_t *pos)
1facf9fc 34038+{
34039+ ssize_t err;
34040+
b752ccd1
AM
34041+ if (rlimit(RLIMIT_FSIZE) == RLIM_INFINITY) {
34042+ lockdep_off();
34043+ err = do_xino_fwrite(func, file, buf, size, pos);
34044+ lockdep_on();
be52b249
AM
34045+ } else
34046+ err = xino_fwrite_wkq(func, file, buf, size, pos);
1facf9fc 34047+
34048+ return err;
34049+}
34050+
34051+/* ---------------------------------------------------------------------- */
34052+
34053+/*
34054+ * create a new xinofile at the same place/path as @base_file.
34055+ */
34056+struct file *au_xino_create2(struct file *base_file, struct file *copy_src)
34057+{
34058+ struct file *file;
4a4d8108 34059+ struct dentry *base, *parent;
523b37e3 34060+ struct inode *dir, *delegated;
1facf9fc 34061+ struct qstr *name;
1308ab2a 34062+ struct path path;
4a4d8108 34063+ int err;
1facf9fc 34064+
2000de60 34065+ base = base_file->f_path.dentry;
1facf9fc 34066+ parent = base->d_parent; /* dir inode is locked */
5527c038 34067+ dir = d_inode(parent);
1facf9fc 34068+ IMustLock(dir);
34069+
34070+ file = ERR_PTR(-EINVAL);
34071+ name = &base->d_name;
4a4d8108
AM
34072+ path.dentry = vfsub_lookup_one_len(name->name, parent, name->len);
34073+ if (IS_ERR(path.dentry)) {
34074+ file = (void *)path.dentry;
523b37e3
AM
34075+ pr_err("%pd lookup err %ld\n",
34076+ base, PTR_ERR(path.dentry));
1facf9fc 34077+ goto out;
34078+ }
34079+
34080+ /* no need to mnt_want_write() since we call dentry_open() later */
4a4d8108 34081+ err = vfs_create(dir, path.dentry, S_IRUGO | S_IWUGO, NULL);
1facf9fc 34082+ if (unlikely(err)) {
34083+ file = ERR_PTR(err);
523b37e3 34084+ pr_err("%pd create err %d\n", base, err);
1facf9fc 34085+ goto out_dput;
34086+ }
34087+
c06a8ce3 34088+ path.mnt = base_file->f_path.mnt;
4a4d8108 34089+ file = vfsub_dentry_open(&path,
7f207e10 34090+ O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
2cbb1c4b 34091+ /* | __FMODE_NONOTIFY */);
1facf9fc 34092+ if (IS_ERR(file)) {
523b37e3 34093+ pr_err("%pd open err %ld\n", base, PTR_ERR(file));
1facf9fc 34094+ goto out_dput;
34095+ }
34096+
523b37e3
AM
34097+ delegated = NULL;
34098+ err = vfsub_unlink(dir, &file->f_path, &delegated, /*force*/0);
34099+ if (unlikely(err == -EWOULDBLOCK)) {
34100+ pr_warn("cannot retry for NFSv4 delegation"
34101+ " for an internal unlink\n");
34102+ iput(delegated);
34103+ }
1facf9fc 34104+ if (unlikely(err)) {
523b37e3 34105+ pr_err("%pd unlink err %d\n", base, err);
1facf9fc 34106+ goto out_fput;
34107+ }
34108+
34109+ if (copy_src) {
34110+ /* no one can touch copy_src xino */
c06a8ce3 34111+ err = au_copy_file(file, copy_src, vfsub_f_size_read(copy_src));
1facf9fc 34112+ if (unlikely(err)) {
523b37e3 34113+ pr_err("%pd copy err %d\n", base, err);
1facf9fc 34114+ goto out_fput;
34115+ }
34116+ }
34117+ goto out_dput; /* success */
34118+
4f0767ce 34119+out_fput:
1facf9fc 34120+ fput(file);
34121+ file = ERR_PTR(err);
4f0767ce 34122+out_dput:
4a4d8108 34123+ dput(path.dentry);
4f0767ce 34124+out:
1facf9fc 34125+ return file;
34126+}
34127+
34128+struct au_xino_lock_dir {
34129+ struct au_hinode *hdir;
34130+ struct dentry *parent;
34131+ struct mutex *mtx;
34132+};
34133+
34134+static void au_xino_lock_dir(struct super_block *sb, struct file *xino,
34135+ struct au_xino_lock_dir *ldir)
34136+{
34137+ aufs_bindex_t brid, bindex;
34138+
34139+ ldir->hdir = NULL;
34140+ bindex = -1;
34141+ brid = au_xino_brid(sb);
34142+ if (brid >= 0)
34143+ bindex = au_br_index(sb, brid);
34144+ if (bindex >= 0) {
5527c038 34145+ ldir->hdir = au_hi(d_inode(sb->s_root), bindex);
4a4d8108 34146+ au_hn_imtx_lock_nested(ldir->hdir, AuLsc_I_PARENT);
1facf9fc 34147+ } else {
2000de60 34148+ ldir->parent = dget_parent(xino->f_path.dentry);
5527c038 34149+ ldir->mtx = &d_inode(ldir->parent)->i_mutex;
1facf9fc 34150+ mutex_lock_nested(ldir->mtx, AuLsc_I_PARENT);
34151+ }
34152+}
34153+
34154+static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir)
34155+{
34156+ if (ldir->hdir)
4a4d8108 34157+ au_hn_imtx_unlock(ldir->hdir);
1facf9fc 34158+ else {
34159+ mutex_unlock(ldir->mtx);
34160+ dput(ldir->parent);
34161+ }
34162+}
34163+
34164+/* ---------------------------------------------------------------------- */
34165+
34166+/* trucate xino files asynchronously */
34167+
34168+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex)
34169+{
34170+ int err;
392086de
AM
34171+ unsigned long jiffy;
34172+ blkcnt_t blocks;
1facf9fc 34173+ aufs_bindex_t bi, bend;
392086de 34174+ struct kstatfs *st;
1facf9fc 34175+ struct au_branch *br;
34176+ struct file *new_xino, *file;
34177+ struct super_block *h_sb;
34178+ struct au_xino_lock_dir ldir;
34179+
392086de 34180+ err = -ENOMEM;
be52b249 34181+ st = kmalloc(sizeof(*st), GFP_NOFS);
392086de
AM
34182+ if (unlikely(!st))
34183+ goto out;
34184+
1facf9fc 34185+ err = -EINVAL;
34186+ bend = au_sbend(sb);
34187+ if (unlikely(bindex < 0 || bend < bindex))
392086de 34188+ goto out_st;
1facf9fc 34189+ br = au_sbr(sb, bindex);
34190+ file = br->br_xino.xi_file;
34191+ if (!file)
392086de
AM
34192+ goto out_st;
34193+
34194+ err = vfs_statfs(&file->f_path, st);
34195+ if (unlikely(err))
34196+ AuErr1("statfs err %d, ignored\n", err);
34197+ jiffy = jiffies;
34198+ blocks = file_inode(file)->i_blocks;
34199+ pr_info("begin truncating xino(b%d), ib%llu, %llu/%llu free blks\n",
34200+ bindex, (u64)blocks, st->f_bfree, st->f_blocks);
1facf9fc 34201+
34202+ au_xino_lock_dir(sb, file, &ldir);
34203+ /* mnt_want_write() is unnecessary here */
34204+ new_xino = au_xino_create2(file, file);
34205+ au_xino_unlock_dir(&ldir);
34206+ err = PTR_ERR(new_xino);
392086de
AM
34207+ if (IS_ERR(new_xino)) {
34208+ pr_err("err %d, ignored\n", err);
34209+ goto out_st;
34210+ }
1facf9fc 34211+ err = 0;
34212+ fput(file);
34213+ br->br_xino.xi_file = new_xino;
34214+
86dc4139 34215+ h_sb = au_br_sb(br);
1facf9fc 34216+ for (bi = 0; bi <= bend; bi++) {
34217+ if (unlikely(bi == bindex))
34218+ continue;
34219+ br = au_sbr(sb, bi);
86dc4139 34220+ if (au_br_sb(br) != h_sb)
1facf9fc 34221+ continue;
34222+
34223+ fput(br->br_xino.xi_file);
34224+ br->br_xino.xi_file = new_xino;
34225+ get_file(new_xino);
34226+ }
34227+
392086de
AM
34228+ err = vfs_statfs(&new_xino->f_path, st);
34229+ if (!err) {
34230+ pr_info("end truncating xino(b%d), ib%llu, %llu/%llu free blks\n",
34231+ bindex, (u64)file_inode(new_xino)->i_blocks,
34232+ st->f_bfree, st->f_blocks);
34233+ if (file_inode(new_xino)->i_blocks < blocks)
34234+ au_sbi(sb)->si_xino_jiffy = jiffy;
34235+ } else
34236+ AuErr1("statfs err %d, ignored\n", err);
34237+
34238+out_st:
34239+ kfree(st);
4f0767ce 34240+out:
1facf9fc 34241+ return err;
34242+}
34243+
34244+struct xino_do_trunc_args {
34245+ struct super_block *sb;
34246+ struct au_branch *br;
34247+};
34248+
34249+static void xino_do_trunc(void *_args)
34250+{
34251+ struct xino_do_trunc_args *args = _args;
34252+ struct super_block *sb;
34253+ struct au_branch *br;
34254+ struct inode *dir;
34255+ int err;
34256+ aufs_bindex_t bindex;
34257+
34258+ err = 0;
34259+ sb = args->sb;
5527c038 34260+ dir = d_inode(sb->s_root);
1facf9fc 34261+ br = args->br;
34262+
34263+ si_noflush_write_lock(sb);
34264+ ii_read_lock_parent(dir);
34265+ bindex = au_br_index(sb, br->br_id);
34266+ err = au_xino_trunc(sb, bindex);
1facf9fc 34267+ ii_read_unlock(dir);
34268+ if (unlikely(err))
392086de 34269+ pr_warn("err b%d, (%d)\n", bindex, err);
1facf9fc 34270+ atomic_dec(&br->br_xino_running);
34271+ atomic_dec(&br->br_count);
1facf9fc 34272+ si_write_unlock(sb);
027c5e7a 34273+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 34274+ kfree(args);
34275+}
34276+
392086de
AM
34277+static int xino_trunc_test(struct super_block *sb, struct au_branch *br)
34278+{
34279+ int err;
34280+ struct kstatfs st;
34281+ struct au_sbinfo *sbinfo;
34282+
34283+ /* todo: si_xino_expire and the ratio should be customizable */
34284+ sbinfo = au_sbi(sb);
34285+ if (time_before(jiffies,
34286+ sbinfo->si_xino_jiffy + sbinfo->si_xino_expire))
34287+ return 0;
34288+
34289+ /* truncation border */
34290+ err = vfs_statfs(&br->br_xino.xi_file->f_path, &st);
34291+ if (unlikely(err)) {
34292+ AuErr1("statfs err %d, ignored\n", err);
34293+ return 0;
34294+ }
34295+ if (div64_u64(st.f_bfree * 100, st.f_blocks) >= AUFS_XINO_DEF_TRUNC)
34296+ return 0;
34297+
34298+ return 1;
34299+}
34300+
1facf9fc 34301+static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
34302+{
34303+ struct xino_do_trunc_args *args;
34304+ int wkq_err;
34305+
392086de 34306+ if (!xino_trunc_test(sb, br))
1facf9fc 34307+ return;
34308+
34309+ if (atomic_inc_return(&br->br_xino_running) > 1)
34310+ goto out;
34311+
34312+ /* lock and kfree() will be called in trunc_xino() */
34313+ args = kmalloc(sizeof(*args), GFP_NOFS);
34314+ if (unlikely(!args)) {
34315+ AuErr1("no memory\n");
34316+ goto out_args;
34317+ }
34318+
e49829fe 34319+ atomic_inc(&br->br_count);
1facf9fc 34320+ args->sb = sb;
34321+ args->br = br;
53392da6 34322+ wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0);
1facf9fc 34323+ if (!wkq_err)
34324+ return; /* success */
34325+
4a4d8108 34326+ pr_err("wkq %d\n", wkq_err);
e49829fe 34327+ atomic_dec(&br->br_count);
1facf9fc 34328+
4f0767ce 34329+out_args:
1facf9fc 34330+ kfree(args);
4f0767ce 34331+out:
e49829fe 34332+ atomic_dec(&br->br_xino_running);
1facf9fc 34333+}
34334+
34335+/* ---------------------------------------------------------------------- */
34336+
5527c038 34337+static int au_xino_do_write(vfs_writef_t write, struct file *file,
1facf9fc 34338+ ino_t h_ino, ino_t ino)
34339+{
34340+ loff_t pos;
34341+ ssize_t sz;
34342+
34343+ pos = h_ino;
34344+ if (unlikely(au_loff_max / sizeof(ino) - 1 < pos)) {
34345+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
34346+ return -EFBIG;
34347+ }
34348+ pos *= sizeof(ino);
34349+ sz = xino_fwrite(write, file, &ino, sizeof(ino), &pos);
34350+ if (sz == sizeof(ino))
34351+ return 0; /* success */
34352+
34353+ AuIOErr("write failed (%zd)\n", sz);
34354+ return -EIO;
34355+}
34356+
34357+/*
34358+ * write @ino to the xinofile for the specified branch{@sb, @bindex}
34359+ * at the position of @h_ino.
34360+ * even if @ino is zero, it is written to the xinofile and means no entry.
34361+ * if the size of the xino file on a specific filesystem exceeds the watermark,
34362+ * try truncating it.
34363+ */
34364+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
34365+ ino_t ino)
34366+{
34367+ int err;
34368+ unsigned int mnt_flags;
34369+ struct au_branch *br;
34370+
34371+ BUILD_BUG_ON(sizeof(long long) != sizeof(au_loff_max)
34372+ || ((loff_t)-1) > 0);
dece6358 34373+ SiMustAnyLock(sb);
1facf9fc 34374+
34375+ mnt_flags = au_mntflags(sb);
34376+ if (!au_opt_test(mnt_flags, XINO))
34377+ return 0;
34378+
34379+ br = au_sbr(sb, bindex);
34380+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
34381+ h_ino, ino);
34382+ if (!err) {
34383+ if (au_opt_test(mnt_flags, TRUNC_XINO)
86dc4139 34384+ && au_test_fs_trunc_xino(au_br_sb(br)))
1facf9fc 34385+ xino_try_trunc(sb, br);
34386+ return 0; /* success */
34387+ }
34388+
34389+ AuIOErr("write failed (%d)\n", err);
34390+ return -EIO;
34391+}
34392+
34393+/* ---------------------------------------------------------------------- */
34394+
34395+/* aufs inode number bitmap */
34396+
34397+static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE;
34398+static ino_t xib_calc_ino(unsigned long pindex, int bit)
34399+{
34400+ ino_t ino;
34401+
34402+ AuDebugOn(bit < 0 || page_bits <= bit);
34403+ ino = AUFS_FIRST_INO + pindex * page_bits + bit;
34404+ return ino;
34405+}
34406+
34407+static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit)
34408+{
34409+ AuDebugOn(ino < AUFS_FIRST_INO);
34410+ ino -= AUFS_FIRST_INO;
34411+ *pindex = ino / page_bits;
34412+ *bit = ino % page_bits;
34413+}
34414+
34415+static int xib_pindex(struct super_block *sb, unsigned long pindex)
34416+{
34417+ int err;
34418+ loff_t pos;
34419+ ssize_t sz;
34420+ struct au_sbinfo *sbinfo;
34421+ struct file *xib;
34422+ unsigned long *p;
34423+
34424+ sbinfo = au_sbi(sb);
34425+ MtxMustLock(&sbinfo->si_xib_mtx);
34426+ AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE
34427+ || !au_opt_test(sbinfo->si_mntflags, XINO));
34428+
34429+ if (pindex == sbinfo->si_xib_last_pindex)
34430+ return 0;
34431+
34432+ xib = sbinfo->si_xib;
34433+ p = sbinfo->si_xib_buf;
34434+ pos = sbinfo->si_xib_last_pindex;
34435+ pos *= PAGE_SIZE;
34436+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
34437+ if (unlikely(sz != PAGE_SIZE))
34438+ goto out;
34439+
34440+ pos = pindex;
34441+ pos *= PAGE_SIZE;
c06a8ce3 34442+ if (vfsub_f_size_read(xib) >= pos + PAGE_SIZE)
1facf9fc 34443+ sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos);
34444+ else {
34445+ memset(p, 0, PAGE_SIZE);
34446+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
34447+ }
34448+ if (sz == PAGE_SIZE) {
34449+ sbinfo->si_xib_last_pindex = pindex;
34450+ return 0; /* success */
34451+ }
34452+
4f0767ce 34453+out:
b752ccd1
AM
34454+ AuIOErr1("write failed (%zd)\n", sz);
34455+ err = sz;
34456+ if (sz >= 0)
34457+ err = -EIO;
34458+ return err;
34459+}
34460+
34461+/* ---------------------------------------------------------------------- */
34462+
34463+static void au_xib_clear_bit(struct inode *inode)
34464+{
34465+ int err, bit;
34466+ unsigned long pindex;
34467+ struct super_block *sb;
34468+ struct au_sbinfo *sbinfo;
34469+
34470+ AuDebugOn(inode->i_nlink);
34471+
34472+ sb = inode->i_sb;
34473+ xib_calc_bit(inode->i_ino, &pindex, &bit);
34474+ AuDebugOn(page_bits <= bit);
34475+ sbinfo = au_sbi(sb);
34476+ mutex_lock(&sbinfo->si_xib_mtx);
34477+ err = xib_pindex(sb, pindex);
34478+ if (!err) {
34479+ clear_bit(bit, sbinfo->si_xib_buf);
34480+ sbinfo->si_xib_next_bit = bit;
34481+ }
34482+ mutex_unlock(&sbinfo->si_xib_mtx);
34483+}
34484+
34485+/* for s_op->delete_inode() */
34486+void au_xino_delete_inode(struct inode *inode, const int unlinked)
34487+{
34488+ int err;
34489+ unsigned int mnt_flags;
34490+ aufs_bindex_t bindex, bend, bi;
34491+ unsigned char try_trunc;
34492+ struct au_iinfo *iinfo;
34493+ struct super_block *sb;
34494+ struct au_hinode *hi;
34495+ struct inode *h_inode;
34496+ struct au_branch *br;
5527c038 34497+ vfs_writef_t xwrite;
b752ccd1
AM
34498+
34499+ sb = inode->i_sb;
34500+ mnt_flags = au_mntflags(sb);
34501+ if (!au_opt_test(mnt_flags, XINO)
34502+ || inode->i_ino == AUFS_ROOT_INO)
34503+ return;
34504+
34505+ if (unlinked) {
34506+ au_xigen_inc(inode);
34507+ au_xib_clear_bit(inode);
34508+ }
34509+
34510+ iinfo = au_ii(inode);
34511+ if (!iinfo)
34512+ return;
1facf9fc 34513+
b752ccd1
AM
34514+ bindex = iinfo->ii_bstart;
34515+ if (bindex < 0)
34516+ return;
1facf9fc 34517+
b752ccd1
AM
34518+ xwrite = au_sbi(sb)->si_xwrite;
34519+ try_trunc = !!au_opt_test(mnt_flags, TRUNC_XINO);
34520+ hi = iinfo->ii_hinode + bindex;
34521+ bend = iinfo->ii_bend;
34522+ for (; bindex <= bend; bindex++, hi++) {
34523+ h_inode = hi->hi_inode;
34524+ if (!h_inode
34525+ || (!unlinked && h_inode->i_nlink))
34526+ continue;
1facf9fc 34527+
b752ccd1
AM
34528+ /* inode may not be revalidated */
34529+ bi = au_br_index(sb, hi->hi_id);
34530+ if (bi < 0)
34531+ continue;
1facf9fc 34532+
b752ccd1
AM
34533+ br = au_sbr(sb, bi);
34534+ err = au_xino_do_write(xwrite, br->br_xino.xi_file,
34535+ h_inode->i_ino, /*ino*/0);
34536+ if (!err && try_trunc
86dc4139 34537+ && au_test_fs_trunc_xino(au_br_sb(br)))
b752ccd1 34538+ xino_try_trunc(sb, br);
1facf9fc 34539+ }
1facf9fc 34540+}
34541+
34542+/* get an unused inode number from bitmap */
34543+ino_t au_xino_new_ino(struct super_block *sb)
34544+{
34545+ ino_t ino;
34546+ unsigned long *p, pindex, ul, pend;
34547+ struct au_sbinfo *sbinfo;
34548+ struct file *file;
34549+ int free_bit, err;
34550+
34551+ if (!au_opt_test(au_mntflags(sb), XINO))
34552+ return iunique(sb, AUFS_FIRST_INO);
34553+
34554+ sbinfo = au_sbi(sb);
34555+ mutex_lock(&sbinfo->si_xib_mtx);
34556+ p = sbinfo->si_xib_buf;
34557+ free_bit = sbinfo->si_xib_next_bit;
34558+ if (free_bit < page_bits && !test_bit(free_bit, p))
34559+ goto out; /* success */
34560+ free_bit = find_first_zero_bit(p, page_bits);
34561+ if (free_bit < page_bits)
34562+ goto out; /* success */
34563+
34564+ pindex = sbinfo->si_xib_last_pindex;
34565+ for (ul = pindex - 1; ul < ULONG_MAX; ul--) {
34566+ err = xib_pindex(sb, ul);
34567+ if (unlikely(err))
34568+ goto out_err;
34569+ free_bit = find_first_zero_bit(p, page_bits);
34570+ if (free_bit < page_bits)
34571+ goto out; /* success */
34572+ }
34573+
34574+ file = sbinfo->si_xib;
c06a8ce3 34575+ pend = vfsub_f_size_read(file) / PAGE_SIZE;
1facf9fc 34576+ for (ul = pindex + 1; ul <= pend; ul++) {
34577+ err = xib_pindex(sb, ul);
34578+ if (unlikely(err))
34579+ goto out_err;
34580+ free_bit = find_first_zero_bit(p, page_bits);
34581+ if (free_bit < page_bits)
34582+ goto out; /* success */
34583+ }
34584+ BUG();
34585+
4f0767ce 34586+out:
1facf9fc 34587+ set_bit(free_bit, p);
7f207e10 34588+ sbinfo->si_xib_next_bit = free_bit + 1;
1facf9fc 34589+ pindex = sbinfo->si_xib_last_pindex;
34590+ mutex_unlock(&sbinfo->si_xib_mtx);
34591+ ino = xib_calc_ino(pindex, free_bit);
34592+ AuDbg("i%lu\n", (unsigned long)ino);
34593+ return ino;
4f0767ce 34594+out_err:
1facf9fc 34595+ mutex_unlock(&sbinfo->si_xib_mtx);
34596+ AuDbg("i0\n");
34597+ return 0;
34598+}
34599+
34600+/*
34601+ * read @ino from xinofile for the specified branch{@sb, @bindex}
34602+ * at the position of @h_ino.
34603+ * if the xinofile has no entry there, *@ino stays zero (no new one is made).
34604+ */
34605+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
34606+ ino_t *ino)
34607+{
34608+ int err;
34609+ ssize_t sz;
34610+ loff_t pos;
34611+ struct file *file;
34612+ struct au_sbinfo *sbinfo;
34613+
34614+ *ino = 0;
34615+ if (!au_opt_test(au_mntflags(sb), XINO))
34616+ return 0; /* no xino */
34617+
34618+ err = 0;
34619+ sbinfo = au_sbi(sb);
34620+ pos = h_ino;
34621+ if (unlikely(au_loff_max / sizeof(*ino) - 1 < pos)) {
34622+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
34623+ return -EFBIG;
34624+ }
34625+ pos *= sizeof(*ino);
34626+
34627+ file = au_sbr(sb, bindex)->br_xino.xi_file;
c06a8ce3 34628+ if (vfsub_f_size_read(file) < pos + sizeof(*ino))
1facf9fc 34629+ return 0; /* no ino */
34630+
34631+ sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &pos);
34632+ if (sz == sizeof(*ino))
34633+ return 0; /* success */
34634+
34635+ err = sz;
34636+ if (unlikely(sz >= 0)) {
34637+ err = -EIO;
34638+ AuIOErr("xino read error (%zd)\n", sz);
34639+ }
34640+
34641+ return err;
34642+}
34643+
34644+/* ---------------------------------------------------------------------- */
34645+
34646+/* create and set a new xino file */
34647+
34648+struct file *au_xino_create(struct super_block *sb, char *fname, int silent)
34649+{
34650+ struct file *file;
34651+ struct dentry *h_parent, *d;
b912730e 34652+ struct inode *h_dir, *inode;
1facf9fc 34653+ int err;
34654+
34655+ /*
34656+ * at mount-time, and the xino file is the default path,
4a4d8108 34657+ * hnotify is disabled so we have no notify events to ignore.
1facf9fc 34658+ * when a user specified the xino, we cannot get au_hdir to be ignored.
34659+ */
7f207e10 34660+ file = vfsub_filp_open(fname, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
2cbb1c4b 34661+ /* | __FMODE_NONOTIFY */,
1facf9fc 34662+ S_IRUGO | S_IWUGO);
34663+ if (IS_ERR(file)) {
34664+ if (!silent)
4a4d8108 34665+ pr_err("open %s(%ld)\n", fname, PTR_ERR(file));
1facf9fc 34666+ return file;
34667+ }
34668+
34669+ /* keep file count */
b912730e
AM
34670+ err = 0;
34671+ inode = file_inode(file);
2000de60 34672+ h_parent = dget_parent(file->f_path.dentry);
5527c038 34673+ h_dir = d_inode(h_parent);
1facf9fc 34674+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
34675+ /* mnt_want_write() is unnecessary here */
523b37e3 34676+ /* no delegation since it is just created */
b912730e
AM
34677+ if (inode->i_nlink)
34678+ err = vfsub_unlink(h_dir, &file->f_path, /*delegated*/NULL,
34679+ /*force*/0);
1facf9fc 34680+ mutex_unlock(&h_dir->i_mutex);
34681+ dput(h_parent);
34682+ if (unlikely(err)) {
34683+ if (!silent)
4a4d8108 34684+ pr_err("unlink %s(%d)\n", fname, err);
1facf9fc 34685+ goto out;
34686+ }
34687+
34688+ err = -EINVAL;
2000de60 34689+ d = file->f_path.dentry;
1facf9fc 34690+ if (unlikely(sb == d->d_sb)) {
34691+ if (!silent)
4a4d8108 34692+ pr_err("%s must be outside\n", fname);
1facf9fc 34693+ goto out;
34694+ }
34695+ if (unlikely(au_test_fs_bad_xino(d->d_sb))) {
34696+ if (!silent)
4a4d8108
AM
34697+ pr_err("xino doesn't support %s(%s)\n",
34698+ fname, au_sbtype(d->d_sb));
1facf9fc 34699+ goto out;
34700+ }
34701+ return file; /* success */
34702+
4f0767ce 34703+out:
1facf9fc 34704+ fput(file);
34705+ file = ERR_PTR(err);
34706+ return file;
34707+}
34708+
34709+/*
34710+ * find another branch which is on the same filesystem as the specified
34711+ * branch{@btgt}. search until @bend.
34712+ */
34713+static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt,
34714+ aufs_bindex_t bend)
34715+{
34716+ aufs_bindex_t bindex;
34717+ struct super_block *tgt_sb = au_sbr_sb(sb, btgt);
34718+
34719+ for (bindex = 0; bindex < btgt; bindex++)
34720+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
34721+ return bindex;
34722+ for (bindex++; bindex <= bend; bindex++)
34723+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
34724+ return bindex;
34725+ return -1;
34726+}
34727+
34728+/* ---------------------------------------------------------------------- */
34729+
34730+/*
34731+ * initialize the xinofile for the specified branch @br
34732+ * at the place/path where @base_file indicates.
34733+ * test whether another branch is on the same filesystem or not,
34734+ * if @do_test is true.
34735+ */
34736+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t h_ino,
34737+ struct file *base_file, int do_test)
34738+{
34739+ int err;
34740+ ino_t ino;
34741+ aufs_bindex_t bend, bindex;
34742+ struct au_branch *shared_br, *b;
34743+ struct file *file;
34744+ struct super_block *tgt_sb;
34745+
34746+ shared_br = NULL;
34747+ bend = au_sbend(sb);
34748+ if (do_test) {
86dc4139 34749+ tgt_sb = au_br_sb(br);
1facf9fc 34750+ for (bindex = 0; bindex <= bend; bindex++) {
34751+ b = au_sbr(sb, bindex);
86dc4139 34752+ if (tgt_sb == au_br_sb(b)) {
1facf9fc 34753+ shared_br = b;
34754+ break;
34755+ }
34756+ }
34757+ }
34758+
34759+ if (!shared_br || !shared_br->br_xino.xi_file) {
34760+ struct au_xino_lock_dir ldir;
34761+
34762+ au_xino_lock_dir(sb, base_file, &ldir);
34763+ /* mnt_want_write() is unnecessary here */
34764+ file = au_xino_create2(base_file, NULL);
34765+ au_xino_unlock_dir(&ldir);
34766+ err = PTR_ERR(file);
34767+ if (IS_ERR(file))
34768+ goto out;
34769+ br->br_xino.xi_file = file;
34770+ } else {
34771+ br->br_xino.xi_file = shared_br->br_xino.xi_file;
34772+ get_file(br->br_xino.xi_file);
34773+ }
34774+
34775+ ino = AUFS_ROOT_INO;
34776+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
34777+ h_ino, ino);
b752ccd1
AM
34778+ if (unlikely(err)) {
34779+ fput(br->br_xino.xi_file);
34780+ br->br_xino.xi_file = NULL;
34781+ }
1facf9fc 34782+
4f0767ce 34783+out:
1facf9fc 34784+ return err;
34785+}
34786+
34787+/* ---------------------------------------------------------------------- */
34788+
34789+/* truncate a xino bitmap file */
34790+
34791+/* todo: slow */
34792+static int do_xib_restore(struct super_block *sb, struct file *file, void *page)
34793+{
34794+ int err, bit;
34795+ ssize_t sz;
34796+ unsigned long pindex;
34797+ loff_t pos, pend;
34798+ struct au_sbinfo *sbinfo;
5527c038 34799+ vfs_readf_t func;
1facf9fc 34800+ ino_t *ino;
34801+ unsigned long *p;
34802+
34803+ err = 0;
34804+ sbinfo = au_sbi(sb);
dece6358 34805+ MtxMustLock(&sbinfo->si_xib_mtx);
1facf9fc 34806+ p = sbinfo->si_xib_buf;
34807+ func = sbinfo->si_xread;
c06a8ce3 34808+ pend = vfsub_f_size_read(file);
1facf9fc 34809+ pos = 0;
34810+ while (pos < pend) {
34811+ sz = xino_fread(func, file, page, PAGE_SIZE, &pos);
34812+ err = sz;
34813+ if (unlikely(sz <= 0))
34814+ goto out;
34815+
34816+ err = 0;
34817+ for (ino = page; sz > 0; ino++, sz -= sizeof(ino)) {
34818+ if (unlikely(*ino < AUFS_FIRST_INO))
34819+ continue;
34820+
34821+ xib_calc_bit(*ino, &pindex, &bit);
34822+ AuDebugOn(page_bits <= bit);
34823+ err = xib_pindex(sb, pindex);
34824+ if (!err)
34825+ set_bit(bit, p);
34826+ else
34827+ goto out;
34828+ }
34829+ }
34830+
4f0767ce 34831+out:
1facf9fc 34832+ return err;
34833+}
34834+
34835+static int xib_restore(struct super_block *sb)
34836+{
34837+ int err;
34838+ aufs_bindex_t bindex, bend;
34839+ void *page;
34840+
34841+ err = -ENOMEM;
34842+ page = (void *)__get_free_page(GFP_NOFS);
34843+ if (unlikely(!page))
34844+ goto out;
34845+
34846+ err = 0;
34847+ bend = au_sbend(sb);
34848+ for (bindex = 0; !err && bindex <= bend; bindex++)
34849+ if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0)
34850+ err = do_xib_restore
34851+ (sb, au_sbr(sb, bindex)->br_xino.xi_file, page);
34852+ else
34853+ AuDbg("b%d\n", bindex);
34854+ free_page((unsigned long)page);
34855+
4f0767ce 34856+out:
1facf9fc 34857+ return err;
34858+}
34859+
34860+int au_xib_trunc(struct super_block *sb)
34861+{
34862+ int err;
34863+ ssize_t sz;
34864+ loff_t pos;
34865+ struct au_xino_lock_dir ldir;
34866+ struct au_sbinfo *sbinfo;
34867+ unsigned long *p;
34868+ struct file *file;
34869+
dece6358
AM
34870+ SiMustWriteLock(sb);
34871+
1facf9fc 34872+ err = 0;
34873+ sbinfo = au_sbi(sb);
34874+ if (!au_opt_test(sbinfo->si_mntflags, XINO))
34875+ goto out;
34876+
34877+ file = sbinfo->si_xib;
c06a8ce3 34878+ if (vfsub_f_size_read(file) <= PAGE_SIZE)
1facf9fc 34879+ goto out;
34880+
34881+ au_xino_lock_dir(sb, file, &ldir);
34882+ /* mnt_want_write() is unnecessary here */
34883+ file = au_xino_create2(sbinfo->si_xib, NULL);
34884+ au_xino_unlock_dir(&ldir);
34885+ err = PTR_ERR(file);
34886+ if (IS_ERR(file))
34887+ goto out;
34888+ fput(sbinfo->si_xib);
34889+ sbinfo->si_xib = file;
34890+
34891+ p = sbinfo->si_xib_buf;
34892+ memset(p, 0, PAGE_SIZE);
34893+ pos = 0;
34894+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos);
34895+ if (unlikely(sz != PAGE_SIZE)) {
34896+ err = sz;
34897+ AuIOErr("err %d\n", err);
34898+ if (sz >= 0)
34899+ err = -EIO;
34900+ goto out;
34901+ }
34902+
34903+ mutex_lock(&sbinfo->si_xib_mtx);
34904+ /* mnt_want_write() is unnecessary here */
34905+ err = xib_restore(sb);
34906+ mutex_unlock(&sbinfo->si_xib_mtx);
34907+
34908+out:
34909+ return err;
34910+}
34911+
34912+/* ---------------------------------------------------------------------- */
34913+
34914+/*
34915+ * xino mount option handlers
34916+ */
1facf9fc 34917+
34918+/* xino bitmap */
34919+static void xino_clear_xib(struct super_block *sb)
34920+{
34921+ struct au_sbinfo *sbinfo;
34922+
dece6358
AM
34923+ SiMustWriteLock(sb);
34924+
1facf9fc 34925+ sbinfo = au_sbi(sb);
34926+ sbinfo->si_xread = NULL;
34927+ sbinfo->si_xwrite = NULL;
34928+ if (sbinfo->si_xib)
34929+ fput(sbinfo->si_xib);
34930+ sbinfo->si_xib = NULL;
34931+ free_page((unsigned long)sbinfo->si_xib_buf);
34932+ sbinfo->si_xib_buf = NULL;
34933+}
34934+
34935+static int au_xino_set_xib(struct super_block *sb, struct file *base)
34936+{
34937+ int err;
34938+ loff_t pos;
34939+ struct au_sbinfo *sbinfo;
34940+ struct file *file;
34941+
dece6358
AM
34942+ SiMustWriteLock(sb);
34943+
1facf9fc 34944+ sbinfo = au_sbi(sb);
34945+ file = au_xino_create2(base, sbinfo->si_xib);
34946+ err = PTR_ERR(file);
34947+ if (IS_ERR(file))
34948+ goto out;
34949+ if (sbinfo->si_xib)
34950+ fput(sbinfo->si_xib);
34951+ sbinfo->si_xib = file;
5527c038
JR
34952+ sbinfo->si_xread = vfs_readf(file);
34953+ sbinfo->si_xwrite = vfs_writef(file);
1facf9fc 34954+
34955+ err = -ENOMEM;
34956+ if (!sbinfo->si_xib_buf)
34957+ sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS);
34958+ if (unlikely(!sbinfo->si_xib_buf))
34959+ goto out_unset;
34960+
34961+ sbinfo->si_xib_last_pindex = 0;
34962+ sbinfo->si_xib_next_bit = 0;
c06a8ce3 34963+ if (vfsub_f_size_read(file) < PAGE_SIZE) {
1facf9fc 34964+ pos = 0;
34965+ err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf,
34966+ PAGE_SIZE, &pos);
34967+ if (unlikely(err != PAGE_SIZE))
34968+ goto out_free;
34969+ }
34970+ err = 0;
34971+ goto out; /* success */
34972+
4f0767ce 34973+out_free:
1facf9fc 34974+ free_page((unsigned long)sbinfo->si_xib_buf);
b752ccd1
AM
34975+ sbinfo->si_xib_buf = NULL;
34976+ if (err >= 0)
34977+ err = -EIO;
4f0767ce 34978+out_unset:
b752ccd1
AM
34979+ fput(sbinfo->si_xib);
34980+ sbinfo->si_xib = NULL;
34981+ sbinfo->si_xread = NULL;
34982+ sbinfo->si_xwrite = NULL;
4f0767ce 34983+out:
b752ccd1 34984+ return err;
1facf9fc 34985+}
34986+
b752ccd1
AM
34987+/* xino for each branch */
34988+static void xino_clear_br(struct super_block *sb)
34989+{
34990+ aufs_bindex_t bindex, bend;
34991+ struct au_branch *br;
1facf9fc 34992+
b752ccd1
AM
34993+ bend = au_sbend(sb);
34994+ for (bindex = 0; bindex <= bend; bindex++) {
34995+ br = au_sbr(sb, bindex);
34996+ if (!br || !br->br_xino.xi_file)
34997+ continue;
34998+
34999+ fput(br->br_xino.xi_file);
35000+ br->br_xino.xi_file = NULL;
35001+ }
35002+}
35003+
35004+static int au_xino_set_br(struct super_block *sb, struct file *base)
1facf9fc 35005+{
35006+ int err;
b752ccd1
AM
35007+ ino_t ino;
35008+ aufs_bindex_t bindex, bend, bshared;
35009+ struct {
35010+ struct file *old, *new;
35011+ } *fpair, *p;
35012+ struct au_branch *br;
35013+ struct inode *inode;
5527c038 35014+ vfs_writef_t writef;
1facf9fc 35015+
b752ccd1
AM
35016+ SiMustWriteLock(sb);
35017+
35018+ err = -ENOMEM;
35019+ bend = au_sbend(sb);
35020+ fpair = kcalloc(bend + 1, sizeof(*fpair), GFP_NOFS);
35021+ if (unlikely(!fpair))
1facf9fc 35022+ goto out;
35023+
5527c038 35024+ inode = d_inode(sb->s_root);
b752ccd1
AM
35025+ ino = AUFS_ROOT_INO;
35026+ writef = au_sbi(sb)->si_xwrite;
35027+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
35028+ br = au_sbr(sb, bindex);
35029+ bshared = is_sb_shared(sb, bindex, bindex - 1);
35030+ if (bshared >= 0) {
35031+ /* shared xino */
35032+ *p = fpair[bshared];
35033+ get_file(p->new);
35034+ }
35035+
35036+ if (!p->new) {
35037+ /* new xino */
35038+ p->old = br->br_xino.xi_file;
35039+ p->new = au_xino_create2(base, br->br_xino.xi_file);
35040+ err = PTR_ERR(p->new);
35041+ if (IS_ERR(p->new)) {
35042+ p->new = NULL;
35043+ goto out_pair;
35044+ }
35045+ }
35046+
35047+ err = au_xino_do_write(writef, p->new,
35048+ au_h_iptr(inode, bindex)->i_ino, ino);
35049+ if (unlikely(err))
35050+ goto out_pair;
35051+ }
35052+
35053+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
35054+ br = au_sbr(sb, bindex);
35055+ if (br->br_xino.xi_file)
35056+ fput(br->br_xino.xi_file);
35057+ get_file(p->new);
35058+ br->br_xino.xi_file = p->new;
35059+ }
1facf9fc 35060+
4f0767ce 35061+out_pair:
b752ccd1
AM
35062+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++)
35063+ if (p->new)
35064+ fput(p->new);
35065+ else
35066+ break;
35067+ kfree(fpair);
4f0767ce 35068+out:
1facf9fc 35069+ return err;
35070+}
b752ccd1
AM
35071+
35072+void au_xino_clr(struct super_block *sb)
35073+{
35074+ struct au_sbinfo *sbinfo;
35075+
35076+ au_xigen_clr(sb);
35077+ xino_clear_xib(sb);
35078+ xino_clear_br(sb);
35079+ sbinfo = au_sbi(sb);
35080+ /* lvalue, do not call au_mntflags() */
35081+ au_opt_clr(sbinfo->si_mntflags, XINO);
35082+}
35083+
35084+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount)
35085+{
35086+ int err, skip;
35087+ struct dentry *parent, *cur_parent;
35088+ struct qstr *dname, *cur_name;
35089+ struct file *cur_xino;
35090+ struct inode *dir;
35091+ struct au_sbinfo *sbinfo;
35092+
35093+ SiMustWriteLock(sb);
35094+
35095+ err = 0;
35096+ sbinfo = au_sbi(sb);
2000de60 35097+ parent = dget_parent(xino->file->f_path.dentry);
b752ccd1
AM
35098+ if (remount) {
35099+ skip = 0;
2000de60 35100+ dname = &xino->file->f_path.dentry->d_name;
b752ccd1
AM
35101+ cur_xino = sbinfo->si_xib;
35102+ if (cur_xino) {
2000de60
JR
35103+ cur_parent = dget_parent(cur_xino->f_path.dentry);
35104+ cur_name = &cur_xino->f_path.dentry->d_name;
b752ccd1 35105+ skip = (cur_parent == parent
38d290e6 35106+ && au_qstreq(dname, cur_name));
b752ccd1
AM
35107+ dput(cur_parent);
35108+ }
35109+ if (skip)
35110+ goto out;
35111+ }
35112+
35113+ au_opt_set(sbinfo->si_mntflags, XINO);
5527c038 35114+ dir = d_inode(parent);
b752ccd1
AM
35115+ mutex_lock_nested(&dir->i_mutex, AuLsc_I_PARENT);
35116+ /* mnt_want_write() is unnecessary here */
35117+ err = au_xino_set_xib(sb, xino->file);
35118+ if (!err)
35119+ err = au_xigen_set(sb, xino->file);
35120+ if (!err)
35121+ err = au_xino_set_br(sb, xino->file);
35122+ mutex_unlock(&dir->i_mutex);
35123+ if (!err)
35124+ goto out; /* success */
35125+
35126+ /* reset all */
35127+ AuIOErr("failed creating xino(%d).\n", err);
c1595e42
JR
35128+ au_xigen_clr(sb);
35129+ xino_clear_xib(sb);
b752ccd1 35130+
4f0767ce 35131+out:
b752ccd1
AM
35132+ dput(parent);
35133+ return err;
35134+}
35135+
35136+/* ---------------------------------------------------------------------- */
35137+
35138+/*
35139+ * create a xinofile at the default place/path.
35140+ */
35141+struct file *au_xino_def(struct super_block *sb)
35142+{
35143+ struct file *file;
35144+ char *page, *p;
35145+ struct au_branch *br;
35146+ struct super_block *h_sb;
35147+ struct path path;
35148+ aufs_bindex_t bend, bindex, bwr;
35149+
35150+ br = NULL;
35151+ bend = au_sbend(sb);
35152+ bwr = -1;
35153+ for (bindex = 0; bindex <= bend; bindex++) {
35154+ br = au_sbr(sb, bindex);
35155+ if (au_br_writable(br->br_perm)
86dc4139 35156+ && !au_test_fs_bad_xino(au_br_sb(br))) {
b752ccd1
AM
35157+ bwr = bindex;
35158+ break;
35159+ }
35160+ }
35161+
7f207e10
AM
35162+ if (bwr >= 0) {
35163+ file = ERR_PTR(-ENOMEM);
537831f9 35164+ page = (void *)__get_free_page(GFP_NOFS);
7f207e10
AM
35165+ if (unlikely(!page))
35166+ goto out;
86dc4139 35167+ path.mnt = au_br_mnt(br);
7f207e10
AM
35168+ path.dentry = au_h_dptr(sb->s_root, bwr);
35169+ p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME));
35170+ file = (void *)p;
35171+ if (!IS_ERR(p)) {
35172+ strcat(p, "/" AUFS_XINO_FNAME);
35173+ AuDbg("%s\n", p);
35174+ file = au_xino_create(sb, p, /*silent*/0);
35175+ if (!IS_ERR(file))
35176+ au_xino_brid_set(sb, br->br_id);
35177+ }
537831f9 35178+ free_page((unsigned long)page);
7f207e10
AM
35179+ } else {
35180+ file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0);
35181+ if (IS_ERR(file))
35182+ goto out;
2000de60 35183+ h_sb = file->f_path.dentry->d_sb;
7f207e10
AM
35184+ if (unlikely(au_test_fs_bad_xino(h_sb))) {
35185+ pr_err("xino doesn't support %s(%s)\n",
35186+ AUFS_XINO_DEFPATH, au_sbtype(h_sb));
35187+ fput(file);
35188+ file = ERR_PTR(-EINVAL);
35189+ }
35190+ if (!IS_ERR(file))
35191+ au_xino_brid_set(sb, -1);
35192+ }
0c5527e5 35193+
7f207e10
AM
35194+out:
35195+ return file;
35196+}
35197+
35198+/* ---------------------------------------------------------------------- */
35199+
35200+int au_xino_path(struct seq_file *seq, struct file *file)
35201+{
35202+ int err;
35203+
35204+ err = au_seq_path(seq, &file->f_path);
79b8bda9 35205+ if (unlikely(err))
7f207e10
AM
35206+ goto out;
35207+
7f207e10
AM
35208+#define Deleted "\\040(deleted)"
35209+ seq->count -= sizeof(Deleted) - 1;
35210+ AuDebugOn(memcmp(seq->buf + seq->count, Deleted,
35211+ sizeof(Deleted) - 1));
35212+#undef Deleted
35213+
35214+out:
35215+ return err;
35216+}
537831f9
AM
35217diff -urN /usr/share/empty/include/uapi/linux/aufs_type.h linux/include/uapi/linux/aufs_type.h
35218--- /usr/share/empty/include/uapi/linux/aufs_type.h 1970-01-01 01:00:00.000000000 +0100
8cdd5066 35219+++ linux/include/uapi/linux/aufs_type.h 2016-02-28 11:26:32.576637942 +0100
c1595e42 35220@@ -0,0 +1,419 @@
7f207e10 35221+/*
8cdd5066 35222+ * Copyright (C) 2005-2016 Junjiro R. Okajima
7f207e10
AM
35223+ *
35224+ * This program, aufs is free software; you can redistribute it and/or modify
35225+ * it under the terms of the GNU General Public License as published by
35226+ * the Free Software Foundation; either version 2 of the License, or
35227+ * (at your option) any later version.
35228+ *
35229+ * This program is distributed in the hope that it will be useful,
35230+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
35231+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35232+ * GNU General Public License for more details.
35233+ *
35234+ * You should have received a copy of the GNU General Public License
523b37e3 35235+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7f207e10
AM
35236+ */
35237+
35238+#ifndef __AUFS_TYPE_H__
35239+#define __AUFS_TYPE_H__
35240+
f6c5ef8b
AM
35241+#define AUFS_NAME "aufs"
35242+
9dbd164d 35243+#ifdef __KERNEL__
f6c5ef8b
AM
35244+/*
35245+ * define it before including all other headers.
35246+ * sched.h may use pr_* macros before defining "current", so define the
35247+ * no-current version first, and re-define later.
35248+ */
35249+#define pr_fmt(fmt) AUFS_NAME " %s:%d: " fmt, __func__, __LINE__
35250+#include <linux/sched.h>
35251+#undef pr_fmt
a2a7ad62
AM
35252+#define pr_fmt(fmt) \
35253+ AUFS_NAME " %s:%d:%.*s[%d]: " fmt, __func__, __LINE__, \
35254+ (int)sizeof(current->comm), current->comm, current->pid
9dbd164d
AM
35255+#else
35256+#include <stdint.h>
35257+#include <sys/types.h>
f6c5ef8b 35258+#endif /* __KERNEL__ */
7f207e10 35259+
f6c5ef8b
AM
35260+#include <linux/limits.h>
35261+
8cdd5066 35262+#define AUFS_VERSION "4.4-20160223"
7f207e10
AM
35263+
35264+/* todo? move this to linux-2.6.19/include/magic.h */
35265+#define AUFS_SUPER_MAGIC ('a' << 24 | 'u' << 16 | 'f' << 8 | 's')
35266+
35267+/* ---------------------------------------------------------------------- */
35268+
35269+#ifdef CONFIG_AUFS_BRANCH_MAX_127
9dbd164d 35270+typedef int8_t aufs_bindex_t;
7f207e10
AM
35271+#define AUFS_BRANCH_MAX 127
35272+#else
9dbd164d 35273+typedef int16_t aufs_bindex_t;
7f207e10
AM
35274+#ifdef CONFIG_AUFS_BRANCH_MAX_511
35275+#define AUFS_BRANCH_MAX 511
35276+#elif defined(CONFIG_AUFS_BRANCH_MAX_1023)
35277+#define AUFS_BRANCH_MAX 1023
35278+#elif defined(CONFIG_AUFS_BRANCH_MAX_32767)
35279+#define AUFS_BRANCH_MAX 32767
35280+#endif
35281+#endif
35282+
35283+#ifdef __KERNEL__
35284+#ifndef AUFS_BRANCH_MAX
35285+#error unknown CONFIG_AUFS_BRANCH_MAX value
35286+#endif
35287+#endif /* __KERNEL__ */
35288+
35289+/* ---------------------------------------------------------------------- */
35290+
7f207e10
AM
35291+#define AUFS_FSTYPE AUFS_NAME
35292+
35293+#define AUFS_ROOT_INO 2
35294+#define AUFS_FIRST_INO 11
35295+
35296+#define AUFS_WH_PFX ".wh."
35297+#define AUFS_WH_PFX_LEN ((int)sizeof(AUFS_WH_PFX) - 1)
35298+#define AUFS_WH_TMP_LEN 4
86dc4139 35299+/* a limit for rmdir/rename a dir and copyup */
7f207e10
AM
35300+#define AUFS_MAX_NAMELEN (NAME_MAX \
35301+ - AUFS_WH_PFX_LEN * 2 /* doubly whiteouted */\
35302+ - 1 /* dot */\
35303+ - AUFS_WH_TMP_LEN) /* hex */
35304+#define AUFS_XINO_FNAME "." AUFS_NAME ".xino"
35305+#define AUFS_XINO_DEFPATH "/tmp/" AUFS_XINO_FNAME
392086de
AM
35306+#define AUFS_XINO_DEF_SEC 30 /* seconds */
35307+#define AUFS_XINO_DEF_TRUNC 45 /* percentage */
7f207e10
AM
35308+#define AUFS_DIRWH_DEF 3
35309+#define AUFS_RDCACHE_DEF 10 /* seconds */
027c5e7a 35310+#define AUFS_RDCACHE_MAX 3600 /* seconds */
7f207e10
AM
35311+#define AUFS_RDBLK_DEF 512 /* bytes */
35312+#define AUFS_RDHASH_DEF 32
35313+#define AUFS_WKQ_NAME AUFS_NAME "d"
027c5e7a
AM
35314+#define AUFS_MFS_DEF_SEC 30 /* seconds */
35315+#define AUFS_MFS_MAX_SEC 3600 /* seconds */
076b876e 35316+#define AUFS_FHSM_CACHE_DEF_SEC 30 /* seconds */
86dc4139 35317+#define AUFS_PLINK_WARN 50 /* number of plinks in a single bucket */
7f207e10
AM
35318+
34319+/* pseudo-link maintenance under /proc */
35320+#define AUFS_PLINK_MAINT_NAME "plink_maint"
35321+#define AUFS_PLINK_MAINT_DIR "fs/" AUFS_NAME
35322+#define AUFS_PLINK_MAINT_PATH AUFS_PLINK_MAINT_DIR "/" AUFS_PLINK_MAINT_NAME
35323+
35324+#define AUFS_DIROPQ_NAME AUFS_WH_PFX ".opq" /* whiteouted doubly */
35325+#define AUFS_WH_DIROPQ AUFS_WH_PFX AUFS_DIROPQ_NAME
35326+
35327+#define AUFS_BASE_NAME AUFS_WH_PFX AUFS_NAME
35328+#define AUFS_PLINKDIR_NAME AUFS_WH_PFX "plnk"
35329+#define AUFS_ORPHDIR_NAME AUFS_WH_PFX "orph"
35330+
35331+/* doubly whiteouted */
35332+#define AUFS_WH_BASE AUFS_WH_PFX AUFS_BASE_NAME
35333+#define AUFS_WH_PLINKDIR AUFS_WH_PFX AUFS_PLINKDIR_NAME
35334+#define AUFS_WH_ORPHDIR AUFS_WH_PFX AUFS_ORPHDIR_NAME
35335+
1e00d052 35336+/* branch permissions and attributes */
7f207e10
AM
35337+#define AUFS_BRPERM_RW "rw"
35338+#define AUFS_BRPERM_RO "ro"
35339+#define AUFS_BRPERM_RR "rr"
076b876e
AM
35340+#define AUFS_BRATTR_COO_REG "coo_reg"
35341+#define AUFS_BRATTR_COO_ALL "coo_all"
35342+#define AUFS_BRATTR_FHSM "fhsm"
35343+#define AUFS_BRATTR_UNPIN "unpin"
c1595e42
JR
35344+#define AUFS_BRATTR_ICEX "icex"
35345+#define AUFS_BRATTR_ICEX_SEC "icexsec"
35346+#define AUFS_BRATTR_ICEX_SYS "icexsys"
35347+#define AUFS_BRATTR_ICEX_TR "icextr"
35348+#define AUFS_BRATTR_ICEX_USR "icexusr"
35349+#define AUFS_BRATTR_ICEX_OTH "icexoth"
1e00d052
AM
35350+#define AUFS_BRRATTR_WH "wh"
35351+#define AUFS_BRWATTR_NLWH "nolwh"
076b876e
AM
35352+#define AUFS_BRWATTR_MOO "moo"
35353+
35354+#define AuBrPerm_RW 1 /* writable, hardlinkable wh */
35355+#define AuBrPerm_RO (1 << 1) /* readonly */
35356+#define AuBrPerm_RR (1 << 2) /* natively readonly */
35357+#define AuBrPerm_Mask (AuBrPerm_RW | AuBrPerm_RO | AuBrPerm_RR)
35358+
35359+#define AuBrAttr_COO_REG (1 << 3) /* copy-up on open */
35360+#define AuBrAttr_COO_ALL (1 << 4)
35361+#define AuBrAttr_COO_Mask (AuBrAttr_COO_REG | AuBrAttr_COO_ALL)
35362+
35363+#define AuBrAttr_FHSM (1 << 5) /* file-based hsm */
35364+#define AuBrAttr_UNPIN (1 << 6) /* rename-able top dir of
c1595e42
JR
35365+ branch. meaningless since
35366+ linux-3.18-rc1 */
35367+
35368+/* ignore error in copying XATTR */
35369+#define AuBrAttr_ICEX_SEC (1 << 7)
35370+#define AuBrAttr_ICEX_SYS (1 << 8)
35371+#define AuBrAttr_ICEX_TR (1 << 9)
35372+#define AuBrAttr_ICEX_USR (1 << 10)
35373+#define AuBrAttr_ICEX_OTH (1 << 11)
35374+#define AuBrAttr_ICEX (AuBrAttr_ICEX_SEC \
35375+ | AuBrAttr_ICEX_SYS \
35376+ | AuBrAttr_ICEX_TR \
35377+ | AuBrAttr_ICEX_USR \
35378+ | AuBrAttr_ICEX_OTH)
35379+
35380+#define AuBrRAttr_WH (1 << 12) /* whiteout-able */
076b876e
AM
35381+#define AuBrRAttr_Mask AuBrRAttr_WH
35382+
c1595e42
JR
35383+#define AuBrWAttr_NoLinkWH (1 << 13) /* un-hardlinkable whiteouts */
35384+#define AuBrWAttr_MOO (1 << 14) /* move-up on open */
076b876e
AM
35385+#define AuBrWAttr_Mask (AuBrWAttr_NoLinkWH | AuBrWAttr_MOO)
35386+
35387+#define AuBrAttr_CMOO_Mask (AuBrAttr_COO_Mask | AuBrWAttr_MOO)
35388+
c1595e42 35389+/* #warning test userspace */
076b876e
AM
35390+#ifdef __KERNEL__
35391+#ifndef CONFIG_AUFS_FHSM
35392+#undef AuBrAttr_FHSM
35393+#define AuBrAttr_FHSM 0
35394+#endif
c1595e42
JR
35395+#ifndef CONFIG_AUFS_XATTR
35396+#undef AuBrAttr_ICEX
35397+#define AuBrAttr_ICEX 0
35398+#undef AuBrAttr_ICEX_SEC
35399+#define AuBrAttr_ICEX_SEC 0
35400+#undef AuBrAttr_ICEX_SYS
35401+#define AuBrAttr_ICEX_SYS 0
35402+#undef AuBrAttr_ICEX_TR
35403+#define AuBrAttr_ICEX_TR 0
35404+#undef AuBrAttr_ICEX_USR
35405+#define AuBrAttr_ICEX_USR 0
35406+#undef AuBrAttr_ICEX_OTH
35407+#define AuBrAttr_ICEX_OTH 0
35408+#endif
076b876e
AM
35409+#endif
35410+
35411+/* the longest combination */
c1595e42
JR
35412+/* AUFS_BRATTR_ICEX and AUFS_BRATTR_ICEX_TR don't affect here */
35413+#define AuBrPermStrSz sizeof(AUFS_BRPERM_RW \
35414+ "+" AUFS_BRATTR_COO_REG \
35415+ "+" AUFS_BRATTR_FHSM \
35416+ "+" AUFS_BRATTR_UNPIN \
7e9cd9fe
AM
35417+ "+" AUFS_BRATTR_ICEX_SEC \
35418+ "+" AUFS_BRATTR_ICEX_SYS \
35419+ "+" AUFS_BRATTR_ICEX_USR \
35420+ "+" AUFS_BRATTR_ICEX_OTH \
076b876e
AM
35421+ "+" AUFS_BRWATTR_NLWH)
35422+
35423+typedef struct {
35424+ char a[AuBrPermStrSz];
35425+} au_br_perm_str_t;
35426+
35427+static inline int au_br_writable(int brperm)
35428+{
35429+ return brperm & AuBrPerm_RW;
35430+}
35431+
35432+static inline int au_br_whable(int brperm)
35433+{
35434+ return brperm & (AuBrPerm_RW | AuBrRAttr_WH);
35435+}
35436+
35437+static inline int au_br_wh_linkable(int brperm)
35438+{
35439+ return !(brperm & AuBrWAttr_NoLinkWH);
35440+}
35441+
35442+static inline int au_br_cmoo(int brperm)
35443+{
35444+ return brperm & AuBrAttr_CMOO_Mask;
35445+}
35446+
35447+static inline int au_br_fhsm(int brperm)
35448+{
35449+ return brperm & AuBrAttr_FHSM;
35450+}
7f207e10
AM
35451+
35452+/* ---------------------------------------------------------------------- */
35453+
35454+/* ioctl */
35455+enum {
35456+ /* readdir in userspace */
35457+ AuCtl_RDU,
35458+ AuCtl_RDU_INO,
35459+
076b876e
AM
35460+ AuCtl_WBR_FD, /* pathconf wrapper */
35461+ AuCtl_IBUSY, /* busy inode */
35462+ AuCtl_MVDOWN, /* move-down */
35463+ AuCtl_BR, /* info about branches */
35464+ AuCtl_FHSM_FD /* connection for fhsm */
7f207e10
AM
35465+};
35466+
35467+/* borrowed from linux/include/linux/kernel.h */
35468+#ifndef ALIGN
35469+#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
35470+#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
35471+#endif
35472+
35473+/* borrowed from linux/include/linux/compiler-gcc3.h */
35474+#ifndef __aligned
35475+#define __aligned(x) __attribute__((aligned(x)))
53392da6
AM
35476+#endif
35477+
35478+#ifdef __KERNEL__
35479+#ifndef __packed
7f207e10
AM
35480+#define __packed __attribute__((packed))
35481+#endif
53392da6 35482+#endif
7f207e10
AM
35483+
35484+struct au_rdu_cookie {
9dbd164d
AM
35485+ uint64_t h_pos;
35486+ int16_t bindex;
35487+ uint8_t flags;
35488+ uint8_t pad;
35489+ uint32_t generation;
7f207e10
AM
35490+} __aligned(8);
35491+
35492+struct au_rdu_ent {
9dbd164d
AM
35493+ uint64_t ino;
35494+ int16_t bindex;
35495+ uint8_t type;
35496+ uint8_t nlen;
35497+ uint8_t wh;
7f207e10
AM
35498+ char name[0];
35499+} __aligned(8);
35500+
35501+static inline int au_rdu_len(int nlen)
35502+{
35503+ /* include the terminating NUL */
35504+ return ALIGN(sizeof(struct au_rdu_ent) + nlen + 1,
9dbd164d 35505+ sizeof(uint64_t));
7f207e10
AM
35506+}
35507+
35508+union au_rdu_ent_ul {
35509+ struct au_rdu_ent __user *e;
9dbd164d 35510+ uint64_t ul;
7f207e10
AM
35511+};
35512+
35513+enum {
35514+ AufsCtlRduV_SZ,
35515+ AufsCtlRduV_End
35516+};
35517+
35518+struct aufs_rdu {
35519+ /* input */
35520+ union {
9dbd164d
AM
35521+ uint64_t sz; /* AuCtl_RDU */
35522+ uint64_t nent; /* AuCtl_RDU_INO */
7f207e10
AM
35523+ };
35524+ union au_rdu_ent_ul ent;
9dbd164d 35525+ uint16_t verify[AufsCtlRduV_End];
7f207e10
AM
35526+
35527+ /* input/output */
9dbd164d 35528+ uint32_t blk;
7f207e10
AM
35529+
35530+ /* output */
35531+ union au_rdu_ent_ul tail;
35532+ /* number of entries which were added in a single call */
9dbd164d
AM
35533+ uint64_t rent;
35534+ uint8_t full;
35535+ uint8_t shwh;
7f207e10
AM
35536+
35537+ struct au_rdu_cookie cookie;
35538+} __aligned(8);
35539+
1e00d052
AM
35540+/* ---------------------------------------------------------------------- */
35541+
35542+struct aufs_wbr_fd {
9dbd164d
AM
35543+ uint32_t oflags;
35544+ int16_t brid;
1e00d052
AM
35545+} __aligned(8);
35546+
35547+/* ---------------------------------------------------------------------- */
35548+
027c5e7a 35549+struct aufs_ibusy {
9dbd164d
AM
35550+ uint64_t ino, h_ino;
35551+ int16_t bindex;
027c5e7a
AM
35552+} __aligned(8);
35553+
1e00d052
AM
35554+/* ---------------------------------------------------------------------- */
35555+
392086de
AM
35556+/* error code for move-down */
35557+/* the actual message strings are implemented in aufs-util.git */
35558+enum {
35559+ EAU_MVDOWN_OPAQUE = 1,
35560+ EAU_MVDOWN_WHITEOUT,
35561+ EAU_MVDOWN_UPPER,
35562+ EAU_MVDOWN_BOTTOM,
35563+ EAU_MVDOWN_NOUPPER,
35564+ EAU_MVDOWN_NOLOWERBR,
35565+ EAU_Last
35566+};
35567+
c2b27bf2 35568+/* flags for move-down */
392086de
AM
35569+#define AUFS_MVDOWN_DMSG 1
35570+#define AUFS_MVDOWN_OWLOWER (1 << 1) /* overwrite lower */
35571+#define AUFS_MVDOWN_KUPPER (1 << 2) /* keep upper */
35572+#define AUFS_MVDOWN_ROLOWER (1 << 3) /* do even if lower is RO */
35573+#define AUFS_MVDOWN_ROLOWER_R (1 << 4) /* did on lower RO */
35574+#define AUFS_MVDOWN_ROUPPER (1 << 5) /* do even if upper is RO */
35575+#define AUFS_MVDOWN_ROUPPER_R (1 << 6) /* did on upper RO */
35576+#define AUFS_MVDOWN_BRID_UPPER (1 << 7) /* upper brid */
35577+#define AUFS_MVDOWN_BRID_LOWER (1 << 8) /* lower brid */
076b876e
AM
35578+#define AUFS_MVDOWN_FHSM_LOWER (1 << 9) /* find fhsm attr for lower */
35579+#define AUFS_MVDOWN_STFS (1 << 10) /* req. stfs */
35580+#define AUFS_MVDOWN_STFS_FAILED (1 << 11) /* output: stfs is unusable */
35581+#define AUFS_MVDOWN_BOTTOM (1 << 12) /* output: no more lowers */
c2b27bf2 35582+
076b876e 35583+/* index for move-down */
392086de
AM
35584+enum {
35585+ AUFS_MVDOWN_UPPER,
35586+ AUFS_MVDOWN_LOWER,
35587+ AUFS_MVDOWN_NARRAY
35588+};
35589+
076b876e
AM
35590+/*
35591+ * additional info of move-down
35592+ * number of free blocks and inodes.
35593+ * subset of struct kstatfs, but smaller and always 64bit.
35594+ */
35595+struct aufs_stfs {
35596+ uint64_t f_blocks;
35597+ uint64_t f_bavail;
35598+ uint64_t f_files;
35599+ uint64_t f_ffree;
35600+};
35601+
35602+struct aufs_stbr {
35603+ int16_t brid; /* optional input */
35604+ int16_t bindex; /* output */
35605+ struct aufs_stfs stfs; /* output when AUFS_MVDOWN_STFS set */
35606+} __aligned(8);
35607+
c2b27bf2 35608+struct aufs_mvdown {
076b876e
AM
35609+ uint32_t flags; /* input/output */
35610+ struct aufs_stbr stbr[AUFS_MVDOWN_NARRAY]; /* input/output */
35611+ int8_t au_errno; /* output */
35612+} __aligned(8);
35613+
35614+/* ---------------------------------------------------------------------- */
35615+
35616+union aufs_brinfo {
35617+ /* PATH_MAX may differ between kernel-space and user-space */
35618+ char _spacer[4096];
392086de 35619+ struct {
076b876e
AM
35620+ int16_t id;
35621+ int perm;
35622+ char path[0];
35623+ };
c2b27bf2
AM
35624+} __aligned(8);
35625+
35626+/* ---------------------------------------------------------------------- */
35627+
7f207e10
AM
35628+#define AuCtlType 'A'
35629+#define AUFS_CTL_RDU _IOWR(AuCtlType, AuCtl_RDU, struct aufs_rdu)
35630+#define AUFS_CTL_RDU_INO _IOWR(AuCtlType, AuCtl_RDU_INO, struct aufs_rdu)
1e00d052
AM
35631+#define AUFS_CTL_WBR_FD _IOW(AuCtlType, AuCtl_WBR_FD, \
35632+ struct aufs_wbr_fd)
027c5e7a 35633+#define AUFS_CTL_IBUSY _IOWR(AuCtlType, AuCtl_IBUSY, struct aufs_ibusy)
392086de
AM
35634+#define AUFS_CTL_MVDOWN _IOWR(AuCtlType, AuCtl_MVDOWN, \
35635+ struct aufs_mvdown)
076b876e
AM
35636+#define AUFS_CTL_BRINFO _IOW(AuCtlType, AuCtl_BR, union aufs_brinfo)
35637+#define AUFS_CTL_FHSM_FD _IOW(AuCtlType, AuCtl_FHSM_FD, int)
7f207e10
AM
35638+
35639+#endif /* __AUFS_TYPE_H__ */
cfc41e69 35640aufs4.4 loopback patch
5527c038
JR
35641
35642diff --git a/drivers/block/loop.c b/drivers/block/loop.c
be52b249 35643index abfdd2b..a2e3c43 100644
5527c038
JR
35644--- a/drivers/block/loop.c
35645+++ b/drivers/block/loop.c
be52b249 35646@@ -556,7 +556,7 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq)
5527c038
JR
35647 }
35648
35649 struct switch_request {
35650- struct file *file;
35651+ struct file *file, *virt_file;
35652 struct completion wait;
35653 };
35654
be52b249 35655@@ -582,6 +582,7 @@ static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
5527c038
JR
35656 mapping = file->f_mapping;
35657 mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
35658 lo->lo_backing_file = file;
35659+ lo->lo_backing_virt_file = p->virt_file;
35660 lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
35661 mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
35662 lo->old_gfp_mask = mapping_gfp_mask(mapping);
be52b249 35663@@ -594,11 +595,13 @@ static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
5527c038
JR
35664 * First it needs to flush existing IO, it does this by sending a magic
35665 * BIO down the pipe. The completion of this BIO does the actual switch.
35666 */
35667-static int loop_switch(struct loop_device *lo, struct file *file)
35668+static int loop_switch(struct loop_device *lo, struct file *file,
35669+ struct file *virt_file)
35670 {
35671 struct switch_request w;
35672
35673 w.file = file;
35674+ w.virt_file = virt_file;
35675
35676 /* freeze queue and wait for completion of scheduled requests */
35677 blk_mq_freeze_queue(lo->lo_queue);
be52b249 35678@@ -617,7 +620,16 @@ static int loop_switch(struct loop_device *lo, struct file *file)
5527c038
JR
35679 */
35680 static int loop_flush(struct loop_device *lo)
35681 {
35682- return loop_switch(lo, NULL);
35683+ return loop_switch(lo, NULL, NULL);
35684+}
35685+
35686+static struct file *loop_real_file(struct file *file)
35687+{
35688+ struct file *f = NULL;
35689+
35690+ if (file->f_path.dentry->d_sb->s_op->real_loop)
35691+ f = file->f_path.dentry->d_sb->s_op->real_loop(file);
35692+ return f;
35693 }
35694
c2c0f25c 35695 static void loop_reread_partitions(struct loop_device *lo,
be52b249 35696@@ -654,6 +666,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
5527c038
JR
35697 unsigned int arg)
35698 {
35699 struct file *file, *old_file;
35700+ struct file *f, *virt_file = NULL, *old_virt_file;
35701 struct inode *inode;
35702 int error;
35703
be52b249 35704@@ -670,9 +683,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
5527c038
JR
35705 file = fget(arg);
35706 if (!file)
35707 goto out;
35708+ f = loop_real_file(file);
35709+ if (f) {
35710+ virt_file = file;
35711+ file = f;
35712+ get_file(file);
35713+ }
35714
35715 inode = file->f_mapping->host;
35716 old_file = lo->lo_backing_file;
35717+ old_virt_file = lo->lo_backing_virt_file;
35718
35719 error = -EINVAL;
35720
be52b249 35721@@ -684,17 +704,21 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
5527c038
JR
35722 goto out_putf;
35723
35724 /* and ... switch */
35725- error = loop_switch(lo, file);
35726+ error = loop_switch(lo, file, virt_file);
35727 if (error)
35728 goto out_putf;
35729
35730 fput(old_file);
35731+ if (old_virt_file)
35732+ fput(old_virt_file);
35733 if (lo->lo_flags & LO_FLAGS_PARTSCAN)
c2c0f25c 35734 loop_reread_partitions(lo, bdev);
5527c038
JR
35735 return 0;
35736
35737 out_putf:
35738 fput(file);
35739+ if (virt_file)
35740+ fput(virt_file);
35741 out:
35742 return error;
35743 }
be52b249 35744@@ -881,7 +905,7 @@ static int loop_prepare_queue(struct loop_device *lo)
5527c038
JR
35745 static int loop_set_fd(struct loop_device *lo, fmode_t mode,
35746 struct block_device *bdev, unsigned int arg)
35747 {
35748- struct file *file, *f;
35749+ struct file *file, *f, *virt_file = NULL;
35750 struct inode *inode;
35751 struct address_space *mapping;
35752 unsigned lo_blocksize;
be52b249 35753@@ -896,6 +920,12 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
5527c038
JR
35754 file = fget(arg);
35755 if (!file)
35756 goto out;
35757+ f = loop_real_file(file);
35758+ if (f) {
35759+ virt_file = file;
35760+ file = f;
35761+ get_file(file);
35762+ }
35763
35764 error = -EBUSY;
35765 if (lo->lo_state != Lo_unbound)
be52b249 35766@@ -948,6 +978,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
5527c038
JR
35767 lo->lo_device = bdev;
35768 lo->lo_flags = lo_flags;
35769 lo->lo_backing_file = file;
35770+ lo->lo_backing_virt_file = virt_file;
35771 lo->transfer = NULL;
35772 lo->ioctl = NULL;
35773 lo->lo_sizelimit = 0;
be52b249 35774@@ -980,6 +1011,8 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
5527c038
JR
35775
35776 out_putf:
35777 fput(file);
35778+ if (virt_file)
35779+ fput(virt_file);
35780 out:
35781 /* This is safe: open() is still holding a reference. */
35782 module_put(THIS_MODULE);
be52b249 35783@@ -1026,6 +1059,7 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
5527c038
JR
35784 static int loop_clr_fd(struct loop_device *lo)
35785 {
35786 struct file *filp = lo->lo_backing_file;
35787+ struct file *virt_filp = lo->lo_backing_virt_file;
35788 gfp_t gfp = lo->old_gfp_mask;
35789 struct block_device *bdev = lo->lo_device;
35790
be52b249 35791@@ -1057,6 +1091,7 @@ static int loop_clr_fd(struct loop_device *lo)
5527c038
JR
35792 spin_lock_irq(&lo->lo_lock);
35793 lo->lo_state = Lo_rundown;
35794 lo->lo_backing_file = NULL;
35795+ lo->lo_backing_virt_file = NULL;
35796 spin_unlock_irq(&lo->lo_lock);
35797
35798 loop_release_xfer(lo);
be52b249 35799@@ -1101,6 +1136,8 @@ static int loop_clr_fd(struct loop_device *lo)
5527c038
JR
35800 * bd_mutex which is usually taken before lo_ctl_mutex.
35801 */
35802 fput(filp);
35803+ if (virt_filp)
35804+ fput(virt_filp);
35805 return 0;
35806 }
35807
35808diff --git a/drivers/block/loop.h b/drivers/block/loop.h
be52b249 35809index fb2237c..c3888c5 100644
5527c038
JR
35810--- a/drivers/block/loop.h
35811+++ b/drivers/block/loop.h
35812@@ -46,7 +46,7 @@ struct loop_device {
35813 int (*ioctl)(struct loop_device *, int cmd,
35814 unsigned long arg);
35815
35816- struct file * lo_backing_file;
35817+ struct file * lo_backing_file, *lo_backing_virt_file;
35818 struct block_device *lo_device;
35819 unsigned lo_blocksize;
35820 void *key_data;
35821diff --git a/fs/aufs/f_op.c b/fs/aufs/f_op.c
8cdd5066 35822index 2baacd7..22bcb89 100644
5527c038
JR
35823--- a/fs/aufs/f_op.c
35824+++ b/fs/aufs/f_op.c
8cdd5066 35825@@ -399,7 +399,7 @@ static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
5527c038
JR
35826 if (IS_ERR(h_file))
35827 goto out;
35828
35829- if (au_test_loopback_kthread()) {
35830+ if (0 && au_test_loopback_kthread()) {
35831 au_warn_loopback(h_file->f_path.dentry->d_sb);
35832 if (file->f_mapping != h_file->f_mapping) {
35833 file->f_mapping = h_file->f_mapping;
35834diff --git a/fs/aufs/loop.c b/fs/aufs/loop.c
8cdd5066 35835index 5711e7a..9df5d16 100644
5527c038
JR
35836--- a/fs/aufs/loop.c
35837+++ b/fs/aufs/loop.c
79b8bda9
AM
35838@@ -131,3 +131,19 @@ void au_loopback_fin(void)
35839 symbol_put(loop_backing_file);
5527c038
JR
35840 kfree(au_warn_loopback_array);
35841 }
35842+
35843+/* ---------------------------------------------------------------------- */
35844+
35845+/* support the loopback block device inside aufs */
35846+
35847+struct file *aufs_real_loop(struct file *file)
35848+{
35849+ struct file *f;
35850+
35851+ BUG_ON(!au_test_aufs(file->f_path.dentry->d_sb));
35852+ fi_read_lock(file);
35853+ f = au_hf_top(file);
35854+ fi_read_unlock(file);
35855+ AuDebugOn(!f);
35856+ return f;
35857+}
35858diff --git a/fs/aufs/loop.h b/fs/aufs/loop.h
8cdd5066 35859index 48bf070..66afec7 100644
5527c038
JR
35860--- a/fs/aufs/loop.h
35861+++ b/fs/aufs/loop.h
35862@@ -25,7 +25,11 @@ void au_warn_loopback(struct super_block *h_sb);
35863
35864 int au_loopback_init(void);
35865 void au_loopback_fin(void);
35866+
35867+struct file *aufs_real_loop(struct file *file);
35868 #else
35869+AuStub(struct file *, loop_backing_file, return NULL)
35870+
35871 AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
35872 struct dentry *h_adding)
35873 AuStubInt0(au_test_loopback_kthread, void)
35874@@ -33,6 +37,8 @@ AuStubVoid(au_warn_loopback, struct super_block *h_sb)
35875
35876 AuStubInt0(au_loopback_init, void)
35877 AuStubVoid(au_loopback_fin, void)
35878+
35879+AuStub(struct file *, aufs_real_loop, return NULL, struct file *file)
35880 #endif /* BLK_DEV_LOOP */
35881
35882 #endif /* __KERNEL__ */
35883diff --git a/fs/aufs/super.c b/fs/aufs/super.c
8cdd5066 35884index b41d789..51d2fb9 100644
5527c038
JR
35885--- a/fs/aufs/super.c
35886+++ b/fs/aufs/super.c
be52b249 35887@@ -832,7 +832,10 @@ static const struct super_operations aufs_sop = {
5527c038
JR
35888 .statfs = aufs_statfs,
35889 .put_super = aufs_put_super,
35890 .sync_fs = aufs_sync_fs,
35891- .remount_fs = aufs_remount_fs
35892+ .remount_fs = aufs_remount_fs,
35893+#ifdef CONFIG_AUFS_BDEV_LOOP
35894+ .real_loop = aufs_real_loop
35895+#endif
35896 };
35897
35898 /* ---------------------------------------------------------------------- */
35899diff --git a/include/linux/fs.h b/include/linux/fs.h
be52b249 35900index 8d48506..5246785 100644
5527c038
JR
35901--- a/include/linux/fs.h
35902+++ b/include/linux/fs.h
be52b249 35903@@ -1719,6 +1719,10 @@ struct super_operations {
5527c038
JR
35904 struct shrink_control *);
35905 long (*free_cached_objects)(struct super_block *,
35906 struct shrink_control *);
35907+#if defined(CONFIG_BLK_DEV_LOOP) || defined(CONFIG_BLK_DEV_LOOP_MODULE)
35908+ /* and aufs */
35909+ struct file *(*real_loop)(struct file *);
35910+#endif
35911 };
35912
35913 /*
This page took 6.130765 seconds and 4 git commands to generate.