git.pld-linux.org Git - packages/kernel.git/blame - kernel-aufs3.patch
- fix pointer incompatibility errors
[packages/kernel.git] / kernel-aufs3.patch
CommitLineData
7f2ca4b1 1aufs3.18.25+ kbuild patch
7f207e10
AM
2
3diff --git a/fs/Kconfig b/fs/Kconfig
c1595e42 4index 664991a..1481093 100644
7f207e10
AM
5--- a/fs/Kconfig
6+++ b/fs/Kconfig
c1595e42 7@@ -210,6 +210,7 @@ source "fs/ufs/Kconfig"
7f207e10 8 source "fs/exofs/Kconfig"
1716fcea 9 source "fs/f2fs/Kconfig"
c06a8ce3 10 source "fs/efivarfs/Kconfig"
7f207e10
AM
11+source "fs/aufs/Kconfig"
12
13 endif # MISC_FILESYSTEMS
14
15diff --git a/fs/Makefile b/fs/Makefile
c1595e42 16index da0bbb4..c8bc724 100644
7f207e10
AM
17--- a/fs/Makefile
18+++ b/fs/Makefile
c1595e42 19@@ -126,3 +126,4 @@ obj-y += exofs/ # Multiple modules
7f207e10 20 obj-$(CONFIG_CEPH_FS) += ceph/
bf0370f2 21 obj-$(CONFIG_PSTORE) += pstore/
c06a8ce3 22 obj-$(CONFIG_EFIVAR_FS) += efivarfs/
86dc4139 23+obj-$(CONFIG_AUFS_FS) += aufs/
c06a8ce3 24diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
c1595e42 25index 8523f9b..11f8f74 100644
c06a8ce3
AM
26--- a/include/uapi/linux/Kbuild
27+++ b/include/uapi/linux/Kbuild
28@@ -56,6 +56,7 @@ header-y += atmppp.h
29 header-y += atmsap.h
30 header-y += atmsvc.h
31 header-y += audit.h
32+header-y += aufs_type.h
33 header-y += auto_fs.h
34 header-y += auto_fs4.h
35 header-y += auxvec.h
7f2ca4b1 36aufs3.18.25+ base patch
7f207e10 37
c1595e42
JR
38diff --git a/MAINTAINERS b/MAINTAINERS
39index c721042..83801d0 100644
40--- a/MAINTAINERS
41+++ b/MAINTAINERS
42@@ -1795,6 +1795,20 @@ F: include/linux/audit.h
43 F: include/uapi/linux/audit.h
44 F: kernel/audit*
45
46+AUFS (advanced multi layered unification filesystem) FILESYSTEM
47+M: "J. R. Okajima" <hooanon05g@gmail.com>
48+L: linux-unionfs@vger.kernel.org
49+L: aufs-users@lists.sourceforge.net (members only)
50+W: http://aufs.sourceforge.net
51+T: git://git.code.sf.net/p/aufs/aufs3-linux
52+T: git://github.com/sfjro/aufs3-linux.git
53+S: Supported
54+F: Documentation/filesystems/aufs/
55+F: Documentation/ABI/testing/debugfs-aufs
56+F: Documentation/ABI/testing/sysfs-aufs
57+F: fs/aufs/
58+F: include/uapi/linux/aufs_type.h
59+
60 AUXILIARY DISPLAY DRIVERS
61 M: Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
62 W: http://miguelojeda.es/auxdisplay.htm
392086de 63diff --git a/drivers/block/loop.c b/drivers/block/loop.c
076b876e 64index 6cb1beb..30efd68 100644
392086de
AM
65--- a/drivers/block/loop.c
66+++ b/drivers/block/loop.c
fb47a38f 67@@ -692,6 +692,24 @@ static inline int is_loop_device(struct file *file)
392086de
AM
68 return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
69 }
70
71+/*
72+ * for AUFS
73+ * no get/put for file.
74+ */
75+struct file *loop_backing_file(struct super_block *sb)
76+{
77+ struct file *ret;
78+ struct loop_device *l;
79+
80+ ret = NULL;
81+ if (MAJOR(sb->s_dev) == LOOP_MAJOR) {
82+ l = sb->s_bdev->bd_disk->private_data;
83+ ret = l->lo_backing_file;
84+ }
85+ return ret;
86+}
87+EXPORT_SYMBOL(loop_backing_file);
88+
89 /* loop sysfs attributes */
90
91 static ssize_t loop_attr_show(struct device *dev, char *page,
c1595e42 92diff --git a/fs/dcache.c b/fs/dcache.c
7f2ca4b1 93index d25f8fd..857990a 100644
c1595e42
JR
94--- a/fs/dcache.c
95+++ b/fs/dcache.c
7f2ca4b1 96@@ -1022,7 +1022,7 @@ enum d_walk_ret {
c1595e42
JR
97 *
98 * The @enter() and @finish() callbacks are called with d_lock held.
99 */
100-static void d_walk(struct dentry *parent, void *data,
101+void d_walk(struct dentry *parent, void *data,
102 enum d_walk_ret (*enter)(void *, struct dentry *),
103 void (*finish)(void *))
104 {
0c3ec466 105diff --git a/fs/inode.c b/fs/inode.c
7f2ca4b1 106index 56d1d2b..2998e86 100644
0c3ec466
AM
107--- a/fs/inode.c
108+++ b/fs/inode.c
c1595e42 109@@ -1497,7 +1497,7 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
0c3ec466
AM
110 * This does the actual work of updating an inodes time or version. Must have
111 * had called mnt_want_write() before calling this.
112 */
113-static int update_time(struct inode *inode, struct timespec *time, int flags)
114+int update_time(struct inode *inode, struct timespec *time, int flags)
115 {
116 if (inode->i_op->update_time)
117 return inode->i_op->update_time(inode, time, flags);
7f207e10 118diff --git a/fs/splice.c b/fs/splice.c
c1595e42 119index 75c6058..619359a 100644
7f207e10
AM
120--- a/fs/splice.c
121+++ b/fs/splice.c
076b876e 122@@ -1114,8 +1114,8 @@ EXPORT_SYMBOL(generic_splice_sendpage);
7f207e10
AM
123 /*
124 * Attempt to initiate a splice from pipe to file.
125 */
126-static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
127- loff_t *ppos, size_t len, unsigned int flags)
128+long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
129+ loff_t *ppos, size_t len, unsigned int flags)
130 {
131 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,
132 loff_t *, size_t, unsigned int);
076b876e 133@@ -1131,9 +1131,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
7f207e10
AM
134 /*
135 * Attempt to initiate a splice from a file to a pipe.
136 */
137-static long do_splice_to(struct file *in, loff_t *ppos,
138- struct pipe_inode_info *pipe, size_t len,
139- unsigned int flags)
140+long do_splice_to(struct file *in, loff_t *ppos,
141+ struct pipe_inode_info *pipe, size_t len,
142+ unsigned int flags)
143 {
144 ssize_t (*splice_read)(struct file *, loff_t *,
145 struct pipe_inode_info *, size_t, unsigned int);
7f2ca4b1
JR
146diff --git a/include/linux/file.h b/include/linux/file.h
147index 4d69123..62cffc0 100644
148--- a/include/linux/file.h
149+++ b/include/linux/file.h
150@@ -19,6 +19,7 @@ struct dentry;
151 struct path;
152 extern struct file *alloc_file(struct path *, fmode_t mode,
153 const struct file_operations *fop);
154+extern struct file *get_empty_filp(void);
155
156 static inline void fput_light(struct file *file, int fput_needed)
157 {
0c3ec466 158diff --git a/include/linux/fs.h b/include/linux/fs.h
7f2ca4b1 159index 6fd017e..e35dc41 100644
0c3ec466
AM
160--- a/include/linux/fs.h
161+++ b/include/linux/fs.h
7f2ca4b1 162@@ -2662,6 +2662,7 @@ extern int inode_change_ok(const struct inode *, struct iattr *);
0c3ec466
AM
163 extern int inode_newsize_ok(const struct inode *, loff_t offset);
164 extern void setattr_copy(struct inode *inode, const struct iattr *attr);
165
166+extern int update_time(struct inode *, struct timespec *, int);
167 extern int file_update_time(struct file *file);
168
169 extern int generic_show_options(struct seq_file *m, struct dentry *root);
1e00d052 170diff --git a/include/linux/splice.h b/include/linux/splice.h
076b876e 171index da2751d..2e0fca6 100644
1e00d052
AM
172--- a/include/linux/splice.h
173+++ b/include/linux/splice.h
076b876e 174@@ -83,4 +83,10 @@ extern void splice_shrink_spd(struct splice_pipe_desc *);
4b3da204
AM
175 extern void spd_release_page(struct splice_pipe_desc *, unsigned int);
176
177 extern const struct pipe_buf_operations page_cache_pipe_buf_ops;
1e00d052
AM
178+
179+extern long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
180+ loff_t *ppos, size_t len, unsigned int flags);
181+extern long do_splice_to(struct file *in, loff_t *ppos,
182+ struct pipe_inode_info *pipe, size_t len,
183+ unsigned int flags);
184 #endif
7f2ca4b1 185aufs3.18.25+ mmap patch
fb47a38f
JR
186
187diff --git a/fs/buffer.c b/fs/buffer.c
c1595e42 188index 20805db..363569f 100644
fb47a38f
JR
189--- a/fs/buffer.c
190+++ b/fs/buffer.c
c1595e42 191@@ -2450,7 +2450,7 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
fb47a38f
JR
192 * Update file times before taking page lock. We may end up failing the
193 * fault so this update may be superfluous but who really cares...
194 */
195- file_update_time(vma->vm_file);
196+ vma_file_update_time(vma);
197
198 ret = __block_page_mkwrite(vma, vmf, get_block);
199 sb_end_pagefault(sb);
c1595e42 200diff --git a/fs/proc/base.c b/fs/proc/base.c
7f2ca4b1 201index 7dc3ea8..b368ad5 100644
c1595e42
JR
202--- a/fs/proc/base.c
203+++ b/fs/proc/base.c
204@@ -1735,7 +1735,7 @@ static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
205 down_read(&mm->mmap_sem);
206 vma = find_exact_vma(mm, vm_start, vm_end);
207 if (vma && vma->vm_file) {
208- *path = vma->vm_file->f_path;
209+ *path = vma_pr_or_file(vma)->f_path;
210 path_get(path);
211 rc = 0;
212 }
fb47a38f 213diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
076b876e 214index d4a3574..1397181 100644
fb47a38f
JR
215--- a/fs/proc/nommu.c
216+++ b/fs/proc/nommu.c
076b876e 217@@ -45,7 +45,10 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region)
fb47a38f
JR
218 file = region->vm_file;
219
220 if (file) {
221- struct inode *inode = file_inode(region->vm_file);
222+ struct inode *inode;
076b876e 223+
fb47a38f
JR
224+ file = vmr_pr_or_file(region);
225+ inode = file_inode(file);
226 dev = inode->i_sb->s_dev;
227 ino = inode->i_ino;
228 }
229diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
7f2ca4b1 230index 69aa378..426b962 100644
fb47a38f
JR
231--- a/fs/proc/task_mmu.c
232+++ b/fs/proc/task_mmu.c
c1595e42 233@@ -276,7 +276,10 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
fb47a38f
JR
234 const char *name = NULL;
235
236 if (file) {
237- struct inode *inode = file_inode(vma->vm_file);
238+ struct inode *inode;
076b876e 239+
fb47a38f
JR
240+ file = vma_pr_or_file(vma);
241+ inode = file_inode(file);
242 dev = inode->i_sb->s_dev;
243 ino = inode->i_ino;
244 pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
7f2ca4b1 245@@ -1447,7 +1450,7 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
076b876e
AM
246 struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
247 struct vm_area_struct *vma = v;
248 struct numa_maps *md = &numa_priv->md;
249- struct file *file = vma->vm_file;
250+ struct file *file = vma_pr_or_file(vma);
076b876e
AM
251 struct mm_struct *mm = vma->vm_mm;
252 struct mm_walk walk = {};
e26ee53e 253 struct mempolicy *pol;
fb47a38f 254diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
7f2ca4b1 255index 599ec2e..1740207 100644
fb47a38f
JR
256--- a/fs/proc/task_nommu.c
257+++ b/fs/proc/task_nommu.c
c1595e42 258@@ -160,7 +160,10 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
fb47a38f
JR
259 file = vma->vm_file;
260
261 if (file) {
262- struct inode *inode = file_inode(vma->vm_file);
263+ struct inode *inode;
076b876e 264+
7f2ca4b1 265+ file = vma_pr_or_file(vma);
fb47a38f
JR
266+ inode = file_inode(file);
267 dev = inode->i_sb->s_dev;
268 ino = inode->i_ino;
269 pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
270diff --git a/include/linux/mm.h b/include/linux/mm.h
7f2ca4b1 271index 86a977b..a2d0dbb2 100644
fb47a38f
JR
272--- a/include/linux/mm.h
273+++ b/include/linux/mm.h
7f2ca4b1 274@@ -1208,6 +1208,28 @@ static inline int fixup_user_fault(struct task_struct *tsk,
fb47a38f
JR
275 }
276 #endif
277
076b876e
AM
278+extern void vma_do_file_update_time(struct vm_area_struct *, const char[], int);
279+extern struct file *vma_do_pr_or_file(struct vm_area_struct *, const char[],
280+ int);
281+extern void vma_do_get_file(struct vm_area_struct *, const char[], int);
282+extern void vma_do_fput(struct vm_area_struct *, const char[], int);
fb47a38f 283+
fb47a38f
JR
284+#define vma_file_update_time(vma) vma_do_file_update_time(vma, __func__, \
285+ __LINE__)
286+#define vma_pr_or_file(vma) vma_do_pr_or_file(vma, __func__, \
287+ __LINE__)
288+#define vma_get_file(vma) vma_do_get_file(vma, __func__, __LINE__)
289+#define vma_fput(vma) vma_do_fput(vma, __func__, __LINE__)
7f2ca4b1
JR
290+
291+#ifndef CONFIG_MMU
076b876e
AM
292+extern struct file *vmr_do_pr_or_file(struct vm_region *, const char[], int);
293+extern void vmr_do_fput(struct vm_region *, const char[], int);
294+
295+#define vmr_pr_or_file(region) vmr_do_pr_or_file(region, __func__, \
296+ __LINE__)
297+#define vmr_fput(region) vmr_do_fput(region, __func__, __LINE__)
7f2ca4b1 298+#endif /* !CONFIG_MMU */
fb47a38f
JR
299+
300 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
301 extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
302 void *buf, int len, int write);
303diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
c1595e42 304index 6e0b286..8f374ed 100644
fb47a38f
JR
305--- a/include/linux/mm_types.h
306+++ b/include/linux/mm_types.h
38d290e6 307@@ -232,6 +232,7 @@ struct vm_region {
fb47a38f
JR
308 unsigned long vm_top; /* region allocated to here */
309 unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */
310 struct file *vm_file; /* the backing file or NULL */
311+ struct file *vm_prfile; /* the virtual backing file or NULL */
312
313 int vm_usage; /* region usage count (access under nommu_region_sem) */
314 bool vm_icache_flushed : 1; /* true if the icache has been flushed for
38d290e6 315@@ -300,6 +301,7 @@ struct vm_area_struct {
fb47a38f
JR
316 unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE
317 units, *not* PAGE_CACHE_SIZE */
318 struct file * vm_file; /* File we map to (can be NULL). */
319+ struct file *vm_prfile; /* shadow of vm_file */
320 void * vm_private_data; /* was vm_pte (shared mem) */
321
322 #ifndef CONFIG_MMU
323diff --git a/kernel/fork.c b/kernel/fork.c
7f2ca4b1 324index 0a4f601..67ecb91 100644
fb47a38f
JR
325--- a/kernel/fork.c
326+++ b/kernel/fork.c
c1595e42 327@@ -430,7 +430,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
fb47a38f
JR
328 struct inode *inode = file_inode(file);
329 struct address_space *mapping = file->f_mapping;
330
331- get_file(file);
332+ vma_get_file(tmp);
333 if (tmp->vm_flags & VM_DENYWRITE)
334 atomic_dec(&inode->i_writecount);
335 mutex_lock(&mapping->i_mmap_mutex);
076b876e 336diff --git a/mm/Makefile b/mm/Makefile
c1595e42 337index 8405eb0..e0bda2d 100644
076b876e
AM
338--- a/mm/Makefile
339+++ b/mm/Makefile
c1595e42 340@@ -18,7 +18,7 @@ obj-y := filemap.o mempool.o oom_kill.o \
076b876e 341 mm_init.o mmu_context.o percpu.o slab_common.o \
c1595e42 342 compaction.o vmacache.o \
076b876e 343 interval_tree.o list_lru.o workingset.o \
e26ee53e 344- iov_iter.o debug.o $(mmu-y)
c1595e42 345+ iov_iter.o prfile.o debug.o $(mmu-y)
076b876e
AM
346
347 obj-y += init-mm.o
348
fb47a38f 349diff --git a/mm/filemap.c b/mm/filemap.c
7f2ca4b1 350index 7e6ab98..2fe1e57 100644
fb47a38f
JR
351--- a/mm/filemap.c
352+++ b/mm/filemap.c
7f2ca4b1 353@@ -2063,7 +2063,7 @@ int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
fb47a38f
JR
354 int ret = VM_FAULT_LOCKED;
355
356 sb_start_pagefault(inode->i_sb);
357- file_update_time(vma->vm_file);
358+ vma_file_update_time(vma);
359 lock_page(page);
360 if (page->mapping != inode->i_mapping) {
361 unlock_page(page);
362diff --git a/mm/fremap.c b/mm/fremap.c
076b876e 363index 72b8fa3..a00bbf0 100644
fb47a38f
JR
364--- a/mm/fremap.c
365+++ b/mm/fremap.c
076b876e 366@@ -224,16 +224,28 @@ get_write_lock:
38d290e6
JR
367 */
368 if (mapping_cap_account_dirty(mapping)) {
369 unsigned long addr;
370- struct file *file = get_file(vma->vm_file);
371+ struct file *file = vma->vm_file,
372+ *prfile = vma->vm_prfile;
373+
fb47a38f
JR
374 /* mmap_region may free vma; grab the info now */
375 vm_flags = vma->vm_flags;
376
377+ vma_get_file(vma);
378 addr = mmap_region(file, start, size, vm_flags, pgoff);
38d290e6 379- fput(file);
fb47a38f 380+ vma_fput(vma);
fb47a38f
JR
381 if (IS_ERR_VALUE(addr)) {
382 err = addr;
38d290e6
JR
383 } else {
384 BUG_ON(addr != start);
385+ if (prfile) {
386+ struct vm_area_struct *new_vma;
076b876e 387+
38d290e6
JR
388+ new_vma = find_vma(mm, addr);
389+ if (!new_vma->vm_prfile)
390+ new_vma->vm_prfile = prfile;
391+ if (new_vma != vma)
392+ get_file(prfile);
393+ }
394 err = 0;
395 }
396 goto out_freed;
fb47a38f 397diff --git a/mm/memory.c b/mm/memory.c
7f2ca4b1 398index 90fb265..844df2e 100644
fb47a38f
JR
399--- a/mm/memory.c
400+++ b/mm/memory.c
7f2ca4b1
JR
401@@ -2156,7 +2156,7 @@ reuse:
402
fb47a38f
JR
403 /* file_update_time outside page_lock */
404 if (vma->vm_file)
405- file_update_time(vma->vm_file);
406+ vma_file_update_time(vma);
407 }
408 put_page(dirty_page);
409 if (page_mkwrite) {
fb47a38f 410diff --git a/mm/mmap.c b/mm/mmap.c
7f2ca4b1 411index f88b4f9..9994987 100644
fb47a38f
JR
412--- a/mm/mmap.c
413+++ b/mm/mmap.c
c1595e42 414@@ -277,7 +277,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
fb47a38f
JR
415 if (vma->vm_ops && vma->vm_ops->close)
416 vma->vm_ops->close(vma);
417 if (vma->vm_file)
418- fput(vma->vm_file);
419+ vma_fput(vma);
420 mpol_put(vma_policy(vma));
421 kmem_cache_free(vm_area_cachep, vma);
422 return next;
c1595e42 423@@ -895,7 +895,7 @@ again: remove_next = 1 + (end > next->vm_end);
fb47a38f
JR
424 if (remove_next) {
425 if (file) {
426 uprobe_munmap(next, next->vm_start, next->vm_end);
427- fput(file);
428+ vma_fput(vma);
429 }
430 if (next->anon_vma)
431 anon_vma_merge(vma, next);
c1595e42 432@@ -1680,8 +1680,8 @@ out:
35939ee7
JR
433 return addr;
434
fb47a38f 435 unmap_and_free_vma:
fb47a38f
JR
436+ vma_fput(vma);
437 vma->vm_file = NULL;
438- fput(file);
439
440 /* Undo any partial mapping done by a device driver. */
441 unmap_region(mm, vma, prev, vma->vm_start, vma->vm_end);
7f2ca4b1 442@@ -2480,7 +2480,7 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
fb47a38f
JR
443 goto out_free_mpol;
444
445 if (new->vm_file)
446- get_file(new->vm_file);
447+ vma_get_file(new);
448
449 if (new->vm_ops && new->vm_ops->open)
450 new->vm_ops->open(new);
7f2ca4b1 451@@ -2499,7 +2499,7 @@ static int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
fb47a38f
JR
452 if (new->vm_ops && new->vm_ops->close)
453 new->vm_ops->close(new);
454 if (new->vm_file)
455- fput(new->vm_file);
456+ vma_fput(new);
457 unlink_anon_vmas(new);
458 out_free_mpol:
459 mpol_put(vma_policy(new));
7f2ca4b1 460@@ -2889,7 +2889,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
fb47a38f
JR
461 if (anon_vma_clone(new_vma, vma))
462 goto out_free_mempol;
463 if (new_vma->vm_file)
464- get_file(new_vma->vm_file);
465+ vma_get_file(new_vma);
466 if (new_vma->vm_ops && new_vma->vm_ops->open)
467 new_vma->vm_ops->open(new_vma);
468 vma_link(mm, new_vma, prev, rb_link, rb_parent);
fb47a38f 469diff --git a/mm/nommu.c b/mm/nommu.c
7f2ca4b1 470index b5ba5bc..a7662fc 100644
fb47a38f
JR
471--- a/mm/nommu.c
472+++ b/mm/nommu.c
076b876e 473@@ -658,7 +658,7 @@ static void __put_nommu_region(struct vm_region *region)
fb47a38f
JR
474 up_write(&nommu_region_sem);
475
476 if (region->vm_file)
477- fput(region->vm_file);
478+ vmr_fput(region);
479
480 /* IO memory and memory shared directly out of the pagecache
481 * from ramfs/tmpfs mustn't be released here */
076b876e 482@@ -823,7 +823,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
fb47a38f
JR
483 if (vma->vm_ops && vma->vm_ops->close)
484 vma->vm_ops->close(vma);
485 if (vma->vm_file)
486- fput(vma->vm_file);
487+ vma_fput(vma);
488 put_nommu_region(vma->vm_region);
489 kmem_cache_free(vm_area_cachep, vma);
490 }
076b876e 491@@ -1385,7 +1385,7 @@ unsigned long do_mmap_pgoff(struct file *file,
fb47a38f
JR
492 goto error_just_free;
493 }
494 }
495- fput(region->vm_file);
496+ vmr_fput(region);
497 kmem_cache_free(vm_region_jar, region);
498 region = pregion;
499 result = start;
076b876e 500@@ -1461,10 +1461,10 @@ error_just_free:
fb47a38f
JR
501 up_write(&nommu_region_sem);
502 error:
503 if (region->vm_file)
504- fput(region->vm_file);
505+ vmr_fput(region);
506 kmem_cache_free(vm_region_jar, region);
507 if (vma->vm_file)
508- fput(vma->vm_file);
509+ vma_fput(vma);
510 kmem_cache_free(vm_area_cachep, vma);
511 kleave(" = %d", ret);
512 return ret;
076b876e
AM
513diff --git a/mm/prfile.c b/mm/prfile.c
514new file mode 100644
7f2ca4b1 515index 0000000..532e518
076b876e
AM
516--- /dev/null
517+++ b/mm/prfile.c
518@@ -0,0 +1,86 @@
519+/*
520+ * Mainly for aufs which mmap(2) different file and wants to print different path
521+ * in /proc/PID/maps.
522+ * Call these functions via macros defined in linux/mm.h.
523+ *
524+ * See Documentation/filesystems/aufs/design/06mmap.txt
525+ *
526+ * Copyright (c) 2014 Junjiro R. Okajima
527+ * Copyright (c) 2014 Ian Campbell
528+ */
529+
530+#include <linux/mm.h>
531+#include <linux/file.h>
532+#include <linux/fs.h>
533+
534+/* #define PRFILE_TRACE */
535+static inline void prfile_trace(struct file *f, struct file *pr,
536+ const char func[], int line, const char func2[])
537+{
538+#ifdef PRFILE_TRACE
539+ if (pr)
7f2ca4b1 540+ pr_info("%s:%d: %s, %s\n", func, line, func2,
076b876e
AM
541+ f ? (char *)f->f_dentry->d_name.name : "(null)");
542+#endif
543+}
544+
076b876e
AM
545+void vma_do_file_update_time(struct vm_area_struct *vma, const char func[],
546+ int line)
547+{
548+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
549+
550+ prfile_trace(f, pr, func, line, __func__);
551+ file_update_time(f);
552+ if (f && pr)
553+ file_update_time(pr);
554+}
555+
556+struct file *vma_do_pr_or_file(struct vm_area_struct *vma, const char func[],
557+ int line)
558+{
559+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
560+
561+ prfile_trace(f, pr, func, line, __func__);
562+ return (f && pr) ? pr : f;
563+}
564+
565+void vma_do_get_file(struct vm_area_struct *vma, const char func[], int line)
566+{
567+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
568+
569+ prfile_trace(f, pr, func, line, __func__);
570+ get_file(f);
571+ if (f && pr)
572+ get_file(pr);
573+}
574+
575+void vma_do_fput(struct vm_area_struct *vma, const char func[], int line)
576+{
577+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
578+
579+ prfile_trace(f, pr, func, line, __func__);
580+ fput(f);
581+ if (f && pr)
582+ fput(pr);
583+}
7f2ca4b1
JR
584+
585+#ifndef CONFIG_MMU
076b876e
AM
586+struct file *vmr_do_pr_or_file(struct vm_region *region, const char func[],
587+ int line)
588+{
589+ struct file *f = region->vm_file, *pr = region->vm_prfile;
590+
591+ prfile_trace(f, pr, func, line, __func__);
592+ return (f && pr) ? pr : f;
593+}
594+
595+void vmr_do_fput(struct vm_region *region, const char func[], int line)
596+{
597+ struct file *f = region->vm_file, *pr = region->vm_prfile;
598+
599+ prfile_trace(f, pr, func, line, __func__);
600+ fput(f);
601+ if (f && pr)
602+ fput(pr);
603+}
7f2ca4b1
JR
604+#endif /* !CONFIG_MMU */
605aufs3.18.25+ standalone patch
7f207e10 606
c1595e42 607diff --git a/fs/dcache.c b/fs/dcache.c
7f2ca4b1 608index 857990a..4d3b72c 100644
c1595e42
JR
609--- a/fs/dcache.c
610+++ b/fs/dcache.c
7f2ca4b1 611@@ -1127,6 +1127,7 @@ rename_retry:
c1595e42
JR
612 seq = 1;
613 goto again;
614 }
615+EXPORT_SYMBOL(d_walk);
616
617 /*
618 * Search for at least 1 mount point in the dentry's subdirs.
7f2ca4b1
JR
619diff --git a/fs/file_table.c b/fs/file_table.c
620index 3f85411..8975b8f 100644
621--- a/fs/file_table.c
622+++ b/fs/file_table.c
623@@ -147,6 +147,7 @@ over:
624 }
625 return ERR_PTR(-ENFILE);
626 }
627+EXPORT_SYMBOL(get_empty_filp);
628
629 /**
630 * alloc_file - allocate and initialize a 'struct file'
631@@ -258,6 +259,7 @@ void flush_delayed_fput(void)
632 {
633 delayed_fput(NULL);
634 }
635+EXPORT_SYMBOL(flush_delayed_fput);
636
637 static DECLARE_DELAYED_WORK(delayed_fput_work, delayed_fput);
638
639@@ -300,6 +302,7 @@ void __fput_sync(struct file *file)
640 }
641
642 EXPORT_SYMBOL(fput);
643+EXPORT_SYMBOL(__fput_sync);
644
645 void put_filp(struct file *file)
646 {
647@@ -308,6 +311,7 @@ void put_filp(struct file *file)
648 file_free(file);
649 }
650 }
651+EXPORT_SYMBOL(put_filp);
652
653 void __init files_init(unsigned long mempages)
654 {
1e00d052 655diff --git a/fs/inode.c b/fs/inode.c
7f2ca4b1 656index 2998e86..dc11e3c 100644
1e00d052
AM
657--- a/fs/inode.c
658+++ b/fs/inode.c
392086de 659@@ -57,6 +57,7 @@ static struct hlist_head *inode_hashtable __read_mostly;
4b3da204 660 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock);
2cbb1c4b
JR
661
662 __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock);
2cbb1c4b 663+EXPORT_SYMBOL(inode_sb_list_lock);
7f207e10
AM
664
665 /*
4b3da204 666 * Empty aops. Can be used for the cases where the user does not
c1595e42 667@@ -1513,6 +1514,7 @@ int update_time(struct inode *inode, struct timespec *time, int flags)
0c3ec466
AM
668 mark_inode_dirty_sync(inode);
669 return 0;
670 }
671+EXPORT_SYMBOL(update_time);
672
673 /**
674 * touch_atime - update the access time
7f207e10 675diff --git a/fs/namespace.c b/fs/namespace.c
7f2ca4b1 676index da23ad8..1fe9810 100644
7f207e10
AM
677--- a/fs/namespace.c
678+++ b/fs/namespace.c
c1595e42 679@@ -454,6 +454,7 @@ void __mnt_drop_write(struct vfsmount *mnt)
c06a8ce3
AM
680 mnt_dec_writers(real_mount(mnt));
681 preempt_enable();
682 }
683+EXPORT_SYMBOL_GPL(__mnt_drop_write);
684
685 /**
686 * mnt_drop_write - give up write access to a mount
7f2ca4b1 687@@ -1736,6 +1737,7 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
7f207e10
AM
688 }
689 return 0;
690 }
691+EXPORT_SYMBOL(iterate_mounts);
692
7eafdf33 693 static void cleanup_group_ids(struct mount *mnt, struct mount *end)
7f207e10
AM
694 {
695diff --git a/fs/notify/group.c b/fs/notify/group.c
c1595e42 696index d16b62c..06ca6bc 100644
7f207e10
AM
697--- a/fs/notify/group.c
698+++ b/fs/notify/group.c
699@@ -22,6 +22,7 @@
700 #include <linux/srcu.h>
701 #include <linux/rculist.h>
702 #include <linux/wait.h>
703+#include <linux/module.h>
704
705 #include <linux/fsnotify_backend.h>
706 #include "fsnotify.h"
fb47a38f 707@@ -72,6 +73,7 @@ void fsnotify_get_group(struct fsnotify_group *group)
1716fcea
AM
708 {
709 atomic_inc(&group->refcnt);
710 }
711+EXPORT_SYMBOL(fsnotify_get_group);
712
713 /*
714 * Drop a reference to a group. Free it if it's through.
fb47a38f 715@@ -81,6 +83,7 @@ void fsnotify_put_group(struct fsnotify_group *group)
7f207e10 716 if (atomic_dec_and_test(&group->refcnt))
1716fcea 717 fsnotify_final_destroy_group(group);
7f207e10
AM
718 }
719+EXPORT_SYMBOL(fsnotify_put_group);
720
721 /*
722 * Create a new fsnotify_group and hold a reference for the group returned.
fb47a38f 723@@ -109,6 +112,7 @@ struct fsnotify_group *fsnotify_alloc_group(const struct fsnotify_ops *ops)
7f207e10
AM
724
725 return group;
726 }
727+EXPORT_SYMBOL(fsnotify_alloc_group);
1716fcea
AM
728
729 int fsnotify_fasync(int fd, struct file *file, int on)
730 {
7f207e10 731diff --git a/fs/notify/mark.c b/fs/notify/mark.c
7f2ca4b1 732index 28c90a6..b48025e 100644
7f207e10
AM
733--- a/fs/notify/mark.c
734+++ b/fs/notify/mark.c
392086de 735@@ -109,6 +109,7 @@ void fsnotify_put_mark(struct fsnotify_mark *mark)
7f207e10 736 mark->free_mark(mark);
1716fcea 737 }
7f207e10
AM
738 }
739+EXPORT_SYMBOL(fsnotify_put_mark);
740
741 /*
742 * Any time a mark is getting freed we end up here.
392086de 743@@ -191,6 +192,7 @@ void fsnotify_destroy_mark(struct fsnotify_mark *mark,
1716fcea
AM
744 fsnotify_destroy_mark_locked(mark, group);
745 mutex_unlock(&group->mark_mutex);
7f207e10
AM
746 }
747+EXPORT_SYMBOL(fsnotify_destroy_mark);
748
749 void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask)
750 {
c1595e42 751@@ -311,6 +313,7 @@ err:
7f207e10
AM
752
753 return ret;
754 }
755+EXPORT_SYMBOL(fsnotify_add_mark);
756
1716fcea
AM
757 int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group,
758 struct inode *inode, struct vfsmount *mnt, int allow_dups)
7f2ca4b1 759@@ -392,6 +395,7 @@ void fsnotify_init_mark(struct fsnotify_mark *mark,
7f207e10
AM
760 atomic_set(&mark->refcnt, 1);
761 mark->free_mark = free_mark;
762 }
763+EXPORT_SYMBOL(fsnotify_init_mark);
764
765 static int fsnotify_mark_destroy(void *ignored)
766 {
767diff --git a/fs/open.c b/fs/open.c
7f2ca4b1 768index d058ff1..696084a 100644
7f207e10
AM
769--- a/fs/open.c
770+++ b/fs/open.c
523b37e3 771@@ -62,6 +62,7 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
7f207e10
AM
772 mutex_unlock(&dentry->d_inode->i_mutex);
773 return ret;
774 }
775+EXPORT_SYMBOL(do_truncate);
776
1716fcea 777 long vfs_truncate(struct path *path, loff_t length)
7f207e10 778 {
076b876e 779@@ -298,6 +299,7 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
38d290e6
JR
780 sb_end_write(inode->i_sb);
781 return ret;
782 }
783+EXPORT_SYMBOL(do_fallocate);
784
785 SYSCALL_DEFINE4(fallocate, int, fd, int, mode, loff_t, offset, loff_t, len)
786 {
7f2ca4b1
JR
787@@ -663,6 +665,7 @@ int open_check_o_direct(struct file *f)
788 }
789 return 0;
790 }
791+EXPORT_SYMBOL(open_check_o_direct);
792
793 static int do_dentry_open(struct file *f,
794 struct inode *inode,
7f207e10 795diff --git a/fs/splice.c b/fs/splice.c
c1595e42 796index 619359a..c14f60e 100644
7f207e10
AM
797--- a/fs/splice.c
798+++ b/fs/splice.c
076b876e 799@@ -1127,6 +1127,7 @@ long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
392086de
AM
800
801 return splice_write(pipe, out, ppos, len, flags);
7f207e10
AM
802 }
803+EXPORT_SYMBOL(do_splice_from);
804
805 /*
806 * Attempt to initiate a splice from a file to a pipe.
076b876e 807@@ -1153,6 +1154,7 @@ long do_splice_to(struct file *in, loff_t *ppos,
7f207e10
AM
808
809 return splice_read(in, ppos, pipe, len, flags);
810 }
811+EXPORT_SYMBOL(do_splice_to);
812
813 /**
814 * splice_direct_to_actor - splices data directly between two non-pipes
c1595e42
JR
815diff --git a/fs/xattr.c b/fs/xattr.c
816index 64e83ef..bd71e53 100644
817--- a/fs/xattr.c
818+++ b/fs/xattr.c
819@@ -207,6 +207,7 @@ vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value,
820 *xattr_value = value;
821 return error;
822 }
823+EXPORT_SYMBOL(vfs_getxattr_alloc);
824
825 /* Compare an extended attribute value with the given value */
826 int vfs_xattr_cmp(struct dentry *dentry, const char *xattr_name,
7f2ca4b1
JR
827diff --git a/kernel/task_work.c b/kernel/task_work.c
828index 8727032..7003e5a 100644
829--- a/kernel/task_work.c
830+++ b/kernel/task_work.c
831@@ -126,3 +126,4 @@ void task_work_run(void)
832 } while (work);
833 }
834 }
835+EXPORT_SYMBOL(task_work_run);
7f207e10 836diff --git a/security/commoncap.c b/security/commoncap.c
c1595e42 837index bab0611..3fa2f82 100644
7f207e10
AM
838--- a/security/commoncap.c
839+++ b/security/commoncap.c
c1595e42 840@@ -979,9 +979,11 @@ int cap_mmap_addr(unsigned long addr)
94337f0d 841 }
7f207e10
AM
842 return ret;
843 }
0c3ec466
AM
844+EXPORT_SYMBOL(cap_mmap_addr);
845
846 int cap_mmap_file(struct file *file, unsigned long reqprot,
847 unsigned long prot, unsigned long flags)
848 {
849 return 0;
850 }
851+EXPORT_SYMBOL(cap_mmap_file);
7f207e10 852diff --git a/security/device_cgroup.c b/security/device_cgroup.c
c1595e42 853index 188c1d2..426d9af 100644
7f207e10
AM
854--- a/security/device_cgroup.c
855+++ b/security/device_cgroup.c
f6c5ef8b
AM
856@@ -7,6 +7,7 @@
857 #include <linux/device_cgroup.h>
858 #include <linux/cgroup.h>
859 #include <linux/ctype.h>
860+#include <linux/export.h>
861 #include <linux/list.h>
862 #include <linux/uaccess.h>
863 #include <linux/seq_file.h>
076b876e 864@@ -849,6 +850,7 @@ int __devcgroup_inode_permission(struct inode *inode, int mask)
537831f9
AM
865 return __devcgroup_check_permission(type, imajor(inode), iminor(inode),
866 access);
7f207e10 867 }
2cbb1c4b 868+EXPORT_SYMBOL(__devcgroup_inode_permission);
7f207e10
AM
869
870 int devcgroup_inode_mknod(int mode, dev_t dev)
871 {
872diff --git a/security/security.c b/security/security.c
c1595e42 873index 18b35c6..12c67af 100644
7f207e10
AM
874--- a/security/security.c
875+++ b/security/security.c
392086de 876@@ -407,6 +407,7 @@ int security_path_rmdir(struct path *dir, struct dentry *dentry)
7f207e10
AM
877 return 0;
878 return security_ops->path_rmdir(dir, dentry);
879 }
880+EXPORT_SYMBOL(security_path_rmdir);
881
882 int security_path_unlink(struct path *dir, struct dentry *dentry)
883 {
392086de 884@@ -423,6 +424,7 @@ int security_path_symlink(struct path *dir, struct dentry *dentry,
7f207e10
AM
885 return 0;
886 return security_ops->path_symlink(dir, dentry, old_name);
887 }
888+EXPORT_SYMBOL(security_path_symlink);
889
890 int security_path_link(struct dentry *old_dentry, struct path *new_dir,
891 struct dentry *new_dentry)
392086de 892@@ -431,6 +433,7 @@ int security_path_link(struct dentry *old_dentry, struct path *new_dir,
7f207e10
AM
893 return 0;
894 return security_ops->path_link(old_dentry, new_dir, new_dentry);
895 }
896+EXPORT_SYMBOL(security_path_link);
897
898 int security_path_rename(struct path *old_dir, struct dentry *old_dentry,
38d290e6
JR
899 struct path *new_dir, struct dentry *new_dentry,
900@@ -458,6 +461,7 @@ int security_path_truncate(struct path *path)
7f207e10
AM
901 return 0;
902 return security_ops->path_truncate(path);
903 }
904+EXPORT_SYMBOL(security_path_truncate);
905
7eafdf33
AM
906 int security_path_chmod(struct path *path, umode_t mode)
907 {
38d290e6 908@@ -465,6 +469,7 @@ int security_path_chmod(struct path *path, umode_t mode)
7f207e10 909 return 0;
7eafdf33 910 return security_ops->path_chmod(path, mode);
7f207e10
AM
911 }
912+EXPORT_SYMBOL(security_path_chmod);
913
537831f9 914 int security_path_chown(struct path *path, kuid_t uid, kgid_t gid)
7f207e10 915 {
38d290e6 916@@ -472,6 +477,7 @@ int security_path_chown(struct path *path, kuid_t uid, kgid_t gid)
7f207e10
AM
917 return 0;
918 return security_ops->path_chown(path, uid, gid);
919 }
920+EXPORT_SYMBOL(security_path_chown);
921
922 int security_path_chroot(struct path *path)
923 {
38d290e6 924@@ -557,6 +563,7 @@ int security_inode_readlink(struct dentry *dentry)
7f207e10
AM
925 return 0;
926 return security_ops->inode_readlink(dentry);
927 }
928+EXPORT_SYMBOL(security_inode_readlink);
929
930 int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd)
931 {
38d290e6 932@@ -571,6 +578,7 @@ int security_inode_permission(struct inode *inode, int mask)
7f207e10 933 return 0;
1e00d052 934 return security_ops->inode_permission(inode, mask);
7f207e10
AM
935 }
936+EXPORT_SYMBOL(security_inode_permission);
937
1e00d052 938 int security_inode_setattr(struct dentry *dentry, struct iattr *attr)
7f207e10 939 {
38d290e6 940@@ -693,6 +701,7 @@ int security_file_permission(struct file *file, int mask)
7f207e10
AM
941
942 return fsnotify_perm(file, mask);
943 }
944+EXPORT_SYMBOL(security_file_permission);
945
946 int security_file_alloc(struct file *file)
947 {
38d290e6 948@@ -753,6 +762,7 @@ int security_mmap_file(struct file *file, unsigned long prot,
7f207e10
AM
949 return ret;
950 return ima_file_mmap(file, prot);
951 }
0c3ec466 952+EXPORT_SYMBOL(security_mmap_file);
7f207e10 953
0c3ec466
AM
954 int security_mmap_addr(unsigned long addr)
955 {
7f207e10
AM
956diff -urN /usr/share/empty/Documentation/ABI/testing/debugfs-aufs linux/Documentation/ABI/testing/debugfs-aufs
957--- /usr/share/empty/Documentation/ABI/testing/debugfs-aufs 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 958+++ linux/Documentation/ABI/testing/debugfs-aufs 2016-02-28 11:27:01.273912209 +0100
86dc4139 959@@ -0,0 +1,50 @@
7f207e10
AM
960+What: /debug/aufs/si_<id>/
961+Date: March 2009
f6b6e03d 962+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
963+Description:
964+ Under /debug/aufs, a directory named si_<id> is created
965+ per aufs mount, where <id> is a unique id generated
966+ internally.
1facf9fc 967+
86dc4139
AM
968+What: /debug/aufs/si_<id>/plink
969+Date: Apr 2013
f6b6e03d 970+Contact: J. R. Okajima <hooanon05g@gmail.com>
86dc4139
AM
971+Description:
972+ It has three lines and shows the information about the
973+ pseudo-link. The first line is a single number
974+ representing a number of buckets. The second line is a
975+ number of pseudo-links per bucket (separated by a
976+ blank). The last line is a single number representing a
977+ total number of pseudo-links.
978+ When the aufs mount option 'noplink' is specified, it
979+ will show "1\n0\n0\n".
980+
7f207e10
AM
981+What: /debug/aufs/si_<id>/xib
982+Date: March 2009
f6b6e03d 983+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
984+Description:
985+ It shows the consumed blocks by xib (External Inode Number
986+ Bitmap), its block size and file size.
987+ When the aufs mount option 'noxino' is specified, it
988+ will be empty. About XINO files, see the aufs manual.
989+
990+What: /debug/aufs/si_<id>/xino0, xino1 ... xinoN
991+Date: March 2009
f6b6e03d 992+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
993+Description:
994+ It shows the consumed blocks by xino (External Inode Number
995+ Translation Table), its link count, block size and file
996+ size.
997+ When the aufs mount option 'noxino' is specified, it
998+ will be empty. About XINO files, see the aufs manual.
999+
1000+What: /debug/aufs/si_<id>/xigen
1001+Date: March 2009
f6b6e03d 1002+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1003+Description:
1004+ It shows the consumed blocks by xigen (External Inode
1005+ Generation Table), its block size and file size.
1006+ If CONFIG_AUFS_EXPORT is disabled, this entry will not
1007+ be created.
1008+ When the aufs mount option 'noxino' is specified, it
1009+ will be empty. About XINO files, see the aufs manual.
1010diff -urN /usr/share/empty/Documentation/ABI/testing/sysfs-aufs linux/Documentation/ABI/testing/sysfs-aufs
1011--- /usr/share/empty/Documentation/ABI/testing/sysfs-aufs 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 1012+++ linux/Documentation/ABI/testing/sysfs-aufs 2016-02-28 11:27:01.273912209 +0100
392086de 1013@@ -0,0 +1,31 @@
7f207e10
AM
1014+What: /sys/fs/aufs/si_<id>/
1015+Date: March 2009
f6b6e03d 1016+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1017+Description:
1018+ Under /sys/fs/aufs, a directory named si_<id> is created
1019+ per aufs mount, where <id> is a unique id generated
1020+ internally.
1021+
1022+What: /sys/fs/aufs/si_<id>/br0, br1 ... brN
1023+Date: March 2009
f6b6e03d 1024+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1025+Description:
1026+ It shows the absolute path of a member directory (which
1027+ is called branch) in aufs, and its permission.
1028+
392086de
AM
1029+What: /sys/fs/aufs/si_<id>/brid0, brid1 ... bridN
1030+Date: July 2013
f6b6e03d 1031+Contact: J. R. Okajima <hooanon05g@gmail.com>
392086de
AM
1032+Description:
1033+ It shows the id of a member directory (which is called
1034+ branch) in aufs.
1035+
7f207e10
AM
1036+What: /sys/fs/aufs/si_<id>/xi_path
1037+Date: March 2009
f6b6e03d 1038+Contact: J. R. Okajima <hooanon05g@gmail.com>
7f207e10
AM
1039+Description:
1040+ It shows the absolute path of XINO (External Inode Number
1041+ Bitmap, Translation Table and Generation Table) file
1042+ even if it is the default path.
1043+ When the aufs mount option 'noxino' is specified, it
1044+ will be empty. About XINO files, see the aufs manual.
53392da6
AM
1045diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/01intro.txt linux/Documentation/filesystems/aufs/design/01intro.txt
1046--- /usr/share/empty/Documentation/filesystems/aufs/design/01intro.txt 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
1047+++ linux/Documentation/filesystems/aufs/design/01intro.txt 2016-02-28 11:27:01.273912209 +0100
1048@@ -0,0 +1,170 @@
53392da6 1049+
7f2ca4b1 1050+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
1051+#
1052+# This program is free software; you can redistribute it and/or modify
1053+# it under the terms of the GNU General Public License as published by
1054+# the Free Software Foundation; either version 2 of the License, or
1055+# (at your option) any later version.
1056+#
1057+# This program is distributed in the hope that it will be useful,
1058+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1059+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1060+# GNU General Public License for more details.
1061+#
1062+# You should have received a copy of the GNU General Public License
523b37e3 1063+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1064+
1065+Introduction
1066+----------------------------------------
1067+
1068+aufs [ei ju: ef es] | [a u f s]
1069+1. abbrev. for "advanced multi-layered unification filesystem".
1070+2. abbrev. for "another unionfs".
1071+3. abbrev. for "auf das" in German which means "on the" in English.
1072+ Ex. "Butter aufs Brot"(G) means "butter onto bread"(E).
1073+ But "Filesystem aufs Filesystem" is hard to understand.
1074+
1075+AUFS is a filesystem with features:
1076+- multi layered stackable unification filesystem, the member directory
1077+ is called as a branch.
1078+- branch permission and attribute, 'readonly', 'real-readonly',
7f2ca4b1 1079+ 'readwrite', 'whiteout-able', 'link-able whiteout', etc. and their
53392da6
AM
1080+ combination.
1081+- internal "file copy-on-write".
1082+- logical deletion, whiteout.
1083+- dynamic branch manipulation, adding, deleting and changing permission.
1084+- allow bypassing aufs, user's direct branch access.
1085+- external inode number translation table and bitmap which maintains the
1086+ persistent aufs inode number.
1087+- seekable directory, including NFS readdir.
1088+- file mapping, mmap and sharing pages.
1089+- pseudo-link, hardlink over branches.
1090+- loopback mounted filesystem as a branch.
1091+- several policies to select one among multiple writable branches.
1092+- revert a single systemcall when an error occurs in aufs.
1093+- and more...
1094+
1095+
1096+Multi Layered Stackable Unification Filesystem
1097+----------------------------------------------------------------------
1098+Most people already know what it is.
1099+It is a filesystem which unifies several directories and provides a
1100+merged single directory. When users access a file, the access will be
1101+passed/re-directed/converted (sorry, I am not sure which English word is
1102+correct) to the real file on the member filesystem. The member
1103+filesystem is called 'lower filesystem' or 'branch' and has a mode
1104+'readonly' and 'readwrite.' And the deletion for a file on the lower
1105+readonly branch is handled by creating 'whiteout' on the upper writable
1106+branch.
1107+
1108+On LKML, there have been discussions about UnionMount (Jan Blunck,
1109+Bharata B Rao and Valerie Aurora) and Unionfs (Erez Zadok). They took
1110+different approaches to implement the merged-view.
1111+The former tries putting it into VFS, and the latter implements as a
1112+separate filesystem.
1113+(If I misunderstand about these implementations, please let me know and
1114+I shall correct it. Because it is a long time ago when I read their
1115+source files last time).
1116+
1117+UnionMount's approach will be able to be small, but may be hard to share
1118+branches between several UnionMount since the whiteout in it is
1119+implemented in the inode on branch filesystem and always
1120+shared. According to Bharata's post, readdir does not seem to be
1121+finished yet.
1122+There are several missing features known in this implementation such as
1123+- for users, the inode number may change silently. eg. copy-up.
1124+- link(2) may break by copy-up.
1125+- read(2) may get an obsoleted filedata (fstat(2) too).
1126+- fcntl(F_SETLK) may be broken by copy-up.
1127+- unnecessary copy-up may happen, for example mmap(MAP_PRIVATE) after
1128+ open(O_RDWR).
1129+
7f2ca4b1
JR
1130+In linux-3.18, "overlay" filesystem (formerly known as "overlayfs") was
1131+merged into mainline. This is another implementation of UnionMount as a
1132+separated filesystem. All the limitations and known problems which
1133+UnionMount has are equally inherited by "overlay" filesystem.
1134+
1135+Unionfs has a longer history. When I started implementing a stackable
1136+filesystem (Aug 2005), it already existed. It has virtual super_block,
1137+inode, dentry and file objects and they have an array pointing lower
1138+same kind objects. After contributing many patches for Unionfs, I
1139+re-started my project AUFS (Jun 2006).
53392da6
AM
1140+
1141+In AUFS, the structure of filesystem resembles to Unionfs, but I
1142+implemented my own ideas, approaches and enhancements and it became
1143+totally different one.
1144+
1145+Comparing DM snapshot and fs based implementation
1146+- the number of bytes to be copied between devices is much smaller.
1147+- the type of filesystem must be one and only.
1148+- the fs must be writable, no readonly fs, even for the lower original
1149+ device. so the compression fs will not be usable. but if we use
1150+ loopback mount, we may address this issue.
1151+ for instance,
1152+ mount /cdrom/squashfs.img /sq
1153+ losetup /sq/ext2.img
1154+ losetup /somewhere/cow
1155+ dmsetup "snapshot /dev/loop0 /dev/loop1 ..."
1156+- it will be difficult (or needs more operations) to extract the
1157+ difference between the original device and COW.
1158+- DM snapshot-merge may help a lot when users try merging. in the
1159+ fs-layer union, users will use rsync(1).
1160+
7f2ca4b1
JR
1161+You may want to read my old paper "Filesystems in LiveCD"
1162+(http://aufs.sourceforge.net/aufs2/report/sq/sq.pdf).
53392da6 1163+
7f2ca4b1
JR
1164+
1165+Several characters/aspects/persona of aufs
53392da6
AM
1166+----------------------------------------------------------------------
1167+
7f2ca4b1 1168+Aufs has several characters, aspects or persona.
53392da6
AM
1169+1. a filesystem, callee of VFS helper
1170+2. sub-VFS, caller of VFS helper for branches
1171+3. a virtual filesystem which maintains persistent inode number
1172+4. reader/writer of files on branches such like an application
1173+
1174+1. Callee of VFS Helper
1175+As an ordinary linux filesystem, aufs is a callee of VFS. For instance,
1176+unlink(2) from an application reaches sys_unlink() kernel function and
1177+then vfs_unlink() is called. vfs_unlink() is one of VFS helper and it
1178+calls filesystem specific unlink operation. Actually aufs implements the
1179+unlink operation but it behaves like a redirector.
1180+
1181+2. Caller of VFS Helper for Branches
1182+aufs_unlink() passes the unlink request to the branch filesystem as if
1183+it were called from VFS. So the called unlink operation of the branch
1184+filesystem acts as usual. As a caller of VFS helper, aufs should handle
1185+every necessary pre/post operation for the branch filesystem.
1186+- acquire the lock for the parent dir on a branch
1187+- lookup in a branch
1188+- revalidate dentry on a branch
1189+- mnt_want_write() for a branch
1190+- vfs_unlink() for a branch
1191+- mnt_drop_write() for a branch
1192+- release the lock on a branch
1193+
1194+3. Persistent Inode Number
1195+One of the most important issue for a filesystem is to maintain inode
1196+numbers. This is particularly important to support exporting a
1197+filesystem via NFS. Aufs is a virtual filesystem which doesn't have a
1198+backend block device for its own. But some storage is necessary to
7f2ca4b1
JR
1199+keep and maintain the inode numbers. It may be a large space and may not
1200+suit to keep in memory. Aufs rents some space from its first writable
1201+branch filesystem (by default) and creates file(s) on it. These files
1202+are created by aufs internally and removed soon (currently) keeping
1203+opened.
53392da6
AM
1204+Note: Because these files are removed, they are totally gone after
1205+ unmounting aufs. It means the inode numbers are not persistent
1206+ across unmount or reboot. I have a plan to make them really
1207+ persistent which will be important for aufs on NFS server.
1208+
1209+4. Read/Write Files Internally (copy-on-write)
1210+Because a branch can be readonly, when you write a file on it, aufs will
1211+"copy-up" it to the upper writable branch internally. And then write the
1212+originally requested thing to the file. Generally kernel doesn't
1213+open/read/write file actively. In aufs, even a single write may cause a
1214+internal "file copy". This behaviour is very similar to cp(1) command.
1215+
1216+Some people may think it is better to pass such work to user space
1217+helper, instead of doing in kernel space. Actually I am still thinking
1218+about it. But currently I have implemented it in kernel space.
1219diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/02struct.txt linux/Documentation/filesystems/aufs/design/02struct.txt
1220--- /usr/share/empty/Documentation/filesystems/aufs/design/02struct.txt 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
1221+++ linux/Documentation/filesystems/aufs/design/02struct.txt 2016-02-28 11:27:01.277245613 +0100
1222@@ -0,0 +1,258 @@
53392da6 1223+
7f2ca4b1 1224+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
1225+#
1226+# This program is free software; you can redistribute it and/or modify
1227+# it under the terms of the GNU General Public License as published by
1228+# the Free Software Foundation; either version 2 of the License, or
1229+# (at your option) any later version.
1230+#
1231+# This program is distributed in the hope that it will be useful,
1232+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1233+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1234+# GNU General Public License for more details.
1235+#
1236+# You should have received a copy of the GNU General Public License
523b37e3 1237+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1238+
1239+Basic Aufs Internal Structure
1240+
1241+Superblock/Inode/Dentry/File Objects
1242+----------------------------------------------------------------------
1243+As like an ordinary filesystem, aufs has its own
1244+superblock/inode/dentry/file objects. All these objects have a
1245+dynamically allocated array and store the same kind of pointers to the
1246+lower filesystem, branch.
1247+For example, when you build a union with one readwrite branch and one
1248+readonly, mounted /au, /rw and /ro respectively.
1249+- /au = /rw + /ro
1250+- /ro/fileA exists but /rw/fileA
1251+
1252+Aufs lookup operation finds /ro/fileA and gets dentry for that. These
1253+pointers are stored in an aufs dentry. The array in aufs dentry will be,
7f2ca4b1 1254+- [0] = NULL (because /rw/fileA doesn't exist)
53392da6
AM
1255+- [1] = /ro/fileA
1256+
1257+This style of an array is essentially same to the aufs
1258+superblock/inode/dentry/file objects.
1259+
1260+Because aufs supports manipulating branches, ie. add/delete/change
7f2ca4b1
JR
1261+branches dynamically, these objects have their own generation. When
1262+branches are changed, the generation in aufs superblock is
1263+incremented. And a generation in other object are compared when it is
1264+accessed. When a generation in other objects are obsoleted, aufs
1265+refreshes the internal array.
53392da6
AM
1266+
1267+
1268+Superblock
1269+----------------------------------------------------------------------
1270+Additionally aufs superblock has some data for policies to select one
1271+among multiple writable branches, XIB files, pseudo-links and kobject.
1272+See below in detail.
7f2ca4b1
JR
1273+About the policies which supports copy-down a directory, see
1274+wbr_policy.txt too.
53392da6
AM
1275+
1276+
1277+Branch and XINO(External Inode Number Translation Table)
1278+----------------------------------------------------------------------
1279+Every branch has its own xino (external inode number translation table)
1280+file. The xino file is created and unlinked by aufs internally. When two
1281+members of a union exist on the same filesystem, they share the single
1282+xino file.
1283+The struct of a xino file is simple, just a sequence of aufs inode
1284+numbers which is indexed by the lower inode number.
1285+In the above sample, assume the inode number of /ro/fileA is i111 and
1286+aufs assigns the inode number i999 for fileA. Then aufs writes 999 as
1287+4(8) bytes at 111 * 4(8) bytes offset in the xino file.
1288+
1289+When the inode numbers are not contiguous, the xino file will be sparse
1290+which has a hole in it and doesn't consume as much disk space as it
1291+might appear. If your branch filesystem consumes disk space for such
1292+holes, then you should specify 'xino=' option at mounting aufs.
1293+
7f2ca4b1
JR
1294+Aufs has a mount option to free the disk blocks for such holes in XINO
1295+files on tmpfs or ramdisk. But it is not so effective actually. If you
1296+meet a problem of disk shortage due to XINO files, then you should try
1297+"tmpfs-ino.patch" (and "vfs-ino.patch" too) in aufs4-standalone.git.
1298+The patch localizes the assignment inumbers per tmpfs-mount and avoid
1299+the holes in XINO files.
1300+
53392da6 1301+Also a writable branch has three kinds of "whiteout bases". All these
7f2ca4b1 1302+are existed when the branch is joined to aufs, and their names are
53392da6
AM
1303+whiteout-ed doubly, so that users will never see their names in aufs
1304+hierarchy.
7f2ca4b1 1305+1. a regular file which will be hardlinked to all whiteouts.
53392da6 1306+2. a directory to store a pseudo-link.
7f2ca4b1 1307+3. a directory to store an "orphan"-ed file temporary.
53392da6
AM
1308+
1309+1. Whiteout Base
1310+ When you remove a file on a readonly branch, aufs handles it as a
1311+ logical deletion and creates a whiteout on the upper writable branch
1312+ as a hardlink of this file in order not to consume inode on the
1313+ writable branch.
1314+2. Pseudo-link Dir
1315+ See below, Pseudo-link.
1316+3. Step-Parent Dir
1317+ When "fileC" exists on the lower readonly branch only and it is
1318+ opened and removed with its parent dir, and then user writes
1319+ something into it, then aufs copies-up fileC to this
1320+ directory. Because there is no other dir to store fileC. After
1321+ creating a file under this dir, the file is unlinked.
1322+
1323+Because aufs supports manipulating branches, ie. add/delete/change
7f2ca4b1
JR
1324+dynamically, a branch has its own id. When the branch order changes,
1325+aufs finds the new index by searching the branch id.
53392da6
AM
1326+
1327+
1328+Pseudo-link
1329+----------------------------------------------------------------------
1330+Assume "fileA" exists on the lower readonly branch only and it is
1331+hardlinked to "fileB" on the branch. When you write something to fileA,
1332+aufs copies-up it to the upper writable branch. Additionally aufs
1333+creates a hardlink under the Pseudo-link Directory of the writable
1334+branch. The inode of a pseudo-link is kept in aufs super_block as a
1335+simple list. If fileB is read after unlinking fileA, aufs returns
1336+filedata from the pseudo-link instead of the lower readonly
1337+branch. Because the pseudo-link is based upon the inode, to keep the
7f2ca4b1 1338+inode number by xino (see above) is essentially necessary.
53392da6
AM
1339+
1340+All the hardlinks under the Pseudo-link Directory of the writable branch
1341+should be restored in a proper location later. Aufs provides a utility
1342+to do this. The userspace helpers are executed at remounting and unmounting
1343+aufs by default.
1344+While this utility is running, it puts aufs into the pseudo-link
1345+maintenance mode. In this mode, only the process which began the
1346+maintenance mode (and its child processes) is allowed to operate in
1347+aufs. Some other processes which are not related to the pseudo-link will
1348+be allowed to run too, but the rest have to return an error or wait
1349+until the maintenance mode ends. If a process already acquires an inode
1350+mutex (in VFS), it has to return an error.
1351+
1352+
1353+XIB(external inode number bitmap)
1354+----------------------------------------------------------------------
1355+In addition to the xino file per branch, aufs has an external inode number
7f2ca4b1
JR
1356+bitmap in a superblock object. It is also an internal file such like a
1357+xino file.
53392da6
AM
1358+It is a simple bitmap to mark whether the aufs inode number is in-use or
1359+not.
1360+To reduce the file I/O, aufs prepares a single memory page to cache xib.
1361+
7f2ca4b1 1362+As well as XINO files, aufs has a feature to truncate/refresh XIB to
53392da6
AM
1363+reduce the number of consumed disk blocks for these files.
1364+
1365+
1366+Virtual or Vertical Dir, and Readdir in Userspace
1367+----------------------------------------------------------------------
1368+In order to support multiple layers (branches), aufs readdir operation
1369+constructs a virtual dir block on memory. For readdir, aufs calls
1370+vfs_readdir() internally for each dir on branches, merges their entries
1371+with eliminating the whiteout-ed ones, and sets it to file (dir)
1372+object. So the file object has its entry list until it is closed. The
1373+entry list will be updated when the file position is zero and becomes
7f2ca4b1 1374+obsoleted. This decision is made in aufs automatically.
53392da6
AM
1375+
1376+The dynamically allocated memory block for the name of entries has a
1377+unit of 512 bytes (by default) and stores the names contiguously (no
1378+padding). Another block for each entry is handled by kmem_cache too.
1379+During building dir blocks, aufs creates hash list and judging whether
1380+the entry is whiteouted by its upper branch or already listed.
1381+The merged result is cached in the corresponding inode object and
1382+maintained by a customizable life-time option.
1383+
1384+Some people may call it can be a security hole or invite DoS attack
1385+since the opened and once readdir-ed dir (file object) holds its entry
1386+list and becomes a pressure for system memory. But I'd say it is similar
1387+to files under /proc or /sys. The virtual files in them also holds a
1388+memory page (generally) while they are opened. When an idea to reduce
1389+memory for them is introduced, it will be applied to aufs too.
1390+For those who really hate this situation, I've developed readdir(3)
1391+library which operates this merging in userspace. You just need to set
1392+LD_PRELOAD environment variable, and aufs will not consume any memory in
1393+kernel space for readdir(3).
1394+
1395+
1396+Workqueue
1397+----------------------------------------------------------------------
1398+Aufs sometimes requires privilege access to a branch. For instance,
1399+in copy-up/down operation. When a user process is going to make changes
1400+to a file which exists in the lower readonly branch only, and the mode
1401+of one of ancestor directories may not be writable by a user
1402+process. Here aufs copy-up the file with its ancestors and they may
1403+require privilege to set its owner/group/mode/etc.
1404+This is a typical case of an application character of aufs (see
1405+Introduction).
1406+
1407+Aufs uses workqueue synchronously for this case. It creates its own
1408+workqueue. The workqueue is a kernel thread and has privilege. Aufs
1409+passes the request to call mkdir or write (for example), and wait for
1410+its completion. This approach solves a problem of a signal handler
1411+simply.
1412+If aufs didn't adopt the workqueue and changed the privilege of the
7f2ca4b1
JR
1413+process, then the process may receive the unexpected SIGXFSZ or other
1414+signals.
53392da6
AM
1415+
1416+Also aufs uses the system global workqueue ("events" kernel thread) too
1417+for asynchronous tasks, such like handling inotify/fsnotify, re-creating a
1418+whiteout base and etc. This is unrelated to a privilege.
1419+Most of aufs operation tries acquiring a rw_semaphore for aufs
1420+superblock at the beginning, at the same time waits for the completion
1421+of all queued asynchronous tasks.
1422+
1423+
1424+Whiteout
1425+----------------------------------------------------------------------
1426+The whiteout in aufs is very similar to Unionfs's. That is represented
1427+by its filename. UnionMount takes an approach of a file mode, but I am
1428+afraid several utilities (find(1) or something) will have to support it.
1429+
1430+Basically the whiteout represents "logical deletion" which stops aufs to
1431+lookup further, but also it represents "dir is opaque" which also stop
7f2ca4b1 1432+further lookup.
53392da6
AM
1433+
1434+In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively.
1435+In order to make several functions in a single systemcall to be
1436+revertible, aufs adopts an approach to rename a directory to a temporary
1437+unique whiteouted name.
1438+For example, in rename(2) dir where the target dir already existed, aufs
1439+renames the target dir to a temporary unique whiteouted name before the
7f2ca4b1 1440+actual rename on a branch, and then handles other actions (make it opaque,
53392da6
AM
1441+update the attributes, etc). If an error happens in these actions, aufs
1442+simply renames the whiteouted name back and returns an error. If all are
1443+succeeded, aufs registers a function to remove the whiteouted unique
1444+temporary name completely and asynchronously to the system global
1445+workqueue.
1446+
1447+
1448+Copy-up
1449+----------------------------------------------------------------------
1450+It is a well-known feature or concept.
1451+When a user modifies a file on a readonly branch, aufs operates "copy-up"
1452+internally and makes change to the new file on the upper writable branch.
1453+When the trigger systemcall does not update the timestamps of the parent
1454+dir, aufs reverts it after copy-up.
c2b27bf2
AM
1455+
1456+
1457+Move-down (aufs3.9 and later)
1458+----------------------------------------------------------------------
1459+"Copy-up" is one of the essential feature in aufs. It copies a file from
1460+the lower readonly branch to the upper writable branch when a user
1461+changes something about the file.
1462+"Move-down" is an opposite action of copy-up. Basically this action is
1463+run manually instead of automatically and internally.
076b876e
AM
1464+For design and implementation, aufs has to consider these issues.
1465+- whiteout for the file may exist on the lower branch.
1466+- ancestor directories may not exist on the lower branch.
1467+- diropq for the ancestor directories may exist on the upper branch.
1468+- free space on the lower branch will reduce.
1469+- another access to the file may happen during moving-down, including
7f2ca4b1 1470+ UDBA (see "Revalidate Dentry and UDBA").
076b876e
AM
1471+- the file should not be hard-linked nor pseudo-linked. they should be
1472+ handled by auplink utility later.
c2b27bf2
AM
1473+
1474+Sometimes users want to move-down a file from the upper writable branch
1475+to the lower readonly or writable branch. For instance,
1476+- the free space of the upper writable branch is going to run out.
1477+- create a new intermediate branch between the upper and lower branch.
1478+- etc.
1479+
1480+For this purpose, use "aumvdown" command in aufs-util.git.
7f2ca4b1
JR
1481diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/03atomic_open.txt linux/Documentation/filesystems/aufs/design/03atomic_open.txt
1482--- /usr/share/empty/Documentation/filesystems/aufs/design/03atomic_open.txt 1970-01-01 01:00:00.000000000 +0100
1483+++ linux/Documentation/filesystems/aufs/design/03atomic_open.txt 2016-02-28 11:27:01.277245613 +0100
1484@@ -0,0 +1,85 @@
1485+
1486+# Copyright (C) 2015-2016 Junjiro R. Okajima
1487+#
1488+# This program is free software; you can redistribute it and/or modify
1489+# it under the terms of the GNU General Public License as published by
1490+# the Free Software Foundation; either version 2 of the License, or
1491+# (at your option) any later version.
1492+#
1493+# This program is distributed in the hope that it will be useful,
1494+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1495+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1496+# GNU General Public License for more details.
1497+#
1498+# You should have received a copy of the GNU General Public License
1499+# along with this program. If not, see <http://www.gnu.org/licenses/>.
1500+
1501+Support for a branch who has its ->atomic_open()
1502+----------------------------------------------------------------------
1503+The filesystems who implement its ->atomic_open() are not majority. For
1504+example NFSv4 does, and aufs should call NFSv4 ->atomic_open,
1505+particularly for open(O_CREAT|O_EXCL, 0400) case. Other than
1506+->atomic_open(), NFSv4 returns an error for this open(2). While I am not
1507+sure whether all filesystems who have ->atomic_open() behave like this,
1508+NFSv4 surely returns the error.
1509+
1510+In order to support ->atomic_open() for aufs, there are a few
1511+approaches.
1512+
1513+A. Introduce aufs_atomic_open()
1514+ - calls one of VFS:do_last(), lookup_open() or atomic_open() for
1515+ branch fs.
1516+B. Introduce aufs_atomic_open() calling create, open and chmod. this is
1517+ an aufs user Pip Cet's approach
1518+ - calls aufs_create(), VFS finish_open() and notify_change().
1519+ - pass fake-mode to finish_open(), and then correct the mode by
1520+ notify_change().
1521+C. Extend aufs_open() to call branch fs's ->atomic_open()
1522+ - no aufs_atomic_open().
1523+ - aufs_lookup() registers the TID to an aufs internal object.
1524+ - aufs_create() does nothing when the matching TID is registered, but
1525+ registers the mode.
1526+ - aufs_open() calls branch fs's ->atomic_open() when the matching
1527+ TID is registered.
1528+D. Extend aufs_open() to re-try branch fs's ->open() with superuser's
1529+ credential
1530+ - no aufs_atomic_open().
1531+ - aufs_create() registers the TID to an internal object. this info
1532+ represents "this process created this file just now."
1533+ - when aufs gets EACCES from branch fs's ->open(), then confirm the
1534+ registered TID and re-try open() with superuser's credential.
1535+
1536+Pros and cons for each approach.
1537+
1538+A.
1539+ - straightforward but highly depends upon VFS internal.
1540+ - the atomic behaviour is kept.
1541+ - some of parameters such as nameidata are hard to reproduce for
1542+ branch fs.
1543+ - large overhead.
1544+B.
1545+ - easy to implement.
1546+ - the atomic behaviour is lost.
1547+C.
1548+ - the atomic behaviour is kept.
1549+ - dirty and tricky.
1550+ - VFS checks whether the file is created correctly after calling
1551+ ->create(), which means this approach doesn't work.
1552+D.
1553+ - easy to implement.
1554+ - the atomic behaviour is lost.
1555+ - to open a file with superuser's credential and give it to a user
1556+ process is a bad idea, since the file object keeps the credential
1557+ in it. It may affect LSM or something. This approach doesn't work
1558+ either.
1559+
1560+The approach A is ideal, but it is hard to implement. So here is a
1561+variation of A, which is to be implemented.
1562+
1563+A-1. Introduce aufs_atomic_open()
1564+ - calls branch fs ->atomic_open() if exists. otherwise calls
1565+ vfs_create() and finish_open().
1566+ - the demerit is that the several checks after branch fs
1567+ ->atomic_open() are lost. in the ordinary case, the checks are
1568+ done by VFS:do_last(), lookup_open() and atomic_open(). some can
1569+ be implemented in aufs, but not all I am afraid.
53392da6
AM
1570diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/03lookup.txt linux/Documentation/filesystems/aufs/design/03lookup.txt
1571--- /usr/share/empty/Documentation/filesystems/aufs/design/03lookup.txt 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
1572+++ linux/Documentation/filesystems/aufs/design/03lookup.txt 2016-02-28 11:27:01.277245613 +0100
1573@@ -0,0 +1,113 @@
53392da6 1574+
7f2ca4b1 1575+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
1576+#
1577+# This program is free software; you can redistribute it and/or modify
1578+# it under the terms of the GNU General Public License as published by
1579+# the Free Software Foundation; either version 2 of the License, or
1580+# (at your option) any later version.
1581+#
1582+# This program is distributed in the hope that it will be useful,
1583+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1584+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1585+# GNU General Public License for more details.
1586+#
1587+# You should have received a copy of the GNU General Public License
523b37e3 1588+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1589+
1590+Lookup in a Branch
1591+----------------------------------------------------------------------
1592+Since aufs has a character of sub-VFS (see Introduction), it operates
7f2ca4b1
JR
1593+lookup for branches as VFS does. It may be a heavy work. But almost all
1594+lookup operation in aufs is the simplest case, ie. lookup only an entry
1595+directly connected to its parent. Digging down the directory hierarchy
1596+is unnecessary. VFS has a function lookup_one_len() for that use, and
1597+aufs calls it.
1598+
1599+When a branch is a remote filesystem, aufs basically relies upon its
53392da6
AM
1600+->d_revalidate(), also aufs forces the hardest revalidate tests for
1601+them.
1602+For d_revalidate, aufs implements three levels of revalidate tests. See
1603+"Revalidate Dentry and UDBA" in detail.
1604+
1605+
076b876e
AM
1606+Test Only the Highest One for the Directory Permission (dirperm1 option)
1607+----------------------------------------------------------------------
1608+Let's try case study.
1609+- aufs has two branches, upper readwrite and lower readonly.
1610+ /au = /rw + /ro
1611+- "dirA" exists under /ro, but not under /rw, and its mode is 0700.
1612+- user invoked "chmod a+rx /au/dirA"
1613+- the internal copy-up is activated and "/rw/dirA" is created and its
7f2ca4b1 1614+ permission bits are set to world readable.
076b876e
AM
1615+- then "/au/dirA" becomes world readable?
1616+
1617+In this case, /ro/dirA is still 0700 since it exists in readonly branch,
1618+or it may be a natively readonly filesystem. If aufs respects the lower
1619+branch, it should not respond readdir request from other users. But user
1620+allowed it by chmod. Should aufs really reject showing the entries
1621+under /ro/dirA?
1622+
7f2ca4b1
JR
1623+To be honest, I don't have a good solution for this case. So aufs
1624+implements 'dirperm1' and 'nodirperm1' mount options, and leave it to
1625+users.
076b876e
AM
1626+When dirperm1 is specified, aufs checks only the highest one for the
1627+directory permission, and shows the entries. Otherwise, as usual, checks
1628+every dir existing on all branches and rejects the request.
1629+
1630+As a side effect, dirperm1 option improves the performance of aufs
1631+because the number of permission check is reduced when the number of
1632+branch is many.
1633+
1634+
53392da6
AM
1635+Revalidate Dentry and UDBA (User's Direct Branch Access)
1636+----------------------------------------------------------------------
1637+Generally VFS helpers re-validate a dentry as a part of lookup.
1638+0. digging down the directory hierarchy.
1639+1. lock the parent dir by its i_mutex.
1640+2. lookup the final (child) entry.
1641+3. revalidate it.
1642+4. call the actual operation (create, unlink, etc.)
1643+5. unlock the parent dir
1644+
1645+If the filesystem implements its ->d_revalidate() (step 3), then it is
1646+called. Actually aufs implements it and checks the dentry on a branch is
1647+still valid.
1648+But it is not enough. Because aufs has to release the lock for the
1649+parent dir on a branch at the end of ->lookup() (step 2) and
1650+->d_revalidate() (step 3) while the i_mutex of the aufs dir is still
1651+held by VFS.
1652+If the file on a branch is changed directly, eg. bypassing aufs, after
1653+aufs released the lock, then the subsequent operation may cause
1654+some unpleasant result.
1655+
1656+This situation is a result of VFS architecture, ->lookup() and
1657+->d_revalidate() are separated. But I never say it is wrong. It is a good
1658+design from VFS's point of view. It is just not suitable for sub-VFS
1659+character in aufs.
1660+
1661+Aufs supports such case by three levels of revalidation which is
1662+selectable by user.
1663+1. Simple Revalidate
1664+ Addition to the native flow in VFS's, confirm the child-parent
1665+ relationship on the branch just after locking the parent dir on the
1666+ branch in the "actual operation" (step 4). When this validation
1667+ fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still
1668+ checks the validation of the dentry on branches.
1669+2. Monitor Changes Internally by Inotify/Fsnotify
1670+ Addition to above, in the "actual operation" (step 4) aufs re-lookup
1671+ the dentry on the branch, and returns EBUSY if it finds different
1672+ dentry.
1673+ Additionally, aufs sets the inotify/fsnotify watch for every dir on branches
1674+ during it is in cache. When the event is notified, aufs registers a
1675+ function to kernel 'events' thread by schedule_work(). And the
1676+ function sets some special status to the cached aufs dentry and inode
1677+ private data. If they are not cached, then aufs has nothing to
1678+ do. When the same file is accessed through aufs (step 0-3) later,
1679+ aufs will detect the status and refresh all necessary data.
1680+ In this mode, aufs has to ignore the event which is fired by aufs
1681+ itself.
1682+3. No Extra Validation
1683+ This is the simplest test and doesn't add any additional revalidation
7f2ca4b1 1684+ test, and skip the revalidation in step 4. It is useful and improves
53392da6
AM
1685+ aufs performance when system surely hide the aufs branches from user,
1686+ by over-mounting something (or another method).
1687diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/04branch.txt linux/Documentation/filesystems/aufs/design/04branch.txt
1688--- /usr/share/empty/Documentation/filesystems/aufs/design/04branch.txt 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
1689+++ linux/Documentation/filesystems/aufs/design/04branch.txt 2016-02-28 11:27:01.277245613 +0100
1690@@ -0,0 +1,74 @@
53392da6 1691+
7f2ca4b1 1692+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
1693+#
1694+# This program is free software; you can redistribute it and/or modify
1695+# it under the terms of the GNU General Public License as published by
1696+# the Free Software Foundation; either version 2 of the License, or
1697+# (at your option) any later version.
1698+#
1699+# This program is distributed in the hope that it will be useful,
1700+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1701+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1702+# GNU General Public License for more details.
1703+#
1704+# You should have received a copy of the GNU General Public License
523b37e3 1705+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1706+
1707+Branch Manipulation
1708+
1709+Since aufs supports dynamic branch manipulation, ie. add/remove a branch
1710+and changing its permission/attribute, there are a lot of works to do.
1711+
1712+
1713+Add a Branch
1714+----------------------------------------------------------------------
1715+o Confirm the adding dir exists outside of aufs, including loopback
7f2ca4b1 1716+ mount, and its various attributes.
53392da6
AM
1717+o Initialize the xino file and whiteout bases if necessary.
1718+ See struct.txt.
1719+
1720+o Check the owner/group/mode of the directory
1721+ When the owner/group/mode of the adding directory differs from the
1722+ existing branch, aufs issues a warning because it may impose a
1723+ security risk.
1724+ For example, when a upper writable branch has a world writable empty
1725+ top directory, a malicious user can create any files on the writable
1726+ branch directly, like copy-up and modify manually. If something like
1727+ /etc/{passwd,shadow} exists on the lower readonly branch but the upper
1728+ writable branch, and the writable branch is world-writable, then a
1729+ malicious guy may create /etc/passwd on the writable branch directly
1730+ and the infected file will be valid in aufs.
7f2ca4b1 1731+ I am afraid it can be a security issue, but aufs can do nothing except
53392da6
AM
1732+ producing a warning.
1733+
1734+
1735+Delete a Branch
1736+----------------------------------------------------------------------
1737+o Confirm the deleting branch is not busy
1738+ To be general, there is one merit to adopt "remount" interface to
1739+ manipulate branches. It is to discard caches. At deleting a branch,
1740+ aufs checks the still cached (and connected) dentries and inodes. If
1741+ there are any, then they are all in-use. An inode without its
1742+ corresponding dentry can be alive alone (for example, inotify/fsnotify case).
1743+
1744+ For the cached one, aufs checks whether the same named entry exists on
1745+ other branches.
1746+ If the cached one is a directory, because aufs provides a merged view
1747+ to users, as long as one dir is left on any branch aufs can show the
1748+ dir to users. In this case, the branch can be removed from aufs.
1749+ Otherwise aufs rejects deleting the branch.
1750+
1751+ If any file on the deleting branch is opened by aufs, then aufs
1752+ rejects deleting.
1753+
1754+
1755+Modify the Permission of a Branch
1756+----------------------------------------------------------------------
1757+o Re-initialize or remove the xino file and whiteout bases if necessary.
1758+ See struct.txt.
1759+
1760+o rw --> ro: Confirm the modifying branch is not busy
1761+ Aufs rejects the request if any of these conditions are true.
1762+ - a file on the branch is mmap-ed.
1763+ - a regular file on the branch is opened for write and there is no
1764+ same named entry on the upper branch.
1765diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/05wbr_policy.txt linux/Documentation/filesystems/aufs/design/05wbr_policy.txt
1766--- /usr/share/empty/Documentation/filesystems/aufs/design/05wbr_policy.txt 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 1767+++ linux/Documentation/filesystems/aufs/design/05wbr_policy.txt 2016-02-28 11:27:01.277245613 +0100
523b37e3 1768@@ -0,0 +1,64 @@
53392da6 1769+
7f2ca4b1 1770+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
1771+#
1772+# This program is free software; you can redistribute it and/or modify
1773+# it under the terms of the GNU General Public License as published by
1774+# the Free Software Foundation; either version 2 of the License, or
1775+# (at your option) any later version.
1776+#
1777+# This program is distributed in the hope that it will be useful,
1778+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1779+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1780+# GNU General Public License for more details.
1781+#
1782+# You should have received a copy of the GNU General Public License
523b37e3 1783+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1784+
1785+Policies to Select One among Multiple Writable Branches
1786+----------------------------------------------------------------------
1787+When the number of writable branch is more than one, aufs has to decide
1788+the target branch for file creation or copy-up. By default, the highest
1789+writable branch which has the parent (or ancestor) dir of the target
1790+file is chosen (top-down-parent policy).
1791+By user's request, aufs implements some other policies to select the
7f2ca4b1
JR
1792+writable branch, for file creation several policies, round-robin,
1793+most-free-space, and other policies. For copy-up, top-down-parent,
1794+bottom-up-parent, bottom-up and others.
53392da6
AM
1795+
1796+As expected, the round-robin policy selects the branch in circular. When
1797+you have two writable branches and create 10 new files, 5 files will be
1798+created for each branch. mkdir(2) systemcall is an exception. When you
1799+create 10 new directories, all will be created on the same branch.
1800+And the most-free-space policy selects the one which has most free
1801+space among the writable branches. The amount of free space will be
1802+checked by aufs internally, and users can specify its time interval.
1803+
1804+The policies for copy-up are simpler,
1805+top-down-parent is equivalent to the same named one in create policy,
1806+bottom-up-parent selects the writable branch where the parent dir
1807+exists and the nearest upper one from the copyup-source,
1808+bottom-up selects the nearest upper writable branch from the
1809+copyup-source, regardless the existence of the parent dir.
1810+
1811+There are some rules or exceptions to apply these policies.
1812+- If there is a readonly branch above the policy-selected branch and
1813+ the parent dir is marked as opaque (a variation of whiteout), or the
1814+ target (creating) file is whiteout-ed on the upper readonly branch,
1815+ then the result of the policy is ignored and the target file will be
1816+ created on the nearest upper writable branch than the readonly branch.
1817+- If there is a writable branch above the policy-selected branch and
1818+ the parent dir is marked as opaque or the target file is whiteouted
1819+ on the branch, then the result of the policy is ignored and the target
1820+ file will be created on the highest one among the upper writable
1821+ branches who has diropq or whiteout. In case of whiteout, aufs removes
1822+ it as usual.
1823+- link(2) and rename(2) systemcalls are exceptions in every policy.
1824+ They try selecting the branch where the source exists as possible
1825+ since copyup a large file will take long time. If it can't be,
1826+ ie. the branch where the source exists is readonly, then they will
1827+ follow the copyup policy.
1828+- There is an exception for rename(2) when the target exists.
1829+ If the rename target exists, aufs compares the index of the branches
1830+ where the source and the target exists and selects the higher
1831+ one. If the selected branch is readonly, then aufs follows the
1832+ copyup policy.
076b876e
AM
1833diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06fhsm.txt linux/Documentation/filesystems/aufs/design/06fhsm.txt
1834--- /usr/share/empty/Documentation/filesystems/aufs/design/06fhsm.txt 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 1835+++ linux/Documentation/filesystems/aufs/design/06fhsm.txt 2016-02-28 11:27:01.277245613 +0100
076b876e
AM
1836@@ -0,0 +1,120 @@
1837+
7f2ca4b1 1838+# Copyright (C) 2011-2016 Junjiro R. Okajima
076b876e
AM
1839+#
1840+# This program is free software; you can redistribute it and/or modify
1841+# it under the terms of the GNU General Public License as published by
1842+# the Free Software Foundation; either version 2 of the License, or
1843+# (at your option) any later version.
1844+#
1845+# This program is distributed in the hope that it will be useful,
1846+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1847+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1848+# GNU General Public License for more details.
1849+#
1850+# You should have received a copy of the GNU General Public License
1851+# along with this program; if not, write to the Free Software
1852+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
1853+
1854+
1855+File-based Hierarchical Storage Management (FHSM)
1856+----------------------------------------------------------------------
1857+Hierarchical Storage Management (or HSM) is a well-known feature in the
1858+storage world. Aufs provides this feature as file-based with multiple
7f2ca4b1 1859+writable branches, based upon the principle of "Colder, the Lower".
076b876e 1860+Here the word "colder" means that the less used files, and "lower" means
7f2ca4b1 1861+that the position in the order of the stacked branches vertically.
076b876e
AM
1862+These multiple writable branches are prioritized, ie. the topmost one
1863+should be the fastest drive and be used heavily.
1864+
1865+o Characters in aufs FHSM story
1866+- aufs itself and a new branch attribute.
1867+- a new ioctl interface to move-down and to establish a connection with
1868+ the daemon ("move-down" is a converse of "copy-up").
1869+- userspace tool and daemon.
1870+
1871+The userspace daemon establishes a connection with aufs and waits for
1872+the notification. The notified information is very similar to struct
1873+statfs containing the number of consumed blocks and inodes.
1874+When the consumed blocks/inodes of a branch exceeds the user-specified
1875+upper watermark, the daemon activates its move-down process until the
1876+consumed blocks/inodes reaches the user-specified lower watermark.
1877+
1878+The actual move-down is done by aufs based upon the request from
1879+user-space since we need to maintain the inode number and the internal
1880+pointer arrays in aufs.
1881+
1882+Currently aufs FHSM handles the regular files only. Additionally they
1883+must not be hard-linked nor pseudo-linked.
1884+
1885+
1886+o Cowork of aufs and the user-space daemon
1887+ During the userspace daemon established the connection, aufs sends a
1888+ small notification to it whenever aufs writes something into the
1889+ writable branch. But it may cost high since aufs issues statfs(2)
1890+ internally. So user can specify a new option to cache the
1891+ info. Actually the notification is controlled by these factors.
1892+ + the specified cache time.
1893+ + classified as "force" by aufs internally.
1894+ Until the specified time expires, aufs doesn't send the info
1895+ except the forced cases. When aufs decide forcing, the info is always
1896+ notified to userspace.
1897+ For example, the number of free inodes is generally large enough and
1898+ the shortage of it happens rarely. So aufs doesn't force the
1899+ notification when creating a new file, directory and others. This is
1900+ the typical case which aufs doesn't force.
1901+ When aufs writes the actual filedata and the files consumes any of new
1902+ blocks, the aufs forces notifying.
1903+
1904+
1905+o Interfaces in aufs
1906+- New branch attribute.
1907+ + fhsm
1908+ Specifies that the branch is managed by FHSM feature. In other words,
1909+ participant in the FHSM.
1910+ When nofhsm is set to the branch, it will not be the source/target
1911+ branch of the move-down operation. This attribute is set
1912+ independently from coo and moo attributes, and if you want full
1913+ FHSM, you should specify them as well.
1914+- New mount option.
1915+ + fhsm_sec
1916+ Specifies a second to suppress many less important info to be
1917+ notified.
1918+- New ioctl.
1919+ + AUFS_CTL_FHSM_FD
1920+ create a new file descriptor which userspace can read the notification
1921+ (a subset of struct statfs) from aufs.
1922+- Module parameter 'brs'
1923+ It has to be set to 1. Otherwise the new mount option 'fhsm' will not
1924+ be set.
1925+- mount helpers /sbin/mount.aufs and /sbin/umount.aufs
1926+ When there are two or more branches with fhsm attributes,
1927+ /sbin/mount.aufs invokes the user-space daemon and /sbin/umount.aufs
1928+ terminates it. As a result of remounting and branch-manipulation, the
1929+ number of branches with fhsm attribute can be one. In this case,
1930+ /sbin/mount.aufs will terminate the user-space daemon.
1931+
1932+
1933+Finally the operation is done as these steps in kernel-space.
1934+- make sure that,
1935+ + no one else is using the file.
1936+ + the file is not hard-linked.
1937+ + the file is not pseudo-linked.
1938+ + the file is a regular file.
1939+ + the parent dir is not opaqued.
1940+- find the target writable branch.
1941+- make sure the file is not whiteout-ed by the upper (than the target)
1942+ branch.
1943+- make the parent dir on the target branch.
1944+- mutex lock the inode on the branch.
1945+- unlink the whiteout on the target branch (if exists).
1946+- lookup and create the whiteout-ed temporary name on the target branch.
1947+- copy the file as the whiteout-ed temporary name on the target branch.
1948+- rename the whiteout-ed temporary name to the original name.
1949+- unlink the file on the source branch.
1950+- maintain the internal pointer array and the external inode number
1951+ table (XINO).
1952+- maintain the timestamps and other attributes of the parent dir and the
1953+ file.
1954+
1955+And of course, in every step, an error may happen. So the operation
1956+should restore the original file state after an error happens.
53392da6
AM
1957diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06mmap.txt linux/Documentation/filesystems/aufs/design/06mmap.txt
1958--- /usr/share/empty/Documentation/filesystems/aufs/design/06mmap.txt 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
1959+++ linux/Documentation/filesystems/aufs/design/06mmap.txt 2016-02-28 11:27:01.277245613 +0100
1960@@ -0,0 +1,72 @@
53392da6 1961+
7f2ca4b1 1962+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
1963+#
1964+# This program is free software; you can redistribute it and/or modify
1965+# it under the terms of the GNU General Public License as published by
1966+# the Free Software Foundation; either version 2 of the License, or
1967+# (at your option) any later version.
1968+#
1969+# This program is distributed in the hope that it will be useful,
1970+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1971+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1972+# GNU General Public License for more details.
1973+#
1974+# You should have received a copy of the GNU General Public License
523b37e3 1975+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
1976+
1977+mmap(2) -- File Memory Mapping
1978+----------------------------------------------------------------------
1979+In aufs, the file-mapped pages are handled by a branch fs directly, no
1980+interaction with aufs. It means aufs_mmap() calls the branch fs's
1981+->mmap().
1982+This approach is simple and good, but there is one problem.
7f2ca4b1 1983+Under /proc, several entries show the mmapped files by its path (with
53392da6
AM
1984+device and inode number), and the printed path will be the path on the
1985+branch fs's instead of virtual aufs's.
1986+This is not a problem in most cases, but some utilities lsof(1) (and its
1987+user) may expect the path on aufs.
1988+
1989+To address this issue, aufs adds a new member called vm_prfile in struct
1990+vm_area_struct (and struct vm_region). The original vm_file points to
1991+the file on the branch fs in order to handle everything correctly as
1992+usual. The new vm_prfile points to a virtual file in aufs, and the
1993+show-functions in procfs refers to vm_prfile if it is set.
1994+Also we need to maintain several other places where touching vm_file
1995+such like
1996+- fork()/clone() copies vma and the reference count of vm_file is
1997+ incremented.
1998+- merging vma maintains the ref count too.
1999+
7f2ca4b1 2000+This is not a good approach. It just fakes the printed path. But it
53392da6
AM
2001+leaves all behaviour around f_mapping unchanged. This is surely an
2002+advantage.
2003+Actually aufs had adopted another complicated approach which calls
2004+generic_file_mmap() and handles struct vm_operations_struct. In this
2005+approach, aufs met a hard problem and I could not solve it without
2006+switching the approach.
7f2ca4b1
JR
2007+
2008+There may be one more approach which is
2009+- bind-mount the branch-root onto the aufs-root internally
2010+- grab the new vfsmount (ie. struct mount)
2011+- lazy-umount the branch-root internally
2012+- in open(2) the aufs-file, open the branch-file with the hidden
2013+ vfsmount (instead of the original branch's vfsmount)
2014+- ideally this "bind-mount and lazy-umount" should be done atomically,
2015+ but it may be possible from userspace by the mount helper.
2016+
2017+Adding the internal hidden vfsmount and using it in opening a file, the
2018+file path under /proc will be printed correctly. This approach looks
2019+smarter, but is not possible I am afraid.
2020+- aufs-root may be bind-mount later. when it happens, another hidden
2021+ vfsmount will be required.
2022+- it is hard to get the chance to bind-mount and lazy-umount
2023+ + in kernel-space, FS can have vfsmount in open(2) via
2024+ file->f_path, and aufs can know its vfsmount. But several locks are
2025+ already acquired, and if aufs tries to bind-mount and lazy-umount
2026+ here, then it may cause a deadlock.
2027+ + in user-space, bind-mount doesn't invoke the mount helper.
2028+- since /proc shows dev and ino, aufs has to give vma these info. it
2029+ means a new member vm_prinode will be necessary. this is essentially
2030+ equivalent to vm_prfile described above.
2031+
2032+I have to give up this "looks-smarter" approach.
c1595e42
JR
2033diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06xattr.txt linux/Documentation/filesystems/aufs/design/06xattr.txt
2034--- /usr/share/empty/Documentation/filesystems/aufs/design/06xattr.txt 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 2035+++ linux/Documentation/filesystems/aufs/design/06xattr.txt 2016-02-28 11:27:01.277245613 +0100
c1595e42
JR
2036@@ -0,0 +1,96 @@
2037+
7f2ca4b1 2038+# Copyright (C) 2014-2016 Junjiro R. Okajima
c1595e42
JR
2039+#
2040+# This program is free software; you can redistribute it and/or modify
2041+# it under the terms of the GNU General Public License as published by
2042+# the Free Software Foundation; either version 2 of the License, or
2043+# (at your option) any later version.
2044+#
2045+# This program is distributed in the hope that it will be useful,
2046+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2047+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2048+# GNU General Public License for more details.
2049+#
2050+# You should have received a copy of the GNU General Public License
2051+# along with this program; if not, write to the Free Software
2052+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
2053+
2054+
2055+Listing XATTR/EA and getting the value
2056+----------------------------------------------------------------------
2057+For the inode standard attributes (owner, group, timestamps, etc.), aufs
2058+shows the values from the topmost existing file. This behaviour is good
7f2ca4b1 2059+for the non-dir entries since the behaviour exactly matches the shown
c1595e42
JR
2060+information. But for the directories, aufs considers all the same named
2061+entries on the lower branches. Which means, if one of the lower entry
2062+rejects readdir call, then aufs returns an error even if the topmost
2063+entry allows it. This behaviour is necessary to respect the branch fs's
2064+security, but can make users confused since the user-visible standard
2065+attributes don't match the behaviour.
2066+To address this issue, aufs has a mount option called dirperm1 which
2067+checks the permission for the topmost entry only, and ignores the lower
2068+entry's permission.
2069+
2070+A similar issue can happen around XATTR.
2071+getxattr(2) and listxattr(2) families behave as if dirperm1 option is
7f2ca4b1
JR
2072+always set. Otherwise these very unpleasant situations would happen.
2073+- listxattr(2) may return the duplicated entries.
c1595e42
JR
2074+- users may not be able to remove or reset the XATTR forever.
2075+
2076+
2077+XATTR/EA support in the internal (copy,move)-(up,down)
2078+----------------------------------------------------------------------
7f2ca4b1 2079+Generally the extended attributes of inode are categorized as these.
c1595e42
JR
2080+- "security" for LSM and capability.
2081+- "system" for posix ACL, 'acl' mount option is required for the branch
2082+ fs generally.
2083+- "trusted" for userspace, CAP_SYS_ADMIN is required.
2084+- "user" for userspace, 'user_xattr' mount option is required for the
2085+ branch fs generally.
2086+
2087+Moreover there are some other categories. Aufs handles these rather
2088+unpopular categories as the ordinary ones, ie. there is no special
2089+condition nor exception.
2090+
2091+In copy-up, the support for XATTR on the dst branch may differ from the
2092+src branch. In this case, the copy-up operation will get an error and
7f2ca4b1
JR
2093+the original user operation which triggered the copy-up will fail. It
2094+can happen that even all copy-up will fail.
c1595e42
JR
2095+When both of src and dst branches support XATTR and if an error occurs
2096+during copying XATTR, then the copy-up should fail obviously. That is a
2097+good reason and aufs should return an error to userspace. But when only
7f2ca4b1 2098+the src branch supports that XATTR, aufs should not return an error.
c1595e42
JR
2099+For example, the src branch supports ACL but the dst branch doesn't
2100+because the dst branch may natively un-support it or temporarily
2101+un-support it due to "noacl" mount option. Of course, the dst branch fs
2102+may NOT return an error even if the XATTR is not supported. It is
2103+totally up to the branch fs.
2104+
2105+Anyway when the aufs internal copy-up gets an error from the dst branch
2106+fs, then aufs tries removing the just copied entry and returns the error
2107+to the userspace. The worst case of this situation will be all copy-up
2108+will fail.
2109+
2110+For the copy-up operation, there are two basic approaches.
2111+- copy the specified XATTR only (by category above), and return the
7f2ca4b1 2112+ error unconditionally if it happens.
c1595e42
JR
2113+- copy all XATTR, and ignore the error on the specified category only.
2114+
2115+In order to support XATTR and to implement the correct behaviour, aufs
7f2ca4b1
JR
2116+chooses the latter approach and introduces some new branch attributes,
2117+"icexsec", "icexsys", "icextr", "icexusr", and "icexoth".
c1595e42 2118+They correspond to the XATTR namespaces (see above). Additionally, to be
7f2ca4b1
JR
2119+convenient, "icex" is also provided which means all "icex*" attributes
2120+are set (here the word "icex" stands for "ignore copy-error on XATTR").
c1595e42
JR
2121+
2122+The meaning of these attributes is to ignore the error from setting
2123+XATTR on that branch.
2124+Note that aufs tries copying all XATTR unconditionally, and ignores the
2125+error from the dst branch according to the specified attributes.
2126+
2127+Some XATTR may have its default value. The default value may come from
2128+the parent dir or the environment. If the default value is set at the
2129+file creating-time, it will be overwritten by copy-up.
2130+Some contradiction may happen I am afraid.
2131+Do we need another attribute to stop copying XATTR? I am unsure. For
2132+now, aufs implements the branch attributes to ignore the error.
53392da6
AM
2133diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/07export.txt linux/Documentation/filesystems/aufs/design/07export.txt
2134--- /usr/share/empty/Documentation/filesystems/aufs/design/07export.txt 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 2135+++ linux/Documentation/filesystems/aufs/design/07export.txt 2016-02-28 11:27:01.277245613 +0100
523b37e3 2136@@ -0,0 +1,58 @@
53392da6 2137+
7f2ca4b1 2138+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
2139+#
2140+# This program is free software; you can redistribute it and/or modify
2141+# it under the terms of the GNU General Public License as published by
2142+# the Free Software Foundation; either version 2 of the License, or
2143+# (at your option) any later version.
2144+#
2145+# This program is distributed in the hope that it will be useful,
2146+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2147+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2148+# GNU General Public License for more details.
2149+#
2150+# You should have received a copy of the GNU General Public License
523b37e3 2151+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
2152+
2153+Export Aufs via NFS
2154+----------------------------------------------------------------------
2155+Here is an approach.
2156+- like xino/xib, add a new file 'xigen' which stores aufs inode
2157+ generation.
2158+- iget_locked(): initialize aufs inode generation for a new inode, and
2159+ store it in xigen file.
2160+- destroy_inode(): increment aufs inode generation and store it in xigen
2161+ file. it is necessary even if it is not unlinked, because any data of
2162+ inode may be changed by UDBA.
2163+- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise
2164+ build file handle by
2165+ + branch id (4 bytes)
2166+ + superblock generation (4 bytes)
2167+ + inode number (4 or 8 bytes)
2168+ + parent dir inode number (4 or 8 bytes)
2169+ + inode generation (4 bytes)
2170+ + return value of exportfs_encode_fh() for the parent on a branch (4
2171+ bytes)
2172+ + file handle for a branch (by exportfs_encode_fh())
2173+- fh_to_dentry():
2174+ + find the index of a branch from its id in handle, and check it
2175+ still exists in aufs.
2176+ + 1st level: get the inode number from handle and search it in cache.
7f2ca4b1
JR
2177+ + 2nd level: if not found in cache, get the parent inode number from
2178+ the handle and search it in cache. and then open the found parent
2179+ dir, find the matching inode number by vfs_readdir() and get its
2180+ name, and call lookup_one_len() for the target dentry.
53392da6
AM
2181+ + 3rd level: if the parent dir is not cached, call
2182+ exportfs_decode_fh() for a branch and get the parent on a branch,
2183+ build a pathname of it, convert it to a pathname in aufs, call
2184+ path_lookup(). now aufs gets a parent dir dentry, then handle it as
2185+ the 2nd level.
2186+ + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount
2187+ for every branch, but not itself. to get this, (currently) aufs
2188+ searches in current->nsproxy->mnt_ns list. it may not be a good
2189+ idea, but I didn't get other approach.
2190+ + test the generation of the gotten inode.
2191+- every inode operation: they may get EBUSY due to UDBA. in this case,
2192+ convert it into ESTALE for NFSD.
2193+- readdir(): call lockdep_on/off() because filldir in NFSD calls
2194+ lookup_one_len(), vfs_getattr(), encode_fh() and others.
2195diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/08shwh.txt linux/Documentation/filesystems/aufs/design/08shwh.txt
2196--- /usr/share/empty/Documentation/filesystems/aufs/design/08shwh.txt 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 2197+++ linux/Documentation/filesystems/aufs/design/08shwh.txt 2016-02-28 11:27:01.277245613 +0100
523b37e3 2198@@ -0,0 +1,52 @@
53392da6 2199+
7f2ca4b1 2200+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
2201+#
2202+# This program is free software; you can redistribute it and/or modify
2203+# it under the terms of the GNU General Public License as published by
2204+# the Free Software Foundation; either version 2 of the License, or
2205+# (at your option) any later version.
2206+#
2207+# This program is distributed in the hope that it will be useful,
2208+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2209+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2210+# GNU General Public License for more details.
2211+#
2212+# You should have received a copy of the GNU General Public License
523b37e3 2213+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
2214+
2215+Show Whiteout Mode (shwh)
2216+----------------------------------------------------------------------
2217+Generally aufs hides the name of whiteouts. But in some cases, to show
2218+them is very useful for users. For instance, creating a new middle layer
2219+(branch) by merging existing layers.
2220+
2221+(borrowing aufs1 HOW-TO from a user, Michael Towers)
2222+When you have three branches,
2223+- Bottom: 'system', squashfs (underlying base system), read-only
2224+- Middle: 'mods', squashfs, read-only
2225+- Top: 'overlay', ram (tmpfs), read-write
2226+
2227+The top layer is loaded at boot time and saved at shutdown, to preserve
2228+the changes made to the system during the session.
2229+When larger changes have been made, or smaller changes have accumulated,
2230+the size of the saved top layer data grows. At this point, it would be
2231+nice to be able to merge the two overlay branches ('mods' and 'overlay')
2232+and rewrite the 'mods' squashfs, clearing the top layer and thus
2233+restoring save and load speed.
2234+
2235+This merging is simplified by the use of another aufs mount, of just the
2236+two overlay branches using the 'shwh' option.
2237+# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \
2238+ aufs /livesys/merge_union
2239+
2240+A merged view of these two branches is then available at
2241+/livesys/merge_union, and the new feature is that the whiteouts are
2242+visible!
2243+Note that in 'shwh' mode the aufs mount must be 'ro', which will disable
2244+writing to all branches. Also the default mode for all branches is 'ro'.
2245+It is now possible to save the combined contents of the two overlay
2246+branches to a new squashfs, e.g.:
2247+# mksquashfs /livesys/merge_union /path/to/newmods.squash
2248+
2249+This new squashfs archive can be stored on the boot device and the
2250+initramfs will use it to replace the old one at the next boot.
2251diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/10dynop.txt linux/Documentation/filesystems/aufs/design/10dynop.txt
2252--- /usr/share/empty/Documentation/filesystems/aufs/design/10dynop.txt 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
2253+++ linux/Documentation/filesystems/aufs/design/10dynop.txt 2016-02-28 11:27:01.277245613 +0100
2254@@ -0,0 +1,47 @@
53392da6 2255+
7f2ca4b1 2256+# Copyright (C) 2010-2016 Junjiro R. Okajima
53392da6
AM
2257+#
2258+# This program is free software; you can redistribute it and/or modify
2259+# it under the terms of the GNU General Public License as published by
2260+# the Free Software Foundation; either version 2 of the License, or
2261+# (at your option) any later version.
2262+#
2263+# This program is distributed in the hope that it will be useful,
2264+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2265+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2266+# GNU General Public License for more details.
2267+#
2268+# You should have received a copy of the GNU General Public License
523b37e3 2269+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
2270+
2271+Dynamically customizable FS operations
2272+----------------------------------------------------------------------
2273+Generally FS operations (struct inode_operations, struct
2274+address_space_operations, struct file_operations, etc.) are defined as
2275+"static const", but it never means that FS have only one set of
2276+operation. Some FS have multiple sets of them. For instance, ext2 has
2277+three sets, one for XIP, for NOBH, and for normal.
2278+Since aufs overrides and redirects these operations, sometimes aufs has
7f2ca4b1 2279+to change its behaviour according to the branch FS type. More importantly
53392da6
AM
2280+VFS acts differently if a function (member in the struct) is set or
2281+not. It means aufs should have several sets of operations and select one
2282+among them according to the branch FS definition.
2283+
7f2ca4b1 2284+In order to solve this problem and not to affect the behaviour of VFS,
53392da6 2285+aufs defines these operations dynamically. For instance, aufs defines
7f2ca4b1
JR
2286+dummy direct_IO function for struct address_space_operations, but it may
2287+not be set to the address_space_operations actually. When the branch FS
2288+doesn't have it, aufs doesn't set it to its address_space_operations
2289+while the function definition itself is still alive. So the behaviour
2290+itself will not change, and it will return an error when direct_IO is
2291+not set.
53392da6
AM
2292+
2293+The lifetime of these dynamically generated operation object is
2294+maintained by aufs branch object. When the branch is removed from aufs,
2295+the reference counter of the object is decremented. When it reaches
2296+zero, the dynamically generated operation object will be freed.
2297+
7f2ca4b1
JR
2298+This approach is designed to support AIO (io_submit), Direct I/O and
2299+XIP (DAX) mainly.
2300+Currently this approach is applied to address_space_operations for
2301+regular files only.
53392da6
AM
2302diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/99plan.txt linux/Documentation/filesystems/aufs/design/99plan.txt
2303--- /usr/share/empty/Documentation/filesystems/aufs/design/99plan.txt 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
2304+++ linux/Documentation/filesystems/aufs/design/99plan.txt 2016-02-28 11:27:01.277245613 +0100
2305@@ -0,0 +1,57 @@
53392da6 2306+
7f2ca4b1 2307+# Copyright (C) 2005-2016 Junjiro R. Okajima
53392da6
AM
2308+#
2309+# This program is free software; you can redistribute it and/or modify
2310+# it under the terms of the GNU General Public License as published by
2311+# the Free Software Foundation; either version 2 of the License, or
2312+# (at your option) any later version.
2313+#
2314+# This program is distributed in the hope that it will be useful,
2315+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2316+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2317+# GNU General Public License for more details.
2318+#
2319+# You should have received a copy of the GNU General Public License
523b37e3 2320+# along with this program. If not, see <http://www.gnu.org/licenses/>.
53392da6
AM
2321+
2322+Plan
2323+
2324+Restoring some features which were implemented in aufs1.
2325+They were dropped in aufs2 in order to make source files simpler and
2326+easier to be reviewed.
2327+
2328+
53392da6
AM
2329+Being Another Aufs's Readonly Branch (robr)
2330+----------------------------------------------------------------------
2331+Aufs1 allows aufs to be another aufs's readonly branch.
2332+This feature was developed by a user's request. But it may not be used
7f2ca4b1 2333+currently.
53392da6
AM
2334+
2335+
53392da6
AM
2336+Refresh the Opened File (refrof)
2337+----------------------------------------------------------------------
2338+This option is implemented in aufs1 but incomplete.
2339+
2340+When user reads from a file, he expects to get its latest filedata
2341+generally. If the file is removed and a new same named file is created,
2342+the content he gets is unchanged, ie. the unlinked filedata.
2343+
2344+Let's try case study again.
2345+- aufs has two branches.
2346+ /au = /rw + /ro
2347+- "fileA" exists under /ro, but not under /rw.
2348+- user opened "/au/fileA".
2349+- he or someone else inserts a branch (/new) between /rw and /ro.
2350+ /au = /rw + /new + /ro
7f2ca4b1 2351+- the new branch contains "fileA".
53392da6
AM
2352+- user reads from the opened "fileA"
2353+- which filedata should aufs return, from /ro or /new?
2354+
2355+Some people say it has to be "from /ro" and it is a semantics of Unix.
2356+The others say it should be "from /new" because the file is not removed
2357+and it is equivalent to the case of someone else modifies the file.
2358+
2359+Here again I don't have a best and final answer. I got an idea to
2360+implement 'refrof' and 'norefrof' option. When 'refrof' (REFResh the
2361+Opened File) is specified (by default), aufs returns the filedata from
7f2ca4b1 2362+/new. Otherwise from /ro.
53392da6
AM
2363diff -urN /usr/share/empty/Documentation/filesystems/aufs/README linux/Documentation/filesystems/aufs/README
2364--- /usr/share/empty/Documentation/filesystems/aufs/README 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
2365+++ linux/Documentation/filesystems/aufs/README 2016-02-28 11:27:01.273912209 +0100
2366@@ -0,0 +1,392 @@
53392da6
AM
2367+
2368+Aufs3 -- advanced multi layered unification filesystem version 3.x
2369+http://aufs.sf.net
2370+Junjiro R. Okajima
2371+
2372+
2373+0. Introduction
2374+----------------------------------------
2375+In the early days, aufs was entirely re-designed and re-implemented
7f2ca4b1 2376+Unionfs Version 1.x series. Adding many original ideas, approaches,
53392da6
AM
2377+improvements and implementations, it becomes totally different from
2378+Unionfs while keeping the basic features.
2379+Recently, Unionfs Version 2.x series begin taking some of the same
2380+approaches to aufs1's.
2381+Unionfs is being developed by Professor Erez Zadok at Stony Brook
2382+University and his team.
2383+
2384+Aufs3 supports linux-3.0 and later.
2385+If you want older kernel version support, try aufs2-2.6.git or
2386+aufs2-standalone.git repository, aufs1 from CVS on SourceForge.
2387+
2388+Note: it becomes clear that "Aufs was rejected. Let's give it up."
38d290e6
JR
2389+ According to Christoph Hellwig, linux rejects all union-type
2390+ filesystems but UnionMount.
53392da6
AM
2391+<http://marc.info/?l=linux-kernel&m=123938533724484&w=2>
2392+
38d290e6
JR
2393+PS. Al Viro seems to have a plan to merge aufs as well as overlayfs and
2394+ UnionMount, and he pointed out an issue around a directory mutex
2395+ lock and aufs addressed it. But it is still unsure whether aufs will
2396+ be merged (or any other union solution).
076b876e 2397+<http://marc.info/?l=linux-kernel&m=136312705029295&w=1>
38d290e6 2398+
53392da6
AM
2399+
2400+1. Features
2401+----------------------------------------
2402+- unite several directories into a single virtual filesystem. The member
2403+ directory is called as a branch.
2404+- you can specify the permission flags to the branch, which are 'readonly',
2405+ 'readwrite' and 'whiteout-able.'
2406+- by upper writable branch, internal copyup and whiteout, files/dirs on
2407+ readonly branch are modifiable logically.
2408+- dynamic branch manipulation, add, del.
2409+- etc...
2410+
7f2ca4b1
JR
2411+Also there are many enhancements in aufs, such as:
2412+- test only the highest one for the directory permission (dirperm1)
2413+- copyup on open (coo=)
2414+- 'move' policy for copy-up between two writable branches, after
2415+ checking free space.
2416+- xattr, acl
53392da6
AM
2417+- readdir(3) in userspace.
2418+- keep inode number by external inode number table
2419+- keep the timestamps of file/dir in internal copyup operation
2420+- seekable directory, supporting NFS readdir.
2421+- whiteout is hardlinked in order to reduce the consumption of inodes
2422+ on branch
2423+- do not copyup, nor create a whiteout when it is unnecessary
2424+- revert a single systemcall when an error occurs in aufs
2425+- remount interface instead of ioctl
2426+- maintain /etc/mtab by an external command, /sbin/mount.aufs.
2427+- loopback mounted filesystem as a branch
2428+- kernel thread for removing the dir which has plenty of whiteouts
2429+- support copyup sparse file (a file which has a 'hole' in it)
2430+- default permission flags for branches
2431+- selectable permission flags for ro branch, whether whiteout can
2432+ exist or not
2433+- export via NFS.
2434+- support <sysfs>/fs/aufs and <debugfs>/aufs.
2435+- support multiple writable branches, some policies to select one
2436+ among multiple writable branches.
2437+- a new semantics for link(2) and rename(2) to support multiple
2438+ writable branches.
2439+- no glibc changes are required.
2440+- pseudo hardlink (hardlink over branches)
2441+- allow a direct access manually to a file on branch, e.g. bypassing aufs.
2442+ including NFS or remote filesystem branch.
2443+- userspace wrapper for pathconf(3)/fpathconf(3) with _PC_LINK_MAX.
2444+- and more...
2445+
2446+Currently these features are dropped temporarily from aufs3.
2447+See design/99plan.txt in detail.
53392da6
AM
2448+- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs
2449+ (robr)
2450+- statistics of aufs thread (/sys/fs/aufs/stat)
53392da6
AM
2451+
2452+Features or just an idea in the future (see also design/*.txt),
2453+- reorder the branch index without del/re-add.
2454+- permanent xino files for NFSD
2455+- an option for refreshing the opened files after add/del branches
53392da6
AM
2456+- light version, without branch manipulation. (unnecessary?)
2457+- copyup in userspace
2458+- inotify in userspace
2459+- readv/writev
53392da6
AM
2460+
2461+
2462+2. Download
2463+----------------------------------------
1e00d052
AM
2464+There were three GIT trees for aufs3, aufs3-linux.git,
2465+aufs3-standalone.git, and aufs-util.git. Note that there is no "3" in
2466+"aufs-util.git."
2467+While the aufs-util is always necessary, you need either of aufs3-linux
2468+or aufs3-standalone.
2469+
2470+The aufs3-linux tree includes the whole linux mainline GIT tree,
2471+git://git.kernel.org/.../torvalds/linux.git.
2472+And you cannot select CONFIG_AUFS_FS=m for this version, eg. you cannot
b4510431 2473+build aufs3 as an external kernel module.
7f2ca4b1
JR
2474+Several extra patches are not included in this tree. Only
2475+aufs3-standalone tree contains them. They are described in the later
2476+section "Configuration and Compilation."
1e00d052
AM
2477+
2478+On the other hand, the aufs3-standalone tree has only aufs source files
53392da6 2479+and necessary patches, and you can select CONFIG_AUFS_FS=m.
7f2ca4b1 2480+But you need to apply all aufs patches manually.
53392da6
AM
2481+
2482+You will find GIT branches whose name is in form of "aufs3.x" where "x"
2483+represents the linux kernel version, "linux-3.x". For instance,
1e00d052
AM
2484+"aufs3.0" is for linux-3.0. For latest "linux-3.x-rcN", use
2485+"aufs3.x-rcN" branch.
2486+
2487+o aufs3-linux tree
2488+$ git clone --reference /your/linux/git/tree \
7f2ca4b1 2489+ git://git.code.sf.net/p/aufs/aufs3-linux aufs3-linux.git
1e00d052
AM
2490+- if you don't have linux GIT tree, then remove "--reference ..."
2491+$ cd aufs3-linux.git
2492+$ git checkout origin/aufs3.0
53392da6 2493+
7f2ca4b1
JR
2494+Or you may want to directly git-pull aufs into your linux GIT tree, and
2495+leave the patch-work to GIT.
2496+$ cd /your/linux/git/tree
2497+$ git remote add aufs3 https://github.com/sfjro/aufs3-linux.git
2498+- aufs3-linux.git tree also exists on github.
2499+$ git fetch aufs3
2500+$ git checkout -b my3.14 v3.14
2501+$ (add your change...)
2502+$ git pull aufs3 aufs3.14
2503+- now you have v3.14 + your_changes + aufs3.14 in your my3.14 branch.
2504+- you may need to solve some conflicts between your_changes and
2505+ aufs3.14. in this case, git-rerere is recommended so that you can
2506+ solve the similar conflicts automatically when you upgrade to 3.15 or
2507+ later in the future.
2508+
53392da6 2509+o aufs3-standalone tree
86dc4139 2510+$ git clone git://git.code.sf.net/p/aufs/aufs3-standalone \
53392da6
AM
2511+ aufs3-standalone.git
2512+$ cd aufs3-standalone.git
2513+$ git checkout origin/aufs3.0
2514+
2515+o aufs-util tree
86dc4139 2516+$ git clone git://git.code.sf.net/p/aufs/aufs-util \
53392da6
AM
2517+ aufs-util.git
2518+$ cd aufs-util.git
2519+$ git checkout origin/aufs3.0
2520+
9dbd164d
AM
2521+Note: The 3.x-rcN branch is to be used with `rc' kernel versions ONLY.
2522+The minor version number, 'x' in '3.x', of aufs may not always
2523+follow the minor version number of the kernel.
2524+Because changes in the kernel that cause the use of a new
2525+minor version number do not always require changes to aufs-util.
2526+
2527+Since aufs-util has its own minor version number, you may not be
2528+able to find a GIT branch in aufs-util for your kernel's
2529+exact minor version number.
2530+In this case, you should git-checkout the branch for the
53392da6 2531+nearest lower number.
9dbd164d
AM
2532+
2533+For (an unreleased) example:
2534+If you are using "linux-3.10" and the "aufs3.10" branch
7eafdf33 2535+does not exist in aufs-util repository, then "aufs3.9", "aufs3.8"
9dbd164d
AM
2536+or something numerically smaller is the branch for your kernel.
2537+
53392da6
AM
2538+Also you can view all branches by
2539+ $ git branch -a
2540+
2541+
2542+3. Configuration and Compilation
2543+----------------------------------------
2544+Make sure you have git-checkout'ed the correct branch.
2545+
1e00d052 2546+For aufs3-linux tree,
c06a8ce3 2547+- enable CONFIG_AUFS_FS.
1e00d052
AM
2548+- set other aufs configurations if necessary.
2549+
53392da6
AM
2550+For aufs3-standalone tree,
2551+There are several ways to build.
2552+
2553+1.
2554+- apply ./aufs3-kbuild.patch to your kernel source files.
2555+- apply ./aufs3-base.patch too.
523b37e3 2556+- apply ./aufs3-mmap.patch too.
53392da6
AM
2557+- apply ./aufs3-standalone.patch too, if you have a plan to set
2558+ CONFIG_AUFS_FS=m. otherwise you don't need ./aufs3-standalone.patch.
537831f9
AM
2559+- copy ./{Documentation,fs,include/uapi/linux/aufs_type.h} files to your
2560+ kernel source tree. Never copy $PWD/include/uapi/linux/Kbuild.
c06a8ce3 2561+- enable CONFIG_AUFS_FS, you can select either
53392da6
AM
2562+ =m or =y.
2563+- and build your kernel as usual.
2564+- install the built kernel.
c06a8ce3
AM
2565+ Note: Since linux-3.9, every filesystem module requires an alias
2566+ "fs-<fsname>". You should make sure that "fs-aufs" is listed in your
2567+ modules.aliases file if you set CONFIG_AUFS_FS=m.
7eafdf33
AM
2568+- install the header files too by "make headers_install" to the
2569+ directory where you specify. By default, it is $PWD/usr.
b4510431 2570+ "make help" shows a brief note for headers_install.
53392da6
AM
2571+- and reboot your system.
2572+
2573+2.
2574+- module only (CONFIG_AUFS_FS=m).
2575+- apply ./aufs3-base.patch to your kernel source files.
523b37e3 2576+- apply ./aufs3-mmap.patch too.
53392da6
AM
2577+- apply ./aufs3-standalone.patch too.
2578+- build your kernel, don't forget "make headers_install", and reboot.
2579+- edit ./config.mk and set other aufs configurations if necessary.
b4510431 2580+ Note: You should read $PWD/fs/aufs/Kconfig carefully which describes
53392da6
AM
2581+ every aufs configuration.
2582+- build the module by simple "make".
c06a8ce3
AM
2583+ Note: Since linux-3.9, every filesystem module requires an alias
2584+ "fs-<fsname>". You should make sure that "fs-aufs" is listed in your
2585+ modules.aliases file.
53392da6
AM
2586+- you can specify ${KDIR} make variable which points to your kernel
2587+ source tree.
2588+- install the files
2589+ + run "make install" to install the aufs module, or copy the built
b4510431
AM
2590+ $PWD/aufs.ko to /lib/modules/... and run depmod -a (or reboot simply).
2591+ + run "make install_headers" (instead of headers_install) to install
2592+ the modified aufs header file (you can specify DESTDIR which is
2593+ available in aufs standalone version's Makefile only), or copy
2594+ $PWD/usr/include/linux/aufs_type.h to /usr/include/linux or wherever
2595+ you like manually. By default, the target directory is $PWD/usr.
53392da6
AM
2596+- no need to apply aufs3-kbuild.patch, nor copying source files to your
2597+ kernel source tree.
2598+
b4510431 2599+Note: The header file aufs_type.h is necessary to build aufs-util
53392da6
AM
2600+ as well as "make headers_install" in the kernel source tree.
2601+ headers_install is subject to be forgotten, but it is essentially
2602+ necessary, not only for building aufs-util.
2603+ You may not meet problems without headers_install in some older
2604+ version though.
2605+
2606+And then,
2607+- read README in aufs-util, build and install it
9dbd164d
AM
2608+- note that your distribution may contain an obsoleted version of
2609+ aufs_type.h in /usr/include/linux or something. When you build aufs
2610+ utilities, make sure that your compiler refers to the correct aufs header
2611+ file which is built by "make headers_install."
53392da6
AM
2612+- if you want to use readdir(3) in userspace or pathconf(3) wrapper,
2613+ then run "make install_ulib" too. And refer to the aufs manual in
2614+ detail.
2615+
38d290e6
JR
2616+There are several other patches in aufs3-standalone.git. They are all
2617+optional. When you meet some problems, they will help you.
2618+- aufs3-loopback.patch
2619+ Supports a nested loopback mount in a branch-fs. This patch is
2620+ unnecessary until aufs produces a message like "you may want to try
2621+ another patch for loopback file".
2622+- vfs-ino.patch
2623+ Modifies a system global kernel internal function get_next_ino() in
2624+ order to stop assigning 0 for an inode-number. Not directly related to
2625+ aufs, but recommended generally.
2626+- tmpfs-idr.patch
2627+ Keeps the tmpfs inode number as the lowest value. Effective to reduce
2628+ the size of aufs XINO files for tmpfs branch. Also it prevents the
2629+ duplication of inode number, which is important for backup tools and
2630+ other utilities. When you find aufs XINO files for tmpfs branch
2631+ growing too much, try this patch.
7f2ca4b1
JR
2632+- lockdep-debug.patch
2633+ Because aufs is not only an ordinary filesystem (callee of VFS), but
2634+ also a caller of VFS functions for branch filesystems, subclassing of
2635+ the internal locks for LOCKDEP is necessary. LOCKDEP is a debugging
2636+ feature of linux kernel. If you enable CONFIG_LOCKDEP, then you will
2637+ need to apply this debug patch to expand several constant values.
2638+ If you don't know what LOCKDEP is, then you don't have to apply this patch.
38d290e6 2639+
53392da6
AM
2640+
2641+4. Usage
2642+----------------------------------------
2643+At first, make sure aufs-util are installed, and please read the aufs
2644+manual, aufs.5 in aufs-util.git tree.
2645+$ man -l aufs.5
2646+
2647+And then,
2648+$ mkdir /tmp/rw /tmp/aufs
2649+# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs
2650+
2651+Here is another example. The result is equivalent.
2652+# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs
2653+ Or
2654+# mount -t aufs -o br:/tmp/rw none /tmp/aufs
2655+# mount -o remount,append:${HOME} /tmp/aufs
2656+
2657+Then, you can see whole tree of your home dir through /tmp/aufs. If
2658+you modify a file under /tmp/aufs, the one on your home directory is
2659+not affected, instead the same named file will be newly created under
2660+/tmp/rw. And all of your modification to a file will be applied to
2661+the one under /tmp/rw. This is called the file based Copy on Write
2662+(COW) method.
2663+Aufs mount options are described in aufs.5.
2664+If you run chroot or something and make your aufs as a root directory,
2665+then you need to customize the shutdown script. See the aufs manual in
2666+detail.
2667+
2668+Additionally, there are some sample usages of aufs which are a
2669+diskless system with network booting, and LiveCD over NFS.
2670+See sample dir in CVS tree on SourceForge.
2671+
2672+
2673+5. Contact
2674+----------------------------------------
2675+When you have any problems or strange behaviour in aufs, please let me
2676+know with:
2677+- /proc/mounts (instead of the output of mount(8))
2678+- /sys/module/aufs/*
2679+- /sys/fs/aufs/* (if you have them)
2680+- /debug/aufs/* (if you have them)
2681+- linux kernel version
2682+ if your kernel is not plain, for example modified by distributor,
2683+ the url where i can download its source is necessary too.
2684+- aufs version which was printed at loading the module or booting the
2685+ system, instead of the date you downloaded.
2686+- configuration (define/undefine CONFIG_AUFS_xxx)
2687+- kernel configuration or /proc/config.gz (if you have it)
2688+- behaviour which you think to be incorrect
2689+- actual operation, reproducible one is better
2690+- mailto: aufs-users at lists.sourceforge.net
2691+
2692+Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches,
2693+and Feature Requests) on SourceForge. Please join and write to
2694+aufs-users ML.
2695+
2696+
2697+6. Acknowledgements
2698+----------------------------------------
2699+Thanks to everyone who has tried and is using aufs, and whoever
2700+has reported a bug or given any feedback.
2701+
2702+Especially donators:
2703+Tomas Matejicek(slax.org) made a donation (much more than once).
2704+ Since Apr 2010, Tomas M (the author of Slax and Linux Live
2705+ scripts) is making "doubling" donations.
2706+ Unfortunately I cannot list all of the donators, but I really
b4510431 2707+ appreciate them.
53392da6
AM
2708+ It ends Aug 2010, but the ordinary donation URL is still available.
2709+ <http://sourceforge.net/donate/index.php?group_id=167503>
2710+Dai Itasaka made a donation (2007/8).
2711+Chuck Smith made a donation (2008/4, 10 and 12).
2712+Henk Schoneveld made a donation (2008/9).
2713+Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10).
2714+Francois Dupoux made a donation (2008/11).
2715+Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public
2716+ aufs2 GIT tree (2009/2).
2717+William Grant made a donation (2009/3).
2718+Patrick Lane made a donation (2009/4).
2719+The Mail Archive (mail-archive.com) made donations (2009/5).
2720+Nippy Networks (Ed Wildgoose) made a donation (2009/7).
2721+New Dream Network, LLC (www.dreamhost.com) made a donation (2009/11).
2722+Pavel Pronskiy made a donation (2011/2).
2723+Iridium and Inmarsat satellite phone retailer (www.mailasail.com), Nippy
2724+ Networks (Ed Wildgoose) made a donation for hardware (2011/3).
537831f9
AM
2725+Max Lekomcev (DOM-TV project) made a donation (2011/7, 12, 2012/3, 6 and
2726+11).
1e00d052 2727+Sam Liddicott made a donation (2011/9).
86dc4139
AM
2728+Era Scarecrow made a donation (2013/4).
2729+Bor Ratajc made a donation (2013/4).
2730+Alessandro Gorreta made a donation (2013/4).
2731+POIRETTE Marc made a donation (2013/4).
2732+Alessandro Gorreta made a donation (2013/4).
2733+lauri kasvandik made a donation (2013/5).
392086de 2734+"pemasu from Finland" made a donation (2013/7).
523b37e3
AM
2735+The Parted Magic Project made a donation (2013/9 and 11).
2736+Pavel Barta made a donation (2013/10).
38d290e6 2737+Nikolay Pertsev made a donation (2014/5).
7f2ca4b1 2738+James B made a donation (2014/7 and 2015/7).
076b876e 2739+Stefano Di Biase made a donation (2014/8).
7f2ca4b1
JR
2740+Daniel Epellei made a donation (2015/1).
2741+OmegaPhil made a donation (2016/1).
53392da6
AM
2742+
2743+Thank you very much.
2744+Donations, including future donations, are always very important and
2745+helpful for me to keep on developing aufs.
2746+
2747+
2748+7.
2749+----------------------------------------
2750+If you are an experienced user, no explanation is needed. Aufs is
2751+just a linux filesystem.
2752+
2753+
2754+Enjoy!
2755+
2756+# Local variables: ;
2757+# mode: text;
2758+# End: ;
7f207e10
AM
2759diff -urN /usr/share/empty/fs/aufs/aufs.h linux/fs/aufs/aufs.h
2760--- /usr/share/empty/fs/aufs/aufs.h 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 2761+++ linux/fs/aufs/aufs.h 2016-02-28 11:27:01.277245613 +0100
523b37e3 2762@@ -0,0 +1,59 @@
7f207e10 2763+/*
7f2ca4b1 2764+ * Copyright (C) 2005-2016 Junjiro R. Okajima
7f207e10
AM
2765+ *
2766+ * This program, aufs is free software; you can redistribute it and/or modify
2767+ * it under the terms of the GNU General Public License as published by
2768+ * the Free Software Foundation; either version 2 of the License, or
2769+ * (at your option) any later version.
2770+ *
2771+ * This program is distributed in the hope that it will be useful,
2772+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2773+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2774+ * GNU General Public License for more details.
2775+ *
2776+ * You should have received a copy of the GNU General Public License
523b37e3 2777+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7f207e10
AM
2778+ */
2779+
2780+/*
2781+ * all header files
2782+ */
2783+
2784+#ifndef __AUFS_H__
2785+#define __AUFS_H__
2786+
2787+#ifdef __KERNEL__
2788+
/*
 * Stub generators.
 * AuStub() defines a static inline function @name returning @type, taking
 * the variadic part as its parameter list and having @body as its only
 * statement.  AuStubVoid() and AuStubInt0() are the shorthands for a stub
 * which does nothing and for a stub which returns 0.
 */
#define AuStub(type, name, body, ...) \
	static inline type name(__VA_ARGS__) { body; }

#define AuStubVoid(name, ...)	AuStub(void, name, , __VA_ARGS__)
#define AuStubInt0(name, ...)	AuStub(int, name, return 0, __VA_ARGS__)
2796+
2797+#include "debug.h"
2798+
2799+#include "branch.h"
2800+#include "cpup.h"
2801+#include "dcsub.h"
2802+#include "dbgaufs.h"
2803+#include "dentry.h"
2804+#include "dir.h"
2805+#include "dynop.h"
2806+#include "file.h"
2807+#include "fstype.h"
2808+#include "inode.h"
2809+#include "loop.h"
2810+#include "module.h"
7f207e10
AM
2811+#include "opts.h"
2812+#include "rwsem.h"
2813+#include "spl.h"
2814+#include "super.h"
2815+#include "sysaufs.h"
2816+#include "vfsub.h"
2817+#include "whout.h"
2818+#include "wkq.h"
2819+
2820+#endif /* __KERNEL__ */
2821+#endif /* __AUFS_H__ */
2822diff -urN /usr/share/empty/fs/aufs/branch.c linux/fs/aufs/branch.c
2823--- /usr/share/empty/fs/aufs/branch.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
2824+++ linux/fs/aufs/branch.c 2016-02-28 11:27:01.277245613 +0100
2825@@ -0,0 +1,1402 @@
7f207e10 2826+/*
7f2ca4b1 2827+ * Copyright (C) 2005-2016 Junjiro R. Okajima
7f207e10
AM
2828+ *
2829+ * This program, aufs is free software; you can redistribute it and/or modify
2830+ * it under the terms of the GNU General Public License as published by
2831+ * the Free Software Foundation; either version 2 of the License, or
2832+ * (at your option) any later version.
2833+ *
2834+ * This program is distributed in the hope that it will be useful,
2835+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2836+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2837+ * GNU General Public License for more details.
2838+ *
2839+ * You should have received a copy of the GNU General Public License
523b37e3 2840+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7f207e10
AM
2841+ */
2842+
2843+/*
2844+ * branch management
2845+ */
2846+
027c5e7a 2847+#include <linux/compat.h>
7f207e10
AM
2848+#include <linux/statfs.h>
2849+#include "aufs.h"
2850+
2851+/*
2852+ * free a single branch
1facf9fc 2853+ */
2854+static void au_br_do_free(struct au_branch *br)
2855+{
2856+ int i;
2857+ struct au_wbr *wbr;
4a4d8108 2858+ struct au_dykey **key;
1facf9fc 2859+
027c5e7a
AM
2860+ au_hnotify_fin_br(br);
2861+
1facf9fc 2862+ if (br->br_xino.xi_file)
2863+ fput(br->br_xino.xi_file);
2864+ mutex_destroy(&br->br_xino.xi_nondir_mtx);
2865+
2866+ AuDebugOn(atomic_read(&br->br_count));
2867+
2868+ wbr = br->br_wbr;
2869+ if (wbr) {
2870+ for (i = 0; i < AuBrWh_Last; i++)
2871+ dput(wbr->wbr_wh[i]);
2872+ AuDebugOn(atomic_read(&wbr->wbr_wh_running));
dece6358 2873+ AuRwDestroy(&wbr->wbr_wh_rwsem);
1facf9fc 2874+ }
2875+
076b876e
AM
2876+ if (br->br_fhsm) {
2877+ au_br_fhsm_fin(br->br_fhsm);
2878+ kfree(br->br_fhsm);
2879+ }
2880+
4a4d8108
AM
2881+ key = br->br_dykey;
2882+ for (i = 0; i < AuBrDynOp; i++, key++)
2883+ if (*key)
2884+ au_dy_put(*key);
2885+ else
2886+ break;
2887+
537831f9
AM
2888+ /* recursive lock, s_umount of branch's */
2889+ lockdep_off();
86dc4139 2890+ path_put(&br->br_path);
537831f9 2891+ lockdep_on();
1facf9fc 2892+ kfree(wbr);
2893+ kfree(br);
2894+}
2895+
2896+/*
2897+ * frees all branches
2898+ */
2899+void au_br_free(struct au_sbinfo *sbinfo)
2900+{
2901+ aufs_bindex_t bmax;
2902+ struct au_branch **br;
2903+
dece6358
AM
2904+ AuRwMustWriteLock(&sbinfo->si_rwsem);
2905+
1facf9fc 2906+ bmax = sbinfo->si_bend + 1;
2907+ br = sbinfo->si_branch;
2908+ while (bmax--)
2909+ au_br_do_free(*br++);
2910+}
2911+
2912+/*
2913+ * find the index of a branch which is specified by @br_id.
2914+ */
2915+int au_br_index(struct super_block *sb, aufs_bindex_t br_id)
2916+{
2917+ aufs_bindex_t bindex, bend;
2918+
2919+ bend = au_sbend(sb);
2920+ for (bindex = 0; bindex <= bend; bindex++)
2921+ if (au_sbr_id(sb, bindex) == br_id)
2922+ return bindex;
2923+ return -1;
2924+}
2925+
2926+/* ---------------------------------------------------------------------- */
2927+
2928+/*
2929+ * add a branch
2930+ */
2931+
b752ccd1
AM
2932+static int test_overlap(struct super_block *sb, struct dentry *h_adding,
2933+ struct dentry *h_root)
1facf9fc 2934+{
b752ccd1
AM
2935+ if (unlikely(h_adding == h_root
2936+ || au_test_loopback_overlap(sb, h_adding)))
1facf9fc 2937+ return 1;
b752ccd1
AM
2938+ if (h_adding->d_sb != h_root->d_sb)
2939+ return 0;
2940+ return au_test_subdir(h_adding, h_root)
2941+ || au_test_subdir(h_root, h_adding);
1facf9fc 2942+}
2943+
2944+/*
2945+ * returns a newly allocated branch. @new_nbranch is a number of branches
2946+ * after adding a branch.
2947+ */
2948+static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch,
2949+ int perm)
2950+{
2951+ struct au_branch *add_branch;
2952+ struct dentry *root;
4a4d8108 2953+ int err;
1facf9fc 2954+
4a4d8108 2955+ err = -ENOMEM;
1facf9fc 2956+ root = sb->s_root;
7f2ca4b1 2957+ add_branch = kzalloc(sizeof(*add_branch), GFP_NOFS);
1facf9fc 2958+ if (unlikely(!add_branch))
2959+ goto out;
2960+
027c5e7a
AM
2961+ err = au_hnotify_init_br(add_branch, perm);
2962+ if (unlikely(err))
2963+ goto out_br;
2964+
1facf9fc 2965+ if (au_br_writable(perm)) {
2966+ /* may be freed separately at changing the branch permission */
7f2ca4b1 2967+ add_branch->br_wbr = kzalloc(sizeof(*add_branch->br_wbr),
1facf9fc 2968+ GFP_NOFS);
2969+ if (unlikely(!add_branch->br_wbr))
027c5e7a 2970+ goto out_hnotify;
1facf9fc 2971+ }
2972+
076b876e
AM
2973+ if (au_br_fhsm(perm)) {
2974+ err = au_fhsm_br_alloc(add_branch);
2975+ if (unlikely(err))
2976+ goto out_wbr;
2977+ }
2978+
4a4d8108
AM
2979+ err = au_sbr_realloc(au_sbi(sb), new_nbranch);
2980+ if (!err)
2981+ err = au_di_realloc(au_di(root), new_nbranch);
2982+ if (!err)
2983+ err = au_ii_realloc(au_ii(root->d_inode), new_nbranch);
2984+ if (!err)
2985+ return add_branch; /* success */
1facf9fc 2986+
076b876e 2987+out_wbr:
1facf9fc 2988+ kfree(add_branch->br_wbr);
027c5e7a
AM
2989+out_hnotify:
2990+ au_hnotify_fin_br(add_branch);
4f0767ce 2991+out_br:
1facf9fc 2992+ kfree(add_branch);
4f0767ce 2993+out:
4a4d8108 2994+ return ERR_PTR(err);
1facf9fc 2995+}
2996+
2997+/*
2998+ * test if the branch permission is legal or not.
2999+ */
3000+static int test_br(struct inode *inode, int brperm, char *path)
3001+{
3002+ int err;
3003+
4a4d8108
AM
3004+ err = (au_br_writable(brperm) && IS_RDONLY(inode));
3005+ if (!err)
3006+ goto out;
1facf9fc 3007+
4a4d8108
AM
3008+ err = -EINVAL;
3009+ pr_err("write permission for readonly mount or inode, %s\n", path);
3010+
4f0767ce 3011+out:
1facf9fc 3012+ return err;
3013+}
3014+
3015+/*
3016+ * returns:
3017+ * 0: success, the caller will add it
3018+ * plus: success, it is already unified, the caller should ignore it
3019+ * minus: error
3020+ */
3021+static int test_add(struct super_block *sb, struct au_opt_add *add, int remount)
3022+{
3023+ int err;
3024+ aufs_bindex_t bend, bindex;
3025+ struct dentry *root;
3026+ struct inode *inode, *h_inode;
3027+
3028+ root = sb->s_root;
3029+ bend = au_sbend(sb);
3030+ if (unlikely(bend >= 0
3031+ && au_find_dbindex(root, add->path.dentry) >= 0)) {
3032+ err = 1;
3033+ if (!remount) {
3034+ err = -EINVAL;
4a4d8108 3035+ pr_err("%s duplicated\n", add->pathname);
1facf9fc 3036+ }
3037+ goto out;
3038+ }
3039+
3040+ err = -ENOSPC; /* -E2BIG; */
3041+ if (unlikely(AUFS_BRANCH_MAX <= add->bindex
3042+ || AUFS_BRANCH_MAX - 1 <= bend)) {
4a4d8108 3043+ pr_err("number of branches exceeded %s\n", add->pathname);
1facf9fc 3044+ goto out;
3045+ }
3046+
3047+ err = -EDOM;
3048+ if (unlikely(add->bindex < 0 || bend + 1 < add->bindex)) {
4a4d8108 3049+ pr_err("bad index %d\n", add->bindex);
1facf9fc 3050+ goto out;
3051+ }
3052+
3053+ inode = add->path.dentry->d_inode;
3054+ err = -ENOENT;
3055+ if (unlikely(!inode->i_nlink)) {
4a4d8108 3056+ pr_err("no existence %s\n", add->pathname);
1facf9fc 3057+ goto out;
3058+ }
3059+
3060+ err = -EINVAL;
3061+ if (unlikely(inode->i_sb == sb)) {
4a4d8108 3062+ pr_err("%s must be outside\n", add->pathname);
1facf9fc 3063+ goto out;
3064+ }
3065+
3066+ if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) {
4a4d8108
AM
3067+ pr_err("unsupported filesystem, %s (%s)\n",
3068+ add->pathname, au_sbtype(inode->i_sb));
1facf9fc 3069+ goto out;
3070+ }
3071+
c1595e42
JR
3072+ if (unlikely(inode->i_sb->s_stack_depth)) {
3073+ pr_err("already stacked, %s (%s)\n",
3074+ add->pathname, au_sbtype(inode->i_sb));
3075+ goto out;
3076+ }
3077+
1facf9fc 3078+ err = test_br(add->path.dentry->d_inode, add->perm, add->pathname);
3079+ if (unlikely(err))
3080+ goto out;
3081+
3082+ if (bend < 0)
3083+ return 0; /* success */
3084+
3085+ err = -EINVAL;
3086+ for (bindex = 0; bindex <= bend; bindex++)
3087+ if (unlikely(test_overlap(sb, add->path.dentry,
3088+ au_h_dptr(root, bindex)))) {
4a4d8108 3089+ pr_err("%s is overlapped\n", add->pathname);
1facf9fc 3090+ goto out;
3091+ }
3092+
3093+ err = 0;
3094+ if (au_opt_test(au_mntflags(sb), WARN_PERM)) {
3095+ h_inode = au_h_dptr(root, 0)->d_inode;
3096+ if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO)
0c3ec466
AM
3097+ || !uid_eq(h_inode->i_uid, inode->i_uid)
3098+ || !gid_eq(h_inode->i_gid, inode->i_gid))
3099+ pr_warn("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n",
3100+ add->pathname,
3101+ i_uid_read(inode), i_gid_read(inode),
3102+ (inode->i_mode & S_IALLUGO),
3103+ i_uid_read(h_inode), i_gid_read(h_inode),
3104+ (h_inode->i_mode & S_IALLUGO));
1facf9fc 3105+ }
3106+
4f0767ce 3107+out:
1facf9fc 3108+ return err;
3109+}
3110+
3111+/*
3112+ * initialize or clean the whiteouts for an adding branch
3113+ */
3114+static int au_br_init_wh(struct super_block *sb, struct au_branch *br,
86dc4139 3115+ int new_perm)
1facf9fc 3116+{
3117+ int err, old_perm;
3118+ aufs_bindex_t bindex;
3119+ struct mutex *h_mtx;
3120+ struct au_wbr *wbr;
3121+ struct au_hinode *hdir;
3122+
86dc4139
AM
3123+ err = vfsub_mnt_want_write(au_br_mnt(br));
3124+ if (unlikely(err))
3125+ goto out;
3126+
1facf9fc 3127+ wbr = br->br_wbr;
3128+ old_perm = br->br_perm;
3129+ br->br_perm = new_perm;
3130+ hdir = NULL;
3131+ h_mtx = NULL;
3132+ bindex = au_br_index(sb, br->br_id);
3133+ if (0 <= bindex) {
3134+ hdir = au_hi(sb->s_root->d_inode, bindex);
4a4d8108 3135+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 3136+ } else {
86dc4139 3137+ h_mtx = &au_br_dentry(br)->d_inode->i_mutex;
1facf9fc 3138+ mutex_lock_nested(h_mtx, AuLsc_I_PARENT);
3139+ }
3140+ if (!wbr)
86dc4139 3141+ err = au_wh_init(br, sb);
1facf9fc 3142+ else {
3143+ wbr_wh_write_lock(wbr);
86dc4139 3144+ err = au_wh_init(br, sb);
1facf9fc 3145+ wbr_wh_write_unlock(wbr);
3146+ }
3147+ if (hdir)
4a4d8108 3148+ au_hn_imtx_unlock(hdir);
1facf9fc 3149+ else
3150+ mutex_unlock(h_mtx);
86dc4139 3151+ vfsub_mnt_drop_write(au_br_mnt(br));
1facf9fc 3152+ br->br_perm = old_perm;
3153+
3154+ if (!err && wbr && !au_br_writable(new_perm)) {
3155+ kfree(wbr);
3156+ br->br_wbr = NULL;
3157+ }
3158+
86dc4139 3159+out:
1facf9fc 3160+ return err;
3161+}
3162+
3163+static int au_wbr_init(struct au_branch *br, struct super_block *sb,
86dc4139 3164+ int perm)
1facf9fc 3165+{
3166+ int err;
4a4d8108 3167+ struct kstatfs kst;
1facf9fc 3168+ struct au_wbr *wbr;
3169+
3170+ wbr = br->br_wbr;
dece6358 3171+ au_rw_init(&wbr->wbr_wh_rwsem);
1facf9fc 3172+ atomic_set(&wbr->wbr_wh_running, 0);
1facf9fc 3173+
4a4d8108
AM
3174+ /*
3175+ * a limit for rmdir/rename a dir
523b37e3 3176+ * cf. AUFS_MAX_NAMELEN in include/uapi/linux/aufs_type.h
4a4d8108 3177+ */
86dc4139 3178+ err = vfs_statfs(&br->br_path, &kst);
4a4d8108
AM
3179+ if (unlikely(err))
3180+ goto out;
3181+ err = -EINVAL;
3182+ if (kst.f_namelen >= NAME_MAX)
86dc4139 3183+ err = au_br_init_wh(sb, br, perm);
4a4d8108 3184+ else
523b37e3
AM
3185+ pr_err("%pd(%s), unsupported namelen %ld\n",
3186+ au_br_dentry(br),
86dc4139 3187+ au_sbtype(au_br_dentry(br)->d_sb), kst.f_namelen);
1facf9fc 3188+
4f0767ce 3189+out:
1facf9fc 3190+ return err;
3191+}
3192+
c1595e42 3193+/* initialize a new branch */
1facf9fc 3194+static int au_br_init(struct au_branch *br, struct super_block *sb,
3195+ struct au_opt_add *add)
3196+{
3197+ int err;
3198+
3199+ err = 0;
1facf9fc 3200+ mutex_init(&br->br_xino.xi_nondir_mtx);
3201+ br->br_perm = add->perm;
86dc4139 3202+ br->br_path = add->path; /* set first, path_get() later */
4a4d8108 3203+ spin_lock_init(&br->br_dykey_lock);
1facf9fc 3204+ atomic_set(&br->br_count, 0);
1facf9fc 3205+ atomic_set(&br->br_xino_running, 0);
3206+ br->br_id = au_new_br_id(sb);
7f207e10 3207+ AuDebugOn(br->br_id < 0);
1facf9fc 3208+
3209+ if (au_br_writable(add->perm)) {
86dc4139 3210+ err = au_wbr_init(br, sb, add->perm);
1facf9fc 3211+ if (unlikely(err))
b752ccd1 3212+ goto out_err;
1facf9fc 3213+ }
3214+
3215+ if (au_opt_test(au_mntflags(sb), XINO)) {
3216+ err = au_xino_br(sb, br, add->path.dentry->d_inode->i_ino,
3217+ au_sbr(sb, 0)->br_xino.xi_file, /*do_test*/1);
3218+ if (unlikely(err)) {
3219+ AuDebugOn(br->br_xino.xi_file);
b752ccd1 3220+ goto out_err;
1facf9fc 3221+ }
3222+ }
3223+
3224+ sysaufs_br_init(br);
86dc4139 3225+ path_get(&br->br_path);
b752ccd1 3226+ goto out; /* success */
1facf9fc 3227+
4f0767ce 3228+out_err:
86dc4139 3229+ memset(&br->br_path, 0, sizeof(br->br_path));
4f0767ce 3230+out:
1facf9fc 3231+ return err;
3232+}
3233+
3234+static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex,
3235+ struct au_branch *br, aufs_bindex_t bend,
3236+ aufs_bindex_t amount)
3237+{
3238+ struct au_branch **brp;
3239+
dece6358
AM
3240+ AuRwMustWriteLock(&sbinfo->si_rwsem);
3241+
1facf9fc 3242+ brp = sbinfo->si_branch + bindex;
3243+ memmove(brp + 1, brp, sizeof(*brp) * amount);
3244+ *brp = br;
3245+ sbinfo->si_bend++;
3246+ if (unlikely(bend < 0))
3247+ sbinfo->si_bend = 0;
3248+}
3249+
3250+static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex,
3251+ aufs_bindex_t bend, aufs_bindex_t amount)
3252+{
3253+ struct au_hdentry *hdp;
3254+
1308ab2a 3255+ AuRwMustWriteLock(&dinfo->di_rwsem);
3256+
1facf9fc 3257+ hdp = dinfo->di_hdentry + bindex;
3258+ memmove(hdp + 1, hdp, sizeof(*hdp) * amount);
3259+ au_h_dentry_init(hdp);
3260+ dinfo->di_bend++;
3261+ if (unlikely(bend < 0))
3262+ dinfo->di_bstart = 0;
3263+}
3264+
3265+static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex,
3266+ aufs_bindex_t bend, aufs_bindex_t amount)
3267+{
3268+ struct au_hinode *hip;
3269+
1308ab2a 3270+ AuRwMustWriteLock(&iinfo->ii_rwsem);
3271+
1facf9fc 3272+ hip = iinfo->ii_hinode + bindex;
3273+ memmove(hip + 1, hip, sizeof(*hip) * amount);
3274+ hip->hi_inode = NULL;
4a4d8108 3275+ au_hn_init(hip);
1facf9fc 3276+ iinfo->ii_bend++;
3277+ if (unlikely(bend < 0))
3278+ iinfo->ii_bstart = 0;
3279+}
3280+
86dc4139
AM
3281+static void au_br_do_add(struct super_block *sb, struct au_branch *br,
3282+ aufs_bindex_t bindex)
1facf9fc 3283+{
86dc4139 3284+ struct dentry *root, *h_dentry;
1facf9fc 3285+ struct inode *root_inode;
3286+ aufs_bindex_t bend, amount;
3287+
3288+ root = sb->s_root;
3289+ root_inode = root->d_inode;
1facf9fc 3290+ bend = au_sbend(sb);
3291+ amount = bend + 1 - bindex;
86dc4139 3292+ h_dentry = au_br_dentry(br);
53392da6 3293+ au_sbilist_lock();
1facf9fc 3294+ au_br_do_add_brp(au_sbi(sb), bindex, br, bend, amount);
3295+ au_br_do_add_hdp(au_di(root), bindex, bend, amount);
3296+ au_br_do_add_hip(au_ii(root_inode), bindex, bend, amount);
3297+ au_set_h_dptr(root, bindex, dget(h_dentry));
3298+ au_set_h_iptr(root_inode, bindex, au_igrab(h_dentry->d_inode),
3299+ /*flags*/0);
53392da6 3300+ au_sbilist_unlock();
1facf9fc 3301+}
3302+
3303+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount)
3304+{
3305+ int err;
1facf9fc 3306+ aufs_bindex_t bend, add_bindex;
3307+ struct dentry *root, *h_dentry;
3308+ struct inode *root_inode;
3309+ struct au_branch *add_branch;
3310+
3311+ root = sb->s_root;
3312+ root_inode = root->d_inode;
3313+ IMustLock(root_inode);
3314+ err = test_add(sb, add, remount);
3315+ if (unlikely(err < 0))
3316+ goto out;
3317+ if (err) {
3318+ err = 0;
3319+ goto out; /* success */
3320+ }
3321+
3322+ bend = au_sbend(sb);
3323+ add_branch = au_br_alloc(sb, bend + 2, add->perm);
3324+ err = PTR_ERR(add_branch);
3325+ if (IS_ERR(add_branch))
3326+ goto out;
3327+
3328+ err = au_br_init(add_branch, sb, add);
3329+ if (unlikely(err)) {
3330+ au_br_do_free(add_branch);
3331+ goto out;
3332+ }
3333+
3334+ add_bindex = add->bindex;
1facf9fc 3335+ if (!remount)
86dc4139 3336+ au_br_do_add(sb, add_branch, add_bindex);
1facf9fc 3337+ else {
3338+ sysaufs_brs_del(sb, add_bindex);
86dc4139 3339+ au_br_do_add(sb, add_branch, add_bindex);
1facf9fc 3340+ sysaufs_brs_add(sb, add_bindex);
3341+ }
3342+
86dc4139 3343+ h_dentry = add->path.dentry;
1308ab2a 3344+ if (!add_bindex) {
1facf9fc 3345+ au_cpup_attr_all(root_inode, /*force*/1);
1308ab2a 3346+ sb->s_maxbytes = h_dentry->d_sb->s_maxbytes;
3347+ } else
1facf9fc 3348+ au_add_nlink(root_inode, h_dentry->d_inode);
1facf9fc 3349+
3350+ /*
4a4d8108 3351+ * this test/set prevents aufs from handling unnecesary notify events
027c5e7a 3352+ * of xino files, in case of re-adding a writable branch which was
1facf9fc 3353+ * once detached from aufs.
3354+ */
3355+ if (au_xino_brid(sb) < 0
3356+ && au_br_writable(add_branch->br_perm)
3357+ && !au_test_fs_bad_xino(h_dentry->d_sb)
3358+ && add_branch->br_xino.xi_file
3359+ && add_branch->br_xino.xi_file->f_dentry->d_parent == h_dentry)
3360+ au_xino_brid_set(sb, add_branch->br_id);
3361+
4f0767ce 3362+out:
1facf9fc 3363+ return err;
3364+}
3365+
3366+/* ---------------------------------------------------------------------- */
3367+
076b876e
AM
3368+static unsigned long long au_farray_cb(void *a,
3369+ unsigned long long max __maybe_unused,
3370+ void *arg)
3371+{
3372+ unsigned long long n;
3373+ struct file **p, *f;
3374+ struct au_sphlhead *files;
3375+ struct au_finfo *finfo;
3376+ struct super_block *sb = arg;
3377+
3378+ n = 0;
3379+ p = a;
3380+ files = &au_sbi(sb)->si_files;
3381+ spin_lock(&files->spin);
3382+ hlist_for_each_entry(finfo, &files->head, fi_hlist) {
3383+ f = finfo->fi_file;
3384+ if (file_count(f)
3385+ && !special_file(file_inode(f)->i_mode)) {
3386+ get_file(f);
3387+ *p++ = f;
3388+ n++;
3389+ AuDebugOn(n > max);
3390+ }
3391+ }
3392+ spin_unlock(&files->spin);
3393+
3394+ return n;
3395+}
3396+
3397+static struct file **au_farray_alloc(struct super_block *sb,
3398+ unsigned long long *max)
3399+{
3400+ *max = atomic_long_read(&au_sbi(sb)->si_nfiles);
3401+ return au_array_alloc(max, au_farray_cb, sb);
3402+}
3403+
/*
 * put every non-NULL file in the first @max entries of @a, and free the
 * array itself.
 */
static void au_farray_free(struct file **a, unsigned long long max)
{
	unsigned long long n;
	struct file *file;

	n = 0;
	while (n < max) {
		file = a[n++];
		if (file)
			fput(file);
	}
	kvfree(a);
}
3413+
3414+/* ---------------------------------------------------------------------- */
3415+
1facf9fc 3416+/*
3417+ * delete a branch
3418+ */
3419+
/*
 * print an info message only when @do_info is true.
 * to show the line number, do not make it inlined function
 */
#define AuVerbose(do_info, fmt, ...) \
	do { \
		if (do_info) \
			pr_info(fmt, ##__VA_ARGS__); \
	} while (0)
3425+
027c5e7a
AM
3426+static int au_test_ibusy(struct inode *inode, aufs_bindex_t bstart,
3427+ aufs_bindex_t bend)
3428+{
3429+ return (inode && !S_ISDIR(inode->i_mode)) || bstart == bend;
3430+}
3431+
3432+static int au_test_dbusy(struct dentry *dentry, aufs_bindex_t bstart,
3433+ aufs_bindex_t bend)
3434+{
3435+ return au_test_ibusy(dentry->d_inode, bstart, bend);
3436+}
3437+
1facf9fc 3438+/*
3439+ * test if the branch is deletable or not.
3440+ */
3441+static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex,
b752ccd1 3442+ unsigned int sigen, const unsigned int verbose)
1facf9fc 3443+{
3444+ int err, i, j, ndentry;
3445+ aufs_bindex_t bstart, bend;
1facf9fc 3446+ struct au_dcsub_pages dpages;
3447+ struct au_dpage *dpage;
3448+ struct dentry *d;
1facf9fc 3449+
3450+ err = au_dpages_init(&dpages, GFP_NOFS);
3451+ if (unlikely(err))
3452+ goto out;
3453+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
3454+ if (unlikely(err))
3455+ goto out_dpages;
3456+
1facf9fc 3457+ for (i = 0; !err && i < dpages.ndpage; i++) {
3458+ dpage = dpages.dpages + i;
3459+ ndentry = dpage->ndentry;
3460+ for (j = 0; !err && j < ndentry; j++) {
3461+ d = dpage->dentries[j];
c1595e42 3462+ AuDebugOn(au_dcount(d) <= 0);
027c5e7a 3463+ if (!au_digen_test(d, sigen)) {
1facf9fc 3464+ di_read_lock_child(d, AuLock_IR);
027c5e7a
AM
3465+ if (unlikely(au_dbrange_test(d))) {
3466+ di_read_unlock(d, AuLock_IR);
3467+ continue;
3468+ }
3469+ } else {
1facf9fc 3470+ di_write_lock_child(d);
027c5e7a
AM
3471+ if (unlikely(au_dbrange_test(d))) {
3472+ di_write_unlock(d);
3473+ continue;
3474+ }
1facf9fc 3475+ err = au_reval_dpath(d, sigen);
3476+ if (!err)
3477+ di_downgrade_lock(d, AuLock_IR);
3478+ else {
3479+ di_write_unlock(d);
3480+ break;
3481+ }
3482+ }
3483+
027c5e7a 3484+ /* AuDbgDentry(d); */
1facf9fc 3485+ bstart = au_dbstart(d);
3486+ bend = au_dbend(d);
3487+ if (bstart <= bindex
3488+ && bindex <= bend
3489+ && au_h_dptr(d, bindex)
027c5e7a 3490+ && au_test_dbusy(d, bstart, bend)) {
1facf9fc 3491+ err = -EBUSY;
523b37e3 3492+ AuVerbose(verbose, "busy %pd\n", d);
027c5e7a 3493+ AuDbgDentry(d);
1facf9fc 3494+ }
3495+ di_read_unlock(d, AuLock_IR);
3496+ }
3497+ }
3498+
4f0767ce 3499+out_dpages:
1facf9fc 3500+ au_dpages_free(&dpages);
4f0767ce 3501+out:
1facf9fc 3502+ return err;
3503+}
3504+
3505+static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex,
b752ccd1 3506+ unsigned int sigen, const unsigned int verbose)
1facf9fc 3507+{
3508+ int err;
7f207e10
AM
3509+ unsigned long long max, ull;
3510+ struct inode *i, **array;
1facf9fc 3511+ aufs_bindex_t bstart, bend;
1facf9fc 3512+
7f207e10
AM
3513+ array = au_iarray_alloc(sb, &max);
3514+ err = PTR_ERR(array);
3515+ if (IS_ERR(array))
3516+ goto out;
3517+
1facf9fc 3518+ err = 0;
7f207e10
AM
3519+ AuDbg("b%d\n", bindex);
3520+ for (ull = 0; !err && ull < max; ull++) {
3521+ i = array[ull];
076b876e
AM
3522+ if (unlikely(!i))
3523+ break;
7f207e10 3524+ if (i->i_ino == AUFS_ROOT_INO)
1facf9fc 3525+ continue;
3526+
7f207e10 3527+ /* AuDbgInode(i); */
537831f9 3528+ if (au_iigen(i, NULL) == sigen)
1facf9fc 3529+ ii_read_lock_child(i);
3530+ else {
3531+ ii_write_lock_child(i);
027c5e7a
AM
3532+ err = au_refresh_hinode_self(i);
3533+ au_iigen_dec(i);
1facf9fc 3534+ if (!err)
3535+ ii_downgrade_lock(i);
3536+ else {
3537+ ii_write_unlock(i);
3538+ break;
3539+ }
3540+ }
3541+
3542+ bstart = au_ibstart(i);
3543+ bend = au_ibend(i);
3544+ if (bstart <= bindex
3545+ && bindex <= bend
3546+ && au_h_iptr(i, bindex)
027c5e7a 3547+ && au_test_ibusy(i, bstart, bend)) {
1facf9fc 3548+ err = -EBUSY;
3549+ AuVerbose(verbose, "busy i%lu\n", i->i_ino);
7f207e10 3550+ AuDbgInode(i);
1facf9fc 3551+ }
3552+ ii_read_unlock(i);
3553+ }
7f207e10 3554+ au_iarray_free(array, max);
1facf9fc 3555+
7f207e10 3556+out:
1facf9fc 3557+ return err;
3558+}
3559+
b752ccd1
AM
3560+static int test_children_busy(struct dentry *root, aufs_bindex_t bindex,
3561+ const unsigned int verbose)
1facf9fc 3562+{
3563+ int err;
3564+ unsigned int sigen;
3565+
3566+ sigen = au_sigen(root->d_sb);
3567+ DiMustNoWaiters(root);
3568+ IiMustNoWaiters(root->d_inode);
3569+ di_write_unlock(root);
b752ccd1 3570+ err = test_dentry_busy(root, bindex, sigen, verbose);
1facf9fc 3571+ if (!err)
b752ccd1 3572+ err = test_inode_busy(root->d_sb, bindex, sigen, verbose);
1facf9fc 3573+ di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */
3574+
3575+ return err;
3576+}
3577+
076b876e
AM
3578+static int test_dir_busy(struct file *file, aufs_bindex_t br_id,
3579+ struct file **to_free, int *idx)
3580+{
3581+ int err;
c1595e42 3582+ unsigned char matched, root;
076b876e
AM
3583+ aufs_bindex_t bindex, bend;
3584+ struct au_fidir *fidir;
3585+ struct au_hfile *hfile;
3586+
3587+ err = 0;
c1595e42
JR
3588+ root = IS_ROOT(file->f_dentry);
3589+ if (root) {
3590+ get_file(file);
3591+ to_free[*idx] = file;
3592+ (*idx)++;
3593+ goto out;
3594+ }
3595+
076b876e 3596+ matched = 0;
076b876e
AM
3597+ fidir = au_fi(file)->fi_hdir;
3598+ AuDebugOn(!fidir);
3599+ bend = au_fbend_dir(file);
3600+ for (bindex = au_fbstart(file); bindex <= bend; bindex++) {
3601+ hfile = fidir->fd_hfile + bindex;
3602+ if (!hfile->hf_file)
3603+ continue;
3604+
c1595e42 3605+ if (hfile->hf_br->br_id == br_id) {
076b876e 3606+ matched = 1;
076b876e 3607+ break;
c1595e42 3608+ }
076b876e 3609+ }
c1595e42 3610+ if (matched)
076b876e
AM
3611+ err = -EBUSY;
3612+
3613+out:
3614+ return err;
3615+}
3616+
3617+static int test_file_busy(struct super_block *sb, aufs_bindex_t br_id,
3618+ struct file **to_free, int opened)
3619+{
3620+ int err, idx;
3621+ unsigned long long ull, max;
3622+ aufs_bindex_t bstart;
3623+ struct file *file, **array;
076b876e
AM
3624+ struct dentry *root;
3625+ struct au_hfile *hfile;
3626+
3627+ array = au_farray_alloc(sb, &max);
3628+ err = PTR_ERR(array);
3629+ if (IS_ERR(array))
3630+ goto out;
3631+
3632+ err = 0;
3633+ idx = 0;
3634+ root = sb->s_root;
3635+ di_write_unlock(root);
3636+ for (ull = 0; ull < max; ull++) {
3637+ file = array[ull];
3638+ if (unlikely(!file))
3639+ break;
3640+
3641+ /* AuDbg("%pD\n", file); */
3642+ fi_read_lock(file);
3643+ bstart = au_fbstart(file);
7f2ca4b1 3644+ if (!d_is_dir(file->f_path.dentry)) {
076b876e
AM
3645+ hfile = &au_fi(file)->fi_htop;
3646+ if (hfile->hf_br->br_id == br_id)
3647+ err = -EBUSY;
3648+ } else
3649+ err = test_dir_busy(file, br_id, to_free, &idx);
3650+ fi_read_unlock(file);
3651+ if (unlikely(err))
3652+ break;
3653+ }
3654+ di_write_lock_child(root);
3655+ au_farray_free(array, max);
3656+ AuDebugOn(idx > opened);
3657+
3658+out:
3659+ return err;
3660+}
3661+
3662+static void br_del_file(struct file **to_free, unsigned long long opened,
3663+ aufs_bindex_t br_id)
3664+{
3665+ unsigned long long ull;
3666+ aufs_bindex_t bindex, bstart, bend, bfound;
3667+ struct file *file;
3668+ struct au_fidir *fidir;
3669+ struct au_hfile *hfile;
3670+
3671+ for (ull = 0; ull < opened; ull++) {
3672+ file = to_free[ull];
3673+ if (unlikely(!file))
3674+ break;
3675+
3676+ /* AuDbg("%pD\n", file); */
7f2ca4b1 3677+ AuDebugOn(!d_is_dir(file->f_path.dentry));
076b876e
AM
3678+ bfound = -1;
3679+ fidir = au_fi(file)->fi_hdir;
3680+ AuDebugOn(!fidir);
3681+ fi_write_lock(file);
3682+ bstart = au_fbstart(file);
3683+ bend = au_fbend_dir(file);
3684+ for (bindex = bstart; bindex <= bend; bindex++) {
3685+ hfile = fidir->fd_hfile + bindex;
3686+ if (!hfile->hf_file)
3687+ continue;
3688+
3689+ if (hfile->hf_br->br_id == br_id) {
3690+ bfound = bindex;
3691+ break;
3692+ }
3693+ }
3694+ AuDebugOn(bfound < 0);
3695+ au_set_h_fptr(file, bfound, NULL);
3696+ if (bfound == bstart) {
3697+ for (bstart++; bstart <= bend; bstart++)
3698+ if (au_hf_dir(file, bstart)) {
3699+ au_set_fbstart(file, bstart);
3700+ break;
3701+ }
3702+ }
3703+ fi_write_unlock(file);
3704+ }
3705+}
3706+
1facf9fc 3707+static void au_br_do_del_brp(struct au_sbinfo *sbinfo,
3708+ const aufs_bindex_t bindex,
3709+ const aufs_bindex_t bend)
3710+{
3711+ struct au_branch **brp, **p;
3712+
dece6358
AM
3713+ AuRwMustWriteLock(&sbinfo->si_rwsem);
3714+
1facf9fc 3715+ brp = sbinfo->si_branch + bindex;
3716+ if (bindex < bend)
3717+ memmove(brp, brp + 1, sizeof(*brp) * (bend - bindex));
3718+ sbinfo->si_branch[0 + bend] = NULL;
3719+ sbinfo->si_bend--;
3720+
53392da6 3721+ p = krealloc(sbinfo->si_branch, sizeof(*p) * bend, AuGFP_SBILIST);
1facf9fc 3722+ if (p)
3723+ sbinfo->si_branch = p;
4a4d8108 3724+ /* harmless error */
1facf9fc 3725+}
3726+
3727+static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex,
3728+ const aufs_bindex_t bend)
3729+{
3730+ struct au_hdentry *hdp, *p;
3731+
1308ab2a 3732+ AuRwMustWriteLock(&dinfo->di_rwsem);
3733+
4a4d8108 3734+ hdp = dinfo->di_hdentry;
1facf9fc 3735+ if (bindex < bend)
4a4d8108
AM
3736+ memmove(hdp + bindex, hdp + bindex + 1,
3737+ sizeof(*hdp) * (bend - bindex));
3738+ hdp[0 + bend].hd_dentry = NULL;
1facf9fc 3739+ dinfo->di_bend--;
3740+
53392da6 3741+ p = krealloc(hdp, sizeof(*p) * bend, AuGFP_SBILIST);
1facf9fc 3742+ if (p)
3743+ dinfo->di_hdentry = p;
4a4d8108 3744+ /* harmless error */
1facf9fc 3745+}
3746+
3747+static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex,
3748+ const aufs_bindex_t bend)
3749+{
3750+ struct au_hinode *hip, *p;
3751+
1308ab2a 3752+ AuRwMustWriteLock(&iinfo->ii_rwsem);
3753+
1facf9fc 3754+ hip = iinfo->ii_hinode + bindex;
3755+ if (bindex < bend)
3756+ memmove(hip, hip + 1, sizeof(*hip) * (bend - bindex));
3757+ iinfo->ii_hinode[0 + bend].hi_inode = NULL;
4a4d8108 3758+ au_hn_init(iinfo->ii_hinode + bend);
1facf9fc 3759+ iinfo->ii_bend--;
3760+
53392da6 3761+ p = krealloc(iinfo->ii_hinode, sizeof(*p) * bend, AuGFP_SBILIST);
1facf9fc 3762+ if (p)
3763+ iinfo->ii_hinode = p;
4a4d8108 3764+ /* harmless error */
1facf9fc 3765+}
3766+
3767+static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex,
3768+ struct au_branch *br)
3769+{
3770+ aufs_bindex_t bend;
3771+ struct au_sbinfo *sbinfo;
53392da6
AM
3772+ struct dentry *root, *h_root;
3773+ struct inode *inode, *h_inode;
3774+ struct au_hinode *hinode;
1facf9fc 3775+
dece6358
AM
3776+ SiMustWriteLock(sb);
3777+
1facf9fc 3778+ root = sb->s_root;
3779+ inode = root->d_inode;
1facf9fc 3780+ sbinfo = au_sbi(sb);
3781+ bend = sbinfo->si_bend;
3782+
53392da6
AM
3783+ h_root = au_h_dptr(root, bindex);
3784+ hinode = au_hi(inode, bindex);
3785+ h_inode = au_igrab(hinode->hi_inode);
3786+ au_hiput(hinode);
1facf9fc 3787+
53392da6 3788+ au_sbilist_lock();
1facf9fc 3789+ au_br_do_del_brp(sbinfo, bindex, bend);
3790+ au_br_do_del_hdp(au_di(root), bindex, bend);
3791+ au_br_do_del_hip(au_ii(inode), bindex, bend);
53392da6
AM
3792+ au_sbilist_unlock();
3793+
3794+ dput(h_root);
3795+ iput(h_inode);
3796+ au_br_do_free(br);
1facf9fc 3797+}
3798+
076b876e
AM
/*
 * Array callback which stores nothing into @array and simply reports @max
 * as the resulting element count. @array and @arg are unused.
 */
static unsigned long long empty_cb(void *array, unsigned long long max,
				   void *arg)
{
	return max;
}
3804+
1facf9fc 3805+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount)
3806+{
3807+ int err, rerr, i;
076b876e 3808+ unsigned long long opened;
1facf9fc 3809+ unsigned int mnt_flags;
3810+ aufs_bindex_t bindex, bend, br_id;
3811+ unsigned char do_wh, verbose;
3812+ struct au_branch *br;
3813+ struct au_wbr *wbr;
076b876e
AM
3814+ struct dentry *root;
3815+ struct file **to_free;
1facf9fc 3816+
3817+ err = 0;
076b876e
AM
3818+ opened = 0;
3819+ to_free = NULL;
3820+ root = sb->s_root;
3821+ bindex = au_find_dbindex(root, del->h_path.dentry);
1facf9fc 3822+ if (bindex < 0) {
3823+ if (remount)
3824+ goto out; /* success */
3825+ err = -ENOENT;
4a4d8108 3826+ pr_err("%s no such branch\n", del->pathname);
1facf9fc 3827+ goto out;
3828+ }
3829+ AuDbg("bindex b%d\n", bindex);
3830+
3831+ err = -EBUSY;
3832+ mnt_flags = au_mntflags(sb);
3833+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
3834+ bend = au_sbend(sb);
3835+ if (unlikely(!bend)) {
3836+ AuVerbose(verbose, "no more branches left\n");
3837+ goto out;
3838+ }
3839+ br = au_sbr(sb, bindex);
86dc4139 3840+ AuDebugOn(!path_equal(&br->br_path, &del->h_path));
076b876e
AM
3841+
3842+ br_id = br->br_id;
3843+ opened = atomic_read(&br->br_count);
3844+ if (unlikely(opened)) {
3845+ to_free = au_array_alloc(&opened, empty_cb, NULL);
3846+ err = PTR_ERR(to_free);
3847+ if (IS_ERR(to_free))
3848+ goto out;
3849+
3850+ err = test_file_busy(sb, br_id, to_free, opened);
3851+ if (unlikely(err)) {
3852+ AuVerbose(verbose, "%llu file(s) opened\n", opened);
3853+ goto out;
3854+ }
1facf9fc 3855+ }
3856+
3857+ wbr = br->br_wbr;
3858+ do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph);
3859+ if (do_wh) {
1308ab2a 3860+ /* instead of WbrWhMustWriteLock(wbr) */
3861+ SiMustWriteLock(sb);
1facf9fc 3862+ for (i = 0; i < AuBrWh_Last; i++) {
3863+ dput(wbr->wbr_wh[i]);
3864+ wbr->wbr_wh[i] = NULL;
3865+ }
3866+ }
3867+
076b876e 3868+ err = test_children_busy(root, bindex, verbose);
1facf9fc 3869+ if (unlikely(err)) {
3870+ if (do_wh)
3871+ goto out_wh;
3872+ goto out;
3873+ }
3874+
3875+ err = 0;
076b876e
AM
3876+ if (to_free) {
3877+ /*
3878+ * now we confirmed the branch is deletable.
3879+ * let's free the remaining opened dirs on the branch.
3880+ */
3881+ di_write_unlock(root);
3882+ br_del_file(to_free, opened, br_id);
3883+ di_write_lock_child(root);
3884+ }
3885+
1facf9fc 3886+ if (!remount)
3887+ au_br_do_del(sb, bindex, br);
3888+ else {
3889+ sysaufs_brs_del(sb, bindex);
3890+ au_br_do_del(sb, bindex, br);
3891+ sysaufs_brs_add(sb, bindex);
3892+ }
3893+
1308ab2a 3894+ if (!bindex) {
076b876e 3895+ au_cpup_attr_all(root->d_inode, /*force*/1);
1308ab2a 3896+ sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes;
3897+ } else
076b876e 3898+ au_sub_nlink(root->d_inode, del->h_path.dentry->d_inode);
1facf9fc 3899+ if (au_opt_test(mnt_flags, PLINK))
3900+ au_plink_half_refresh(sb, br_id);
3901+
b752ccd1 3902+ if (au_xino_brid(sb) == br_id)
1facf9fc 3903+ au_xino_brid_set(sb, -1);
3904+ goto out; /* success */
3905+
4f0767ce 3906+out_wh:
1facf9fc 3907+ /* revert */
86dc4139 3908+ rerr = au_br_init_wh(sb, br, br->br_perm);
1facf9fc 3909+ if (rerr)
0c3ec466
AM
3910+ pr_warn("failed re-creating base whiteout, %s. (%d)\n",
3911+ del->pathname, rerr);
4f0767ce 3912+out:
076b876e
AM
3913+ if (to_free)
3914+ au_farray_free(to_free, opened);
1facf9fc 3915+ return err;
3916+}
3917+
3918+/* ---------------------------------------------------------------------- */
3919+
027c5e7a
AM
3920+static int au_ibusy(struct super_block *sb, struct aufs_ibusy __user *arg)
3921+{
3922+ int err;
3923+ aufs_bindex_t bstart, bend;
3924+ struct aufs_ibusy ibusy;
3925+ struct inode *inode, *h_inode;
3926+
3927+ err = -EPERM;
3928+ if (unlikely(!capable(CAP_SYS_ADMIN)))
3929+ goto out;
3930+
3931+ err = copy_from_user(&ibusy, arg, sizeof(ibusy));
3932+ if (!err)
3933+ err = !access_ok(VERIFY_WRITE, &arg->h_ino, sizeof(arg->h_ino));
3934+ if (unlikely(err)) {
3935+ err = -EFAULT;
3936+ AuTraceErr(err);
3937+ goto out;
3938+ }
3939+
3940+ err = -EINVAL;
3941+ si_read_lock(sb, AuLock_FLUSH);
3942+ if (unlikely(ibusy.bindex < 0 || ibusy.bindex > au_sbend(sb)))
3943+ goto out_unlock;
3944+
3945+ err = 0;
3946+ ibusy.h_ino = 0; /* invalid */
3947+ inode = ilookup(sb, ibusy.ino);
3948+ if (!inode
3949+ || inode->i_ino == AUFS_ROOT_INO
3950+ || is_bad_inode(inode))
3951+ goto out_unlock;
3952+
3953+ ii_read_lock_child(inode);
3954+ bstart = au_ibstart(inode);
3955+ bend = au_ibend(inode);
3956+ if (bstart <= ibusy.bindex && ibusy.bindex <= bend) {
3957+ h_inode = au_h_iptr(inode, ibusy.bindex);
3958+ if (h_inode && au_test_ibusy(inode, bstart, bend))
3959+ ibusy.h_ino = h_inode->i_ino;
3960+ }
3961+ ii_read_unlock(inode);
3962+ iput(inode);
3963+
3964+out_unlock:
3965+ si_read_unlock(sb);
3966+ if (!err) {
3967+ err = __put_user(ibusy.h_ino, &arg->h_ino);
3968+ if (unlikely(err)) {
3969+ err = -EFAULT;
3970+ AuTraceErr(err);
3971+ }
3972+ }
3973+out:
3974+ return err;
3975+}
3976+
3977+long au_ibusy_ioctl(struct file *file, unsigned long arg)
3978+{
3979+ return au_ibusy(file->f_dentry->d_sb, (void __user *)arg);
3980+}
3981+
#ifdef CONFIG_COMPAT
/* compat ioctl entry: @arg is converted with compat_ptr() before use */
long au_ibusy_compat_ioctl(struct file *file, unsigned long arg)
{
	struct super_block *sb;

	sb = file->f_dentry->d_sb;
	return au_ibusy(sb, compat_ptr(arg));
}
#endif
3988+
3989+/* ---------------------------------------------------------------------- */
3990+
1facf9fc 3991+/*
3992+ * change a branch permission
3993+ */
3994+
dece6358
AM
/* warn about the RW -> RO switch when IMA is configured, else do nothing */
static void au_warn_ima(void)
{
#ifdef CONFIG_IMA
	/* IMA has no support for mark_files_ro() */
	AuWarn1("RW -> RO makes IMA to produce wrong message\n");
#endif
}
4002+
/* true when permission @a is whiteout-able while permission @b is not */
static int do_need_sigen_inc(int a, int b)
{
	if (!au_br_whable(a))
		return 0;
	return !au_br_whable(b);
}
4007+
/*
 * true when exactly one of the @old and @new permissions is whiteout-able,
 * checked in both directions with do_need_sigen_inc()
 */
static int need_sigen_inc(int old, int new)
{
	if (do_need_sigen_inc(old, new))
		return 1;
	return !!do_need_sigen_inc(new, old);
}
4013+
4014+static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex)
4015+{
7f207e10 4016+ int err, do_warn;
027c5e7a 4017+ unsigned int mnt_flags;
7f207e10 4018+ unsigned long long ull, max;
e49829fe 4019+ aufs_bindex_t br_id;
38d290e6 4020+ unsigned char verbose, writer;
7f207e10 4021+ struct file *file, *hf, **array;
e49829fe
JR
4022+ struct inode *inode;
4023+ struct au_hfile *hfile;
1facf9fc 4024+
027c5e7a
AM
4025+ mnt_flags = au_mntflags(sb);
4026+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
4027+
7f207e10
AM
4028+ array = au_farray_alloc(sb, &max);
4029+ err = PTR_ERR(array);
4030+ if (IS_ERR(array))
1facf9fc 4031+ goto out;
4032+
7f207e10 4033+ do_warn = 0;
e49829fe 4034+ br_id = au_sbr_id(sb, bindex);
7f207e10
AM
4035+ for (ull = 0; ull < max; ull++) {
4036+ file = array[ull];
076b876e
AM
4037+ if (unlikely(!file))
4038+ break;
1facf9fc 4039+
523b37e3 4040+ /* AuDbg("%pD\n", file); */
1facf9fc 4041+ fi_read_lock(file);
4042+ if (unlikely(au_test_mmapped(file))) {
4043+ err = -EBUSY;
523b37e3 4044+ AuVerbose(verbose, "mmapped %pD\n", file);
7f207e10 4045+ AuDbgFile(file);
1facf9fc 4046+ FiMustNoWaiters(file);
4047+ fi_read_unlock(file);
7f207e10 4048+ goto out_array;
1facf9fc 4049+ }
4050+
c06a8ce3 4051+ inode = file_inode(file);
e49829fe
JR
4052+ hfile = &au_fi(file)->fi_htop;
4053+ hf = hfile->hf_file;
4054+ if (!S_ISREG(inode->i_mode)
1facf9fc 4055+ || !(file->f_mode & FMODE_WRITE)
e49829fe 4056+ || hfile->hf_br->br_id != br_id
7f207e10
AM
4057+ || !(hf->f_mode & FMODE_WRITE))
4058+ array[ull] = NULL;
4059+ else {
4060+ do_warn = 1;
4061+ get_file(file);
1facf9fc 4062+ }
4063+
1facf9fc 4064+ FiMustNoWaiters(file);
4065+ fi_read_unlock(file);
7f207e10
AM
4066+ fput(file);
4067+ }
1facf9fc 4068+
4069+ err = 0;
7f207e10 4070+ if (do_warn)
dece6358 4071+ au_warn_ima();
7f207e10
AM
4072+
4073+ for (ull = 0; ull < max; ull++) {
4074+ file = array[ull];
4075+ if (!file)
4076+ continue;
4077+
1facf9fc 4078+ /* todo: already flushed? */
523b37e3
AM
4079+ /*
4080+ * fs/super.c:mark_files_ro() is gone, but aufs keeps its
4081+ * approach which resets f_mode and calls mnt_drop_write() and
4082+ * file_release_write() for each file, because the branch
4083+ * attribute in aufs world is totally different from the native
4084+ * fs rw/ro mode.
4085+ */
7f207e10
AM
4086+ /* fi_read_lock(file); */
4087+ hfile = &au_fi(file)->fi_htop;
4088+ hf = hfile->hf_file;
4089+ /* fi_read_unlock(file); */
027c5e7a 4090+ spin_lock(&hf->f_lock);
38d290e6
JR
4091+ writer = !!(hf->f_mode & FMODE_WRITER);
4092+ hf->f_mode &= ~(FMODE_WRITE | FMODE_WRITER);
027c5e7a 4093+ spin_unlock(&hf->f_lock);
38d290e6
JR
4094+ if (writer) {
4095+ put_write_access(file_inode(hf));
c06a8ce3 4096+ __mnt_drop_write(hf->f_path.mnt);
1facf9fc 4097+ }
4098+ }
4099+
7f207e10
AM
4100+out_array:
4101+ au_farray_free(array, max);
4f0767ce 4102+out:
7f207e10 4103+ AuTraceErr(err);
1facf9fc 4104+ return err;
4105+}
4106+
4107+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
7f207e10 4108+ int *do_refresh)
1facf9fc 4109+{
4110+ int err, rerr;
4111+ aufs_bindex_t bindex;
4112+ struct dentry *root;
4113+ struct au_branch *br;
076b876e 4114+ struct au_br_fhsm *bf;
1facf9fc 4115+
4116+ root = sb->s_root;
1facf9fc 4117+ bindex = au_find_dbindex(root, mod->h_root);
4118+ if (bindex < 0) {
4119+ if (remount)
4120+ return 0; /* success */
4121+ err = -ENOENT;
4a4d8108 4122+ pr_err("%s no such branch\n", mod->path);
1facf9fc 4123+ goto out;
4124+ }
4125+ AuDbg("bindex b%d\n", bindex);
4126+
4127+ err = test_br(mod->h_root->d_inode, mod->perm, mod->path);
4128+ if (unlikely(err))
4129+ goto out;
4130+
4131+ br = au_sbr(sb, bindex);
86dc4139 4132+ AuDebugOn(mod->h_root != au_br_dentry(br));
1facf9fc 4133+ if (br->br_perm == mod->perm)
4134+ return 0; /* success */
4135+
076b876e
AM
4136+ /* pre-allocate for non-fhsm --> fhsm */
4137+ bf = NULL;
4138+ if (!au_br_fhsm(br->br_perm) && au_br_fhsm(mod->perm)) {
4139+ err = au_fhsm_br_alloc(br);
4140+ if (unlikely(err))
4141+ goto out;
4142+ bf = br->br_fhsm;
4143+ br->br_fhsm = NULL;
4144+ }
4145+
1facf9fc 4146+ if (au_br_writable(br->br_perm)) {
4147+ /* remove whiteout base */
86dc4139 4148+ err = au_br_init_wh(sb, br, mod->perm);
1facf9fc 4149+ if (unlikely(err))
076b876e 4150+ goto out_bf;
1facf9fc 4151+
4152+ if (!au_br_writable(mod->perm)) {
4153+ /* rw --> ro, file might be mmapped */
4154+ DiMustNoWaiters(root);
4155+ IiMustNoWaiters(root->d_inode);
4156+ di_write_unlock(root);
4157+ err = au_br_mod_files_ro(sb, bindex);
4158+ /* aufs_write_lock() calls ..._child() */
4159+ di_write_lock_child(root);
4160+
4161+ if (unlikely(err)) {
4162+ rerr = -ENOMEM;
7f2ca4b1 4163+ br->br_wbr = kzalloc(sizeof(*br->br_wbr),
1facf9fc 4164+ GFP_NOFS);
86dc4139
AM
4165+ if (br->br_wbr)
4166+ rerr = au_wbr_init(br, sb, br->br_perm);
1facf9fc 4167+ if (unlikely(rerr)) {
4168+ AuIOErr("nested error %d (%d)\n",
4169+ rerr, err);
4170+ br->br_perm = mod->perm;
4171+ }
4172+ }
4173+ }
4174+ } else if (au_br_writable(mod->perm)) {
4175+ /* ro --> rw */
4176+ err = -ENOMEM;
7f2ca4b1 4177+ br->br_wbr = kzalloc(sizeof(*br->br_wbr), GFP_NOFS);
1facf9fc 4178+ if (br->br_wbr) {
86dc4139 4179+ err = au_wbr_init(br, sb, mod->perm);
1facf9fc 4180+ if (unlikely(err)) {
4181+ kfree(br->br_wbr);
4182+ br->br_wbr = NULL;
4183+ }
4184+ }
4185+ }
076b876e
AM
4186+ if (unlikely(err))
4187+ goto out_bf;
4188+
4189+ if (au_br_fhsm(br->br_perm)) {
4190+ if (!au_br_fhsm(mod->perm)) {
4191+ /* fhsm --> non-fhsm */
4192+ au_br_fhsm_fin(br->br_fhsm);
4193+ kfree(br->br_fhsm);
4194+ br->br_fhsm = NULL;
4195+ }
4196+ } else if (au_br_fhsm(mod->perm))
4197+ /* non-fhsm --> fhsm */
4198+ br->br_fhsm = bf;
4199+
076b876e
AM
4200+ *do_refresh |= need_sigen_inc(br->br_perm, mod->perm);
4201+ br->br_perm = mod->perm;
4202+ goto out; /* success */
1facf9fc 4203+
076b876e
AM
4204+out_bf:
4205+ kfree(bf);
4206+out:
4207+ AuTraceErr(err);
4208+ return err;
4209+}
4210+
4211+/* ---------------------------------------------------------------------- */
4212+
4213+int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs)
4214+{
4215+ int err;
4216+ struct kstatfs kstfs;
4217+
4218+ err = vfs_statfs(&br->br_path, &kstfs);
1facf9fc 4219+ if (!err) {
076b876e
AM
4220+ stfs->f_blocks = kstfs.f_blocks;
4221+ stfs->f_bavail = kstfs.f_bavail;
4222+ stfs->f_files = kstfs.f_files;
4223+ stfs->f_ffree = kstfs.f_ffree;
1facf9fc 4224+ }
4225+
1facf9fc 4226+ return err;
4227+}
7f207e10
AM
4228diff -urN /usr/share/empty/fs/aufs/branch.h linux/fs/aufs/branch.h
4229--- /usr/share/empty/fs/aufs/branch.h 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
4230+++ linux/fs/aufs/branch.h 2016-02-28 11:27:01.277245613 +0100
4231@@ -0,0 +1,279 @@
1facf9fc 4232+/*
7f2ca4b1 4233+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 4234+ *
4235+ * This program, aufs is free software; you can redistribute it and/or modify
4236+ * it under the terms of the GNU General Public License as published by
4237+ * the Free Software Foundation; either version 2 of the License, or
4238+ * (at your option) any later version.
dece6358
AM
4239+ *
4240+ * This program is distributed in the hope that it will be useful,
4241+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4242+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4243+ * GNU General Public License for more details.
4244+ *
4245+ * You should have received a copy of the GNU General Public License
523b37e3 4246+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 4247+ */
4248+
4249+/*
4250+ * branch filesystems and xino for them
4251+ */
4252+
4253+#ifndef __AUFS_BRANCH_H__
4254+#define __AUFS_BRANCH_H__
4255+
4256+#ifdef __KERNEL__
4257+
1facf9fc 4258+#include <linux/mount.h>
4a4d8108 4259+#include "dynop.h"
1facf9fc 4260+#include "rwsem.h"
4261+#include "super.h"
4262+
4263+/* ---------------------------------------------------------------------- */
4264+
4265+/* a xino file */
4266+struct au_xino_file {
4267+ struct file *xi_file;
4268+ struct mutex xi_nondir_mtx;
4269+
4270+ /* todo: make xino files an array to support huge inode number */
4271+
4272+#ifdef CONFIG_DEBUG_FS
4273+ struct dentry *xi_dbgaufs;
4274+#endif
4275+};
4276+
076b876e
AM
4277+/* File-based Hierarchical Storage Management */
/* per-branch FHSM data; has no members unless CONFIG_AUFS_FHSM is set */
struct au_br_fhsm {
#ifdef CONFIG_AUFS_FHSM
	struct mutex		bf_lock;
	unsigned long		bf_jiffy;
	struct aufs_stfs	bf_stfs;
	int			bf_readable;
#endif
};
4286+
1facf9fc 4287+/* members for writable branch only */
4288+enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last};
4289+struct au_wbr {
dece6358 4290+ struct au_rwsem wbr_wh_rwsem;
1facf9fc 4291+ struct dentry *wbr_wh[AuBrWh_Last];
4a4d8108 4292+ atomic_t wbr_wh_running;
1facf9fc 4293+#define wbr_whbase wbr_wh[AuBrWh_BASE] /* whiteout base */
4294+#define wbr_plink wbr_wh[AuBrWh_PLINK] /* pseudo-link dir */
4295+#define wbr_orph wbr_wh[AuBrWh_ORPH] /* dir for orphans */
4296+
4297+ /* mfs mode */
4298+ unsigned long long wbr_bytes;
4299+};
4300+
4a4d8108
AM
4301+/* ext2 has 3 types of operations at least, ext3 has 4 */
4302+#define AuBrDynOp (AuDyLast * 4)
4303+
1716fcea
AM
#ifdef CONFIG_AUFS_HFSNOTIFY
/* kept as a separate object to support asynchronous destruction */
struct au_br_hfsnotify {
	struct fsnotify_group	*hfsn_group;
};
#endif
4310+
392086de
AM
4311+/* sysfs entries */
4312+struct au_brsysfs {
4313+ char name[16];
4314+ struct attribute attr;
4315+};
4316+
/* indices into au_branch.br_sysfs[]; AuBrSysfs_Last is the array size */
enum {
	AuBrSysfs_BR,
	AuBrSysfs_BRID,
	AuBrSysfs_Last
};
4322+
1facf9fc 4323+/* protected by superblock rwsem */
4324+struct au_branch {
4325+ struct au_xino_file br_xino;
4326+
4327+ aufs_bindex_t br_id;
4328+
4329+ int br_perm;
86dc4139 4330+ struct path br_path;
4a4d8108
AM
4331+ spinlock_t br_dykey_lock;
4332+ struct au_dykey *br_dykey[AuBrDynOp];
1facf9fc 4333+ atomic_t br_count;
4334+
4335+ struct au_wbr *br_wbr;
076b876e 4336+ struct au_br_fhsm *br_fhsm;
1facf9fc 4337+
4338+ /* xino truncation */
1facf9fc 4339+ atomic_t br_xino_running;
4340+
027c5e7a 4341+#ifdef CONFIG_AUFS_HFSNOTIFY
1716fcea 4342+ struct au_br_hfsnotify *br_hfsn;
027c5e7a
AM
4343+#endif
4344+
1facf9fc 4345+#ifdef CONFIG_SYSFS
392086de
AM
4346+ /* entries under sysfs per mount-point */
4347+ struct au_brsysfs br_sysfs[AuBrSysfs_Last];
1facf9fc 4348+#endif
4349+};
4350+
4351+/* ---------------------------------------------------------------------- */
4352+
86dc4139
AM
4353+static inline struct vfsmount *au_br_mnt(struct au_branch *br)
4354+{
4355+ return br->br_path.mnt;
4356+}
4357+
4358+static inline struct dentry *au_br_dentry(struct au_branch *br)
4359+{
4360+ return br->br_path.dentry;
4361+}
4362+
4363+static inline struct super_block *au_br_sb(struct au_branch *br)
4364+{
4365+ return au_br_mnt(br)->mnt_sb;
4366+}
4367+
1facf9fc 4368+static inline int au_br_rdonly(struct au_branch *br)
4369+{
86dc4139 4370+ return ((au_br_sb(br)->s_flags & MS_RDONLY)
1facf9fc 4371+ || !au_br_writable(br->br_perm))
4372+ ? -EROFS : 0;
4373+}
4374+
4a4d8108 4375+static inline int au_br_hnotifyable(int brperm __maybe_unused)
1facf9fc 4376+{
4a4d8108 4377+#ifdef CONFIG_AUFS_HNOTIFY
1e00d052 4378+ return !(brperm & AuBrPerm_RR);
1facf9fc 4379+#else
4380+ return 0;
4381+#endif
4382+}
4383+
7f2ca4b1
JR
4384+static inline int au_br_test_oflag(int oflag, struct au_branch *br)
4385+{
4386+ int err, exec_flag;
4387+
4388+ err = 0;
4389+ exec_flag = oflag & __FMODE_EXEC;
4390+ if (unlikely(exec_flag && (au_br_mnt(br)->mnt_flags & MNT_NOEXEC)))
4391+ err = -EACCES;
4392+
4393+ return err;
4394+}
4395+
1facf9fc 4396+/* ---------------------------------------------------------------------- */
4397+
4398+/* branch.c */
4399+struct au_sbinfo;
4400+void au_br_free(struct au_sbinfo *sinfo);
4401+int au_br_index(struct super_block *sb, aufs_bindex_t br_id);
4402+struct au_opt_add;
4403+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount);
4404+struct au_opt_del;
4405+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount);
027c5e7a
AM
4406+long au_ibusy_ioctl(struct file *file, unsigned long arg);
4407+#ifdef CONFIG_COMPAT
4408+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg);
4409+#endif
1facf9fc 4410+struct au_opt_mod;
4411+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
7f207e10 4412+ int *do_refresh);
076b876e
AM
4413+struct aufs_stfs;
4414+int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs);
1facf9fc 4415+
4416+/* xino.c */
4417+static const loff_t au_loff_max = LLONG_MAX;
4418+
4419+int au_xib_trunc(struct super_block *sb);
4420+ssize_t xino_fread(au_readf_t func, struct file *file, void *buf, size_t size,
4421+ loff_t *pos);
4422+ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size,
4423+ loff_t *pos);
4424+struct file *au_xino_create2(struct file *base_file, struct file *copy_src);
4425+struct file *au_xino_create(struct super_block *sb, char *fname, int silent);
4426+ino_t au_xino_new_ino(struct super_block *sb);
b752ccd1 4427+void au_xino_delete_inode(struct inode *inode, const int unlinked);
1facf9fc 4428+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
4429+ ino_t ino);
4430+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
4431+ ino_t *ino);
4432+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t hino,
4433+ struct file *base_file, int do_test);
4434+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex);
4435+
4436+struct au_opt_xino;
4437+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount);
4438+void au_xino_clr(struct super_block *sb);
4439+struct file *au_xino_def(struct super_block *sb);
4440+int au_xino_path(struct seq_file *seq, struct file *file);
4441+
4442+/* ---------------------------------------------------------------------- */
4443+
4444+/* Superblock to branch */
4445+static inline
4446+aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex)
4447+{
4448+ return au_sbr(sb, bindex)->br_id;
4449+}
4450+
4451+static inline
4452+struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex)
4453+{
86dc4139 4454+ return au_br_mnt(au_sbr(sb, bindex));
1facf9fc 4455+}
4456+
4457+static inline
4458+struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex)
4459+{
86dc4139 4460+ return au_br_sb(au_sbr(sb, bindex));
1facf9fc 4461+}
4462+
4463+static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex)
4464+{
e49829fe 4465+ atomic_dec(&au_sbr(sb, bindex)->br_count);
1facf9fc 4466+}
4467+
4468+static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex)
4469+{
4470+ return au_sbr(sb, bindex)->br_perm;
4471+}
4472+
4473+static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex)
4474+{
4475+ return au_br_whable(au_sbr_perm(sb, bindex));
4476+}
4477+
4478+/* ---------------------------------------------------------------------- */
4479+
4480+/*
4481+ * wbr_wh_read_lock, wbr_wh_write_lock
4482+ * wbr_wh_read_unlock, wbr_wh_write_unlock, wbr_wh_downgrade_lock
4483+ */
4484+AuSimpleRwsemFuncs(wbr_wh, struct au_wbr *wbr, &wbr->wbr_wh_rwsem);
4485+
dece6358
AM
4486+#define WbrWhMustNoWaiters(wbr) AuRwMustNoWaiters(&wbr->wbr_wh_rwsem)
4487+#define WbrWhMustAnyLock(wbr) AuRwMustAnyLock(&wbr->wbr_wh_rwsem)
4488+#define WbrWhMustWriteLock(wbr) AuRwMustWriteLock(&wbr->wbr_wh_rwsem)
4489+
076b876e
AM
4490+/* ---------------------------------------------------------------------- */
4491+
4492+#ifdef CONFIG_AUFS_FHSM
4493+static inline void au_br_fhsm_init(struct au_br_fhsm *brfhsm)
4494+{
4495+ mutex_init(&brfhsm->bf_lock);
4496+ brfhsm->bf_jiffy = 0;
4497+ brfhsm->bf_readable = 0;
4498+}
4499+
4500+static inline void au_br_fhsm_fin(struct au_br_fhsm *brfhsm)
4501+{
4502+ mutex_destroy(&brfhsm->bf_lock);
4503+}
4504+#else
4505+AuStubVoid(au_br_fhsm_init, struct au_br_fhsm *brfhsm)
4506+AuStubVoid(au_br_fhsm_fin, struct au_br_fhsm *brfhsm)
4507+#endif
4508+
1facf9fc 4509+#endif /* __KERNEL__ */
4510+#endif /* __AUFS_BRANCH_H__ */
7f207e10
AM
4511diff -urN /usr/share/empty/fs/aufs/conf.mk linux/fs/aufs/conf.mk
4512--- /usr/share/empty/fs/aufs/conf.mk 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 4513+++ linux/fs/aufs/conf.mk 2016-02-28 11:27:01.277245613 +0100
c1595e42 4514@@ -0,0 +1,38 @@
4a4d8108
AM
4515+
# Collect the aufs related CONFIG_ values into ${obj}/conf.str, one quoted
# "NAME=value\n" string per line.

AuConfStr = CONFIG_AUFS_FS=${CONFIG_AUFS_FS}

# $(call AuConf,NAME): append NAME=value to AuConfStr when NAME is defined
define AuConf
ifdef ${1}
AuConfStr += ${1}=${${1}}
endif
endef

# option names, without the CONFIG_AUFS_ prefix
AuConfAll = BRANCH_MAX_127 BRANCH_MAX_511 BRANCH_MAX_1023 BRANCH_MAX_32767 \
	SBILIST \
	HNOTIFY HFSNOTIFY \
	EXPORT INO_T_64 \
	XATTR \
	FHSM \
	RDU \
	SHWH \
	BR_RAMFS \
	BR_FUSE POLL \
	BR_HFSPLUS \
	BDEV_LOOP \
	DEBUG MAGIC_SYSRQ
$(foreach i, ${AuConfAll}, \
	$(eval $(call AuConf,CONFIG_AUFS_${i})))

AuConfName = ${obj}/conf.str
# the .tmp file is rebuilt on every run (FORCE) ...
${AuConfName}.tmp: FORCE
	@echo ${AuConfStr} | tr ' ' '\n' | sed -e 's/^/"/' -e 's/$$/\\n"/' > $@
# ... and copied over conf.str only when the two files differ
${AuConfName}: ${AuConfName}.tmp
	@diff -q $< $@ > /dev/null 2>&1 || { \
	echo ' GEN ' $@; \
	cp -p $< $@; \
	}
FORCE:
clean-files += ${AuConfName} ${AuConfName}.tmp
${obj}/sysfs.o: ${AuConfName}

# optional private additions; a missing file is not an error
-include ${srctree}/${src}/conf_priv.mk
7f207e10
AM
4553diff -urN /usr/share/empty/fs/aufs/cpup.c linux/fs/aufs/cpup.c
4554--- /usr/share/empty/fs/aufs/cpup.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
4555+++ linux/fs/aufs/cpup.c 2016-02-28 11:27:01.277245613 +0100
4556@@ -0,0 +1,1368 @@
1facf9fc 4557+/*
7f2ca4b1 4558+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 4559+ *
4560+ * This program, aufs is free software; you can redistribute it and/or modify
4561+ * it under the terms of the GNU General Public License as published by
4562+ * the Free Software Foundation; either version 2 of the License, or
4563+ * (at your option) any later version.
dece6358
AM
4564+ *
4565+ * This program is distributed in the hope that it will be useful,
4566+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4567+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4568+ * GNU General Public License for more details.
4569+ *
4570+ * You should have received a copy of the GNU General Public License
523b37e3 4571+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 4572+ */
4573+
4574+/*
4575+ * copy-up functions, see wbr_policy.c for copy-down
4576+ */
4577+
4578+#include <linux/fs_stack.h>
dece6358 4579+#include <linux/mm.h>
7f2ca4b1 4580+#include <linux/task_work.h>
1facf9fc 4581+#include "aufs.h"
4582+
86dc4139 4583+void au_cpup_attr_flags(struct inode *dst, unsigned int iflags)
1facf9fc 4584+{
4585+ const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE
367653fa 4586+ | S_NOATIME | S_NOCMTIME | S_AUTOMOUNT;
1facf9fc 4587+
86dc4139
AM
4588+ BUILD_BUG_ON(sizeof(iflags) != sizeof(dst->i_flags));
4589+
4590+ dst->i_flags |= iflags & ~mask;
1facf9fc 4591+ if (au_test_fs_notime(dst->i_sb))
4592+ dst->i_flags |= S_NOATIME | S_NOCMTIME;
4593+}
4594+
4595+void au_cpup_attr_timesizes(struct inode *inode)
4596+{
4597+ struct inode *h_inode;
4598+
4599+ h_inode = au_h_iptr(inode, au_ibstart(inode));
4600+ fsstack_copy_attr_times(inode, h_inode);
4a4d8108 4601+ fsstack_copy_inode_size(inode, h_inode);
1facf9fc 4602+}
4603+
4604+void au_cpup_attr_nlink(struct inode *inode, int force)
4605+{
4606+ struct inode *h_inode;
4607+ struct super_block *sb;
4608+ aufs_bindex_t bindex, bend;
4609+
4610+ sb = inode->i_sb;
4611+ bindex = au_ibstart(inode);
4612+ h_inode = au_h_iptr(inode, bindex);
4613+ if (!force
4614+ && !S_ISDIR(h_inode->i_mode)
4615+ && au_opt_test(au_mntflags(sb), PLINK)
4616+ && au_plink_test(inode))
4617+ return;
4618+
7eafdf33
AM
4619+ /*
4620+ * 0 can happen in revalidating.
38d290e6
JR
4621+ * h_inode->i_mutex may not be held here, but it is harmless since once
4622+ * i_nlink reaches 0, it will never become positive except O_TMPFILE
4623+ * case.
4624+ * todo: O_TMPFILE+linkat(AT_SYMLINK_FOLLOW) bypassing aufs may cause
4625+ * the incorrect link count.
7eafdf33 4626+ */
92d182d2 4627+ set_nlink(inode, h_inode->i_nlink);
1facf9fc 4628+
4629+ /*
4630+ * fewer nlink makes find(1) noisy, but larger nlink doesn't.
4631+ * it may includes whplink directory.
4632+ */
4633+ if (S_ISDIR(h_inode->i_mode)) {
4634+ bend = au_ibend(inode);
4635+ for (bindex++; bindex <= bend; bindex++) {
4636+ h_inode = au_h_iptr(inode, bindex);
4637+ if (h_inode)
4638+ au_add_nlink(inode, h_inode);
4639+ }
4640+ }
4641+}
4642+
4643+void au_cpup_attr_changeable(struct inode *inode)
4644+{
4645+ struct inode *h_inode;
4646+
4647+ h_inode = au_h_iptr(inode, au_ibstart(inode));
4648+ inode->i_mode = h_inode->i_mode;
4649+ inode->i_uid = h_inode->i_uid;
4650+ inode->i_gid = h_inode->i_gid;
4651+ au_cpup_attr_timesizes(inode);
86dc4139 4652+ au_cpup_attr_flags(inode, h_inode->i_flags);
1facf9fc 4653+}
4654+
4655+void au_cpup_igen(struct inode *inode, struct inode *h_inode)
4656+{
4657+ struct au_iinfo *iinfo = au_ii(inode);
4658+
1308ab2a 4659+ IiMustWriteLock(inode);
4660+
1facf9fc 4661+ iinfo->ii_higen = h_inode->i_generation;
4662+ iinfo->ii_hsb1 = h_inode->i_sb;
4663+}
4664+
4665+void au_cpup_attr_all(struct inode *inode, int force)
4666+{
4667+ struct inode *h_inode;
4668+
4669+ h_inode = au_h_iptr(inode, au_ibstart(inode));
4670+ au_cpup_attr_changeable(inode);
4671+ if (inode->i_nlink > 0)
4672+ au_cpup_attr_nlink(inode, force);
4673+ inode->i_rdev = h_inode->i_rdev;
4674+ inode->i_blkbits = h_inode->i_blkbits;
4675+ au_cpup_igen(inode, h_inode);
4676+}
4677+
4678+/* ---------------------------------------------------------------------- */
4679+
4680+/* Note: dt_dentry and dt_h_dentry are not dget/dput-ed */
4681+
4682+/* keep the timestamps of the parent dir when cpup */
4683+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
4684+ struct path *h_path)
4685+{
4686+ struct inode *h_inode;
4687+
4688+ dt->dt_dentry = dentry;
4689+ dt->dt_h_path = *h_path;
4690+ h_inode = h_path->dentry->d_inode;
4691+ dt->dt_atime = h_inode->i_atime;
4692+ dt->dt_mtime = h_inode->i_mtime;
4693+ /* smp_mb(); */
4694+}
4695+
4696+void au_dtime_revert(struct au_dtime *dt)
4697+{
4698+ struct iattr attr;
4699+ int err;
4700+
4701+ attr.ia_atime = dt->dt_atime;
4702+ attr.ia_mtime = dt->dt_mtime;
4703+ attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET
4704+ | ATTR_ATIME | ATTR_ATIME_SET;
4705+
523b37e3
AM
4706+ /* no delegation since this is a directory */
4707+ err = vfsub_notify_change(&dt->dt_h_path, &attr, /*delegated*/NULL);
1facf9fc 4708+ if (unlikely(err))
0c3ec466 4709+ pr_warn("restoring timestamps failed(%d). ignored\n", err);
1facf9fc 4710+}
4711+
4712+/* ---------------------------------------------------------------------- */
4713+
86dc4139
AM
4714+/* internal use only */
4715+struct au_cpup_reg_attr {
4716+ int valid;
4717+ struct kstat st;
4718+ unsigned int iflags; /* inode->i_flags */
4719+};
4720+
1facf9fc 4721+static noinline_for_stack
86dc4139
AM
4722+int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct dentry *h_src,
4723+ struct au_cpup_reg_attr *h_src_attr)
1facf9fc 4724+{
c1595e42 4725+ int err, sbits, icex;
7f2ca4b1
JR
4726+ unsigned int mnt_flags;
4727+ unsigned char verbose;
1facf9fc 4728+ struct iattr ia;
4729+ struct path h_path;
1308ab2a 4730+ struct inode *h_isrc, *h_idst;
86dc4139 4731+ struct kstat *h_st;
c1595e42 4732+ struct au_branch *br;
1facf9fc 4733+
4734+ h_path.dentry = au_h_dptr(dst, bindex);
1308ab2a 4735+ h_idst = h_path.dentry->d_inode;
c1595e42
JR
4736+ br = au_sbr(dst->d_sb, bindex);
4737+ h_path.mnt = au_br_mnt(br);
1facf9fc 4738+ h_isrc = h_src->d_inode;
1308ab2a 4739+ ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID
1facf9fc 4740+ | ATTR_ATIME | ATTR_MTIME
4741+ | ATTR_ATIME_SET | ATTR_MTIME_SET;
86dc4139
AM
4742+ if (h_src_attr && h_src_attr->valid) {
4743+ h_st = &h_src_attr->st;
4744+ ia.ia_uid = h_st->uid;
4745+ ia.ia_gid = h_st->gid;
4746+ ia.ia_atime = h_st->atime;
4747+ ia.ia_mtime = h_st->mtime;
4748+ if (h_idst->i_mode != h_st->mode
4749+ && !S_ISLNK(h_idst->i_mode)) {
4750+ ia.ia_valid |= ATTR_MODE;
4751+ ia.ia_mode = h_st->mode;
4752+ }
4753+ sbits = !!(h_st->mode & (S_ISUID | S_ISGID));
4754+ au_cpup_attr_flags(h_idst, h_src_attr->iflags);
4755+ } else {
4756+ ia.ia_uid = h_isrc->i_uid;
4757+ ia.ia_gid = h_isrc->i_gid;
4758+ ia.ia_atime = h_isrc->i_atime;
4759+ ia.ia_mtime = h_isrc->i_mtime;
4760+ if (h_idst->i_mode != h_isrc->i_mode
4761+ && !S_ISLNK(h_idst->i_mode)) {
4762+ ia.ia_valid |= ATTR_MODE;
4763+ ia.ia_mode = h_isrc->i_mode;
4764+ }
4765+ sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID));
4766+ au_cpup_attr_flags(h_idst, h_isrc->i_flags);
1308ab2a 4767+ }
523b37e3
AM
4768+ /* no delegation since it is just created */
4769+ err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL);
1facf9fc 4770+
4771+ /* is this nfs only? */
4772+ if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) {
4773+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
4774+ ia.ia_mode = h_isrc->i_mode;
523b37e3 4775+ err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL);
1facf9fc 4776+ }
4777+
c1595e42 4778+ icex = br->br_perm & AuBrAttr_ICEX;
7f2ca4b1
JR
4779+ if (!err) {
4780+ mnt_flags = au_mntflags(dst->d_sb);
4781+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
4782+ err = au_cpup_xattr(h_path.dentry, h_src, icex, verbose);
4783+ }
c1595e42 4784+
1facf9fc 4785+ return err;
4786+}
4787+
4788+/* ---------------------------------------------------------------------- */
4789+
4790+static int au_do_copy_file(struct file *dst, struct file *src, loff_t len,
4791+ char *buf, unsigned long blksize)
4792+{
4793+ int err;
4794+ size_t sz, rbytes, wbytes;
4795+ unsigned char all_zero;
4796+ char *p, *zp;
4797+ struct mutex *h_mtx;
4798+ /* reduce stack usage */
4799+ struct iattr *ia;
4800+
4801+ zp = page_address(ZERO_PAGE(0));
4802+ if (unlikely(!zp))
4803+ return -ENOMEM; /* possible? */
4804+
4805+ err = 0;
4806+ all_zero = 0;
4807+ while (len) {
4808+ AuDbg("len %lld\n", len);
4809+ sz = blksize;
4810+ if (len < blksize)
4811+ sz = len;
4812+
4813+ rbytes = 0;
4814+ /* todo: signal_pending? */
4815+ while (!rbytes || err == -EAGAIN || err == -EINTR) {
4816+ rbytes = vfsub_read_k(src, buf, sz, &src->f_pos);
4817+ err = rbytes;
4818+ }
4819+ if (unlikely(err < 0))
4820+ break;
4821+
4822+ all_zero = 0;
4823+ if (len >= rbytes && rbytes == blksize)
4824+ all_zero = !memcmp(buf, zp, rbytes);
4825+ if (!all_zero) {
4826+ wbytes = rbytes;
4827+ p = buf;
4828+ while (wbytes) {
4829+ size_t b;
4830+
4831+ b = vfsub_write_k(dst, p, wbytes, &dst->f_pos);
4832+ err = b;
4833+ /* todo: signal_pending? */
4834+ if (unlikely(err == -EAGAIN || err == -EINTR))
4835+ continue;
4836+ if (unlikely(err < 0))
4837+ break;
4838+ wbytes -= b;
4839+ p += b;
4840+ }
392086de
AM
4841+ if (unlikely(err < 0))
4842+ break;
1facf9fc 4843+ } else {
4844+ loff_t res;
4845+
4846+ AuLabel(hole);
4847+ res = vfsub_llseek(dst, rbytes, SEEK_CUR);
4848+ err = res;
4849+ if (unlikely(res < 0))
4850+ break;
4851+ }
4852+ len -= rbytes;
4853+ err = 0;
4854+ }
4855+
4856+ /* the last block may be a hole */
4857+ if (!err && all_zero) {
4858+ AuLabel(last hole);
4859+
4860+ err = 1;
4861+ if (au_test_nfs(dst->f_dentry->d_sb)) {
4862+ /* nfs requires this step to make last hole */
4863+ /* is this only nfs? */
4864+ do {
4865+ /* todo: signal_pending? */
4866+ err = vfsub_write_k(dst, "\0", 1, &dst->f_pos);
4867+ } while (err == -EAGAIN || err == -EINTR);
4868+ if (err == 1)
4869+ dst->f_pos--;
4870+ }
4871+
4872+ if (err == 1) {
4873+ ia = (void *)buf;
4874+ ia->ia_size = dst->f_pos;
4875+ ia->ia_valid = ATTR_SIZE | ATTR_FILE;
4876+ ia->ia_file = dst;
c06a8ce3 4877+ h_mtx = &file_inode(dst)->i_mutex;
1facf9fc 4878+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2);
523b37e3
AM
4879+ /* no delegation since it is just created */
4880+ err = vfsub_notify_change(&dst->f_path, ia,
4881+ /*delegated*/NULL);
1facf9fc 4882+ mutex_unlock(h_mtx);
4883+ }
4884+ }
4885+
4886+ return err;
4887+}
4888+
4889+int au_copy_file(struct file *dst, struct file *src, loff_t len)
4890+{
4891+ int err;
4892+ unsigned long blksize;
4893+ unsigned char do_kfree;
4894+ char *buf;
4895+
4896+ err = -ENOMEM;
4897+ blksize = dst->f_dentry->d_sb->s_blocksize;
4898+ if (!blksize || PAGE_SIZE < blksize)
4899+ blksize = PAGE_SIZE;
4900+ AuDbg("blksize %lu\n", blksize);
4901+ do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr *));
4902+ if (do_kfree)
4903+ buf = kmalloc(blksize, GFP_NOFS);
4904+ else
4905+ buf = (void *)__get_free_page(GFP_NOFS);
4906+ if (unlikely(!buf))
4907+ goto out;
4908+
4909+ if (len > (1 << 22))
4910+ AuDbg("copying a large file %lld\n", (long long)len);
4911+
4912+ src->f_pos = 0;
4913+ dst->f_pos = 0;
4914+ err = au_do_copy_file(dst, src, len, buf, blksize);
4915+ if (do_kfree)
4916+ kfree(buf);
4917+ else
4918+ free_page((unsigned long)buf);
4919+
4f0767ce 4920+out:
1facf9fc 4921+ return err;
4922+}
4923+
4924+/*
4925+ * to support a sparse file which is opened with O_APPEND,
4926+ * we need to close the file.
4927+ */
c2b27bf2 4928+static int au_cp_regular(struct au_cp_generic *cpg)
1facf9fc 4929+{
4930+ int err, i;
4931+ enum { SRC, DST };
4932+ struct {
4933+ aufs_bindex_t bindex;
4934+ unsigned int flags;
4935+ struct dentry *dentry;
392086de 4936+ int force_wr;
1facf9fc 4937+ struct file *file;
523b37e3 4938+ void *label;
1facf9fc 4939+ } *f, file[] = {
4940+ {
c2b27bf2 4941+ .bindex = cpg->bsrc,
1facf9fc 4942+ .flags = O_RDONLY | O_NOATIME | O_LARGEFILE,
523b37e3 4943+ .label = &&out
1facf9fc 4944+ },
4945+ {
c2b27bf2 4946+ .bindex = cpg->bdst,
1facf9fc 4947+ .flags = O_WRONLY | O_NOATIME | O_LARGEFILE,
392086de 4948+ .force_wr = !!au_ftest_cpup(cpg->flags, RWDST),
523b37e3 4949+ .label = &&out_src
1facf9fc 4950+ }
4951+ };
4952+ struct super_block *sb;
7f2ca4b1 4953+ struct task_struct *tsk = current;
1facf9fc 4954+
4955+ /* bsrc branch can be ro/rw. */
c2b27bf2 4956+ sb = cpg->dentry->d_sb;
1facf9fc 4957+ f = file;
4958+ for (i = 0; i < 2; i++, f++) {
c2b27bf2
AM
4959+ f->dentry = au_h_dptr(cpg->dentry, f->bindex);
4960+ f->file = au_h_open(cpg->dentry, f->bindex, f->flags,
392086de 4961+ /*file*/NULL, f->force_wr);
1facf9fc 4962+ err = PTR_ERR(f->file);
4963+ if (IS_ERR(f->file))
4964+ goto *f->label;
1facf9fc 4965+ }
4966+
4967+ /* try stopping to update while we copyup */
4968+ IMustLock(file[SRC].dentry->d_inode);
c2b27bf2 4969+ err = au_copy_file(file[DST].file, file[SRC].file, cpg->len);
1facf9fc 4970+
7f2ca4b1
JR
4971+ /* i wonder if we had O_NO_DELAY_FPUT flag */
4972+ if (tsk->flags & PF_KTHREAD)
4973+ __fput_sync(file[DST].file);
4974+ else {
4975+ WARN(1, "%pD\nPlease report this warning to aufs-users ML",
4976+ file[DST].file);
4977+ fput(file[DST].file);
4978+ /*
4979+ * too bad.
4980+ * we have to call both since we don't know which place the file
4981+ * was added to.
4982+ */
4983+ task_work_run();
4984+ flush_delayed_fput();
4985+ }
1facf9fc 4986+ au_sbr_put(sb, file[DST].bindex);
523b37e3 4987+
4f0767ce 4988+out_src:
1facf9fc 4989+ fput(file[SRC].file);
4990+ au_sbr_put(sb, file[SRC].bindex);
4f0767ce 4991+out:
1facf9fc 4992+ return err;
4993+}
4994+
c2b27bf2 4995+static int au_do_cpup_regular(struct au_cp_generic *cpg,
86dc4139 4996+ struct au_cpup_reg_attr *h_src_attr)
1facf9fc 4997+{
4998+ int err, rerr;
4999+ loff_t l;
86dc4139 5000+ struct path h_path;
38d290e6 5001+ struct inode *h_src_inode, *h_dst_inode;
1facf9fc 5002+
5003+ err = 0;
c2b27bf2 5004+ h_src_inode = au_h_iptr(cpg->dentry->d_inode, cpg->bsrc);
86dc4139 5005+ l = i_size_read(h_src_inode);
c2b27bf2
AM
5006+ if (cpg->len == -1 || l < cpg->len)
5007+ cpg->len = l;
5008+ if (cpg->len) {
86dc4139
AM
5009+ /* try stopping to update while we are referencing */
5010+ mutex_lock_nested(&h_src_inode->i_mutex, AuLsc_I_CHILD);
c2b27bf2 5011+ au_pin_hdir_unlock(cpg->pin);
1facf9fc 5012+
c2b27bf2
AM
5013+ h_path.dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
5014+ h_path.mnt = au_sbr_mnt(cpg->dentry->d_sb, cpg->bsrc);
86dc4139
AM
5015+ h_src_attr->iflags = h_src_inode->i_flags;
5016+ err = vfs_getattr(&h_path, &h_src_attr->st);
5017+ if (unlikely(err)) {
5018+ mutex_unlock(&h_src_inode->i_mutex);
5019+ goto out;
5020+ }
5021+ h_src_attr->valid = 1;
c2b27bf2 5022+ err = au_cp_regular(cpg);
86dc4139 5023+ mutex_unlock(&h_src_inode->i_mutex);
c2b27bf2 5024+ rerr = au_pin_hdir_relock(cpg->pin);
86dc4139
AM
5025+ if (!err && rerr)
5026+ err = rerr;
1facf9fc 5027+ }
38d290e6
JR
5028+ if (!err && (h_src_inode->i_state & I_LINKABLE)) {
5029+ h_path.dentry = au_h_dptr(cpg->dentry, cpg->bdst);
5030+ h_dst_inode = h_path.dentry->d_inode;
5031+ spin_lock(&h_dst_inode->i_lock);
5032+ h_dst_inode->i_state |= I_LINKABLE;
5033+ spin_unlock(&h_dst_inode->i_lock);
5034+ }
1facf9fc 5035+
4f0767ce 5036+out:
1facf9fc 5037+ return err;
5038+}
5039+
5040+static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src,
5041+ struct inode *h_dir)
5042+{
5043+ int err, symlen;
5044+ mm_segment_t old_fs;
b752ccd1
AM
5045+ union {
5046+ char *k;
5047+ char __user *u;
5048+ } sym;
1facf9fc 5049+
5050+ err = -ENOSYS;
5051+ if (unlikely(!h_src->d_inode->i_op->readlink))
5052+ goto out;
5053+
5054+ err = -ENOMEM;
537831f9 5055+ sym.k = (void *)__get_free_page(GFP_NOFS);
b752ccd1 5056+ if (unlikely(!sym.k))
1facf9fc 5057+ goto out;
5058+
9dbd164d 5059+ /* unnecessary to support mmap_sem since symlink is not mmap-able */
1facf9fc 5060+ old_fs = get_fs();
5061+ set_fs(KERNEL_DS);
b752ccd1 5062+ symlen = h_src->d_inode->i_op->readlink(h_src, sym.u, PATH_MAX);
1facf9fc 5063+ err = symlen;
5064+ set_fs(old_fs);
5065+
5066+ if (symlen > 0) {
b752ccd1
AM
5067+ sym.k[symlen] = 0;
5068+ err = vfsub_symlink(h_dir, h_path, sym.k);
1facf9fc 5069+ }
537831f9 5070+ free_page((unsigned long)sym.k);
1facf9fc 5071+
4f0767ce 5072+out:
1facf9fc 5073+ return err;
5074+}
5075+
7f2ca4b1
JR
5076+/*
5077+ * regardless of the 'acl' option, reset all ACLs.
5078+ * All ACLs will be copied up later from the original entry on the lower branch.
5079+ */
5080+static int au_reset_acl(struct inode *h_dir, struct path *h_path, umode_t mode)
5081+{
5082+ int err;
5083+ struct dentry *h_dentry;
5084+ struct inode *h_inode;
5085+
5086+ h_dentry = h_path->dentry;
5087+ h_inode = h_dentry->d_inode;
5088+ /* forget_all_cached_acls(h_inode)); */
5089+ err = vfsub_removexattr(h_dentry, XATTR_NAME_POSIX_ACL_ACCESS);
5090+ AuTraceErr(err);
5091+ if (err == -EOPNOTSUPP)
5092+ err = 0;
5093+ if (!err)
5094+ err = vfsub_acl_chmod(h_inode, mode);
5095+
5096+ AuTraceErr(err);
5097+ return err;
5098+}
5099+
5100+static int au_do_cpup_dir(struct au_cp_generic *cpg, struct dentry *dst_parent,
5101+ struct inode *h_dir, struct path *h_path)
5102+{
5103+ int err;
5104+ struct inode *dir;
5105+
5106+ err = vfsub_removexattr(h_path->dentry, XATTR_NAME_POSIX_ACL_DEFAULT);
5107+ AuTraceErr(err);
5108+ if (err == -EOPNOTSUPP)
5109+ err = 0;
5110+ if (unlikely(err))
5111+ goto out;
5112+
5113+ /*
5114+ * strange behaviour from the user's view,
5115+ * particularly in the setattr case
5116+ */
5117+ dir = dst_parent->d_inode;
5118+ if (au_ibstart(dir) == cpg->bdst)
5119+ au_cpup_attr_nlink(dir, /*force*/1);
5120+ au_cpup_attr_nlink(cpg->dentry->d_inode, /*force*/1);
5121+
5122+out:
5123+ return err;
5124+}
5125+
1facf9fc 5126+static noinline_for_stack
c2b27bf2 5127+int cpup_entry(struct au_cp_generic *cpg, struct dentry *dst_parent,
86dc4139 5128+ struct au_cpup_reg_attr *h_src_attr)
1facf9fc 5129+{
5130+ int err;
5131+ umode_t mode;
5132+ unsigned int mnt_flags;
076b876e 5133+ unsigned char isdir, isreg, force;
c2b27bf2 5134+ const unsigned char do_dt = !!au_ftest_cpup(cpg->flags, DTIME);
1facf9fc 5135+ struct au_dtime dt;
5136+ struct path h_path;
5137+ struct dentry *h_src, *h_dst, *h_parent;
5138+ struct inode *h_inode, *h_dir;
5139+ struct super_block *sb;
5140+
5141+ /* bsrc branch can be ro/rw. */
c2b27bf2 5142+ h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
1facf9fc 5143+ h_inode = h_src->d_inode;
c2b27bf2 5144+ AuDebugOn(h_inode != au_h_iptr(cpg->dentry->d_inode, cpg->bsrc));
1facf9fc 5145+
5146+ /* try stopping to be referenced while we are creating */
c2b27bf2
AM
5147+ h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
5148+ if (au_ftest_cpup(cpg->flags, RENAME))
86dc4139
AM
5149+ AuDebugOn(strncmp(h_dst->d_name.name, AUFS_WH_PFX,
5150+ AUFS_WH_PFX_LEN));
1facf9fc 5151+ h_parent = h_dst->d_parent; /* dir inode is locked */
5152+ h_dir = h_parent->d_inode;
5153+ IMustLock(h_dir);
5154+ AuDebugOn(h_parent != h_dst->d_parent);
5155+
c2b27bf2
AM
5156+ sb = cpg->dentry->d_sb;
5157+ h_path.mnt = au_sbr_mnt(sb, cpg->bdst);
1facf9fc 5158+ if (do_dt) {
5159+ h_path.dentry = h_parent;
5160+ au_dtime_store(&dt, dst_parent, &h_path);
5161+ }
5162+ h_path.dentry = h_dst;
5163+
076b876e 5164+ isreg = 0;
1facf9fc 5165+ isdir = 0;
5166+ mode = h_inode->i_mode;
5167+ switch (mode & S_IFMT) {
5168+ case S_IFREG:
076b876e 5169+ isreg = 1;
7f2ca4b1 5170+ err = vfsub_create(h_dir, &h_path, S_IRUSR | S_IWUSR,
b4510431 5171+ /*want_excl*/true);
1facf9fc 5172+ if (!err)
c2b27bf2 5173+ err = au_do_cpup_regular(cpg, h_src_attr);
1facf9fc 5174+ break;
5175+ case S_IFDIR:
5176+ isdir = 1;
5177+ err = vfsub_mkdir(h_dir, &h_path, mode);
7f2ca4b1
JR
5178+ if (!err)
5179+ err = au_do_cpup_dir(cpg, dst_parent, h_dir, &h_path);
1facf9fc 5180+ break;
5181+ case S_IFLNK:
5182+ err = au_do_cpup_symlink(&h_path, h_src, h_dir);
5183+ break;
5184+ case S_IFCHR:
5185+ case S_IFBLK:
5186+ AuDebugOn(!capable(CAP_MKNOD));
5187+ /*FALLTHROUGH*/
5188+ case S_IFIFO:
5189+ case S_IFSOCK:
5190+ err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev);
5191+ break;
5192+ default:
5193+ AuIOErr("Unknown inode type 0%o\n", mode);
5194+ err = -EIO;
5195+ }
7f2ca4b1
JR
5196+ if (!err)
5197+ err = au_reset_acl(h_dir, &h_path, mode);
1facf9fc 5198+
5199+ mnt_flags = au_mntflags(sb);
5200+ if (!au_opt_test(mnt_flags, UDBA_NONE)
5201+ && !isdir
5202+ && au_opt_test(mnt_flags, XINO)
38d290e6
JR
5203+ && (h_inode->i_nlink == 1
5204+ || (h_inode->i_state & I_LINKABLE))
1facf9fc 5205+ /* todo: unnecessary? */
c2b27bf2
AM
5206+ /* && cpg->dentry->d_inode->i_nlink == 1 */
5207+ && cpg->bdst < cpg->bsrc
5208+ && !au_ftest_cpup(cpg->flags, KEEPLINO))
5209+ au_xino_write(sb, cpg->bsrc, h_inode->i_ino, /*ino*/0);
1facf9fc 5210+ /* ignore this error */
5211+
076b876e
AM
5212+ if (!err) {
5213+ force = 0;
5214+ if (isreg) {
5215+ force = !!cpg->len;
5216+ if (cpg->len == -1)
5217+ force = !!i_size_read(h_inode);
5218+ }
5219+ au_fhsm_wrote(sb, cpg->bdst, force);
5220+ }
5221+
1facf9fc 5222+ if (do_dt)
5223+ au_dtime_revert(&dt);
5224+ return err;
5225+}
5226+
392086de 5227+static int au_do_ren_after_cpup(struct au_cp_generic *cpg, struct path *h_path)
86dc4139
AM
5228+{
5229+ int err;
392086de 5230+ struct dentry *dentry, *h_dentry, *h_parent, *parent;
86dc4139 5231+ struct inode *h_dir;
392086de 5232+ aufs_bindex_t bdst;
86dc4139 5233+
392086de
AM
5234+ dentry = cpg->dentry;
5235+ bdst = cpg->bdst;
5236+ h_dentry = au_h_dptr(dentry, bdst);
5237+ if (!au_ftest_cpup(cpg->flags, OVERWRITE)) {
5238+ dget(h_dentry);
5239+ au_set_h_dptr(dentry, bdst, NULL);
5240+ err = au_lkup_neg(dentry, bdst, /*wh*/0);
5241+ if (!err)
5242+ h_path->dentry = dget(au_h_dptr(dentry, bdst));
86dc4139 5243+ au_set_h_dptr(dentry, bdst, h_dentry);
392086de
AM
5244+ } else {
5245+ err = 0;
5246+ parent = dget_parent(dentry);
5247+ h_parent = au_h_dptr(parent, bdst);
5248+ dput(parent);
5249+ h_path->dentry = vfsub_lkup_one(&dentry->d_name, h_parent);
5250+ if (IS_ERR(h_path->dentry))
5251+ err = PTR_ERR(h_path->dentry);
86dc4139 5252+ }
392086de
AM
5253+ if (unlikely(err))
5254+ goto out;
86dc4139 5255+
86dc4139
AM
5256+ h_parent = h_dentry->d_parent; /* dir inode is locked */
5257+ h_dir = h_parent->d_inode;
5258+ IMustLock(h_dir);
523b37e3
AM
5259+ AuDbg("%pd %pd\n", h_dentry, h_path->dentry);
5260+ /* no delegation since it is just created */
5261+ err = vfsub_rename(h_dir, h_dentry, h_dir, h_path, /*delegated*/NULL);
86dc4139
AM
5262+ dput(h_path->dentry);
5263+
5264+out:
5265+ return err;
5266+}
5267+
1facf9fc 5268+/*
5269+ * copy up the @dentry from @bsrc to @bdst.
5270+ * the caller must set both of the lower dentries.
5271+ * @len is for truncating; when it is -1, copy up the entire file.
5272+ * in link/rename cases, @dst_parent may be different from the real one.
c2b27bf2 5273+ * cpg->bsrc can be larger than cpg->bdst.
1facf9fc 5274+ */
c2b27bf2 5275+static int au_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
1facf9fc 5276+{
5277+ int err, rerr;
5278+ aufs_bindex_t old_ibstart;
5279+ unsigned char isdir, plink;
1facf9fc 5280+ struct dentry *h_src, *h_dst, *h_parent;
523b37e3 5281+ struct inode *dst_inode, *h_dir, *inode, *delegated;
1facf9fc 5282+ struct super_block *sb;
86dc4139 5283+ struct au_branch *br;
c2b27bf2
AM
5284+ /* to reduce stack size */
5285+ struct {
5286+ struct au_dtime dt;
5287+ struct path h_path;
5288+ struct au_cpup_reg_attr h_src_attr;
5289+ } *a;
1facf9fc 5290+
c2b27bf2
AM
5291+ err = -ENOMEM;
5292+ a = kmalloc(sizeof(*a), GFP_NOFS);
5293+ if (unlikely(!a))
5294+ goto out;
5295+ a->h_src_attr.valid = 0;
1facf9fc 5296+
c2b27bf2
AM
5297+ sb = cpg->dentry->d_sb;
5298+ br = au_sbr(sb, cpg->bdst);
5299+ a->h_path.mnt = au_br_mnt(br);
5300+ h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
1facf9fc 5301+ h_parent = h_dst->d_parent; /* dir inode is locked */
5302+ h_dir = h_parent->d_inode;
5303+ IMustLock(h_dir);
5304+
c2b27bf2
AM
5305+ h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
5306+ inode = cpg->dentry->d_inode;
1facf9fc 5307+
5308+ if (!dst_parent)
c2b27bf2 5309+ dst_parent = dget_parent(cpg->dentry);
1facf9fc 5310+ else
5311+ dget(dst_parent);
5312+
5313+ plink = !!au_opt_test(au_mntflags(sb), PLINK);
c2b27bf2 5314+ dst_inode = au_h_iptr(inode, cpg->bdst);
1facf9fc 5315+ if (dst_inode) {
5316+ if (unlikely(!plink)) {
5317+ err = -EIO;
027c5e7a
AM
5318+ AuIOErr("hi%lu(i%lu) exists on b%d "
5319+ "but plink is disabled\n",
c2b27bf2
AM
5320+ dst_inode->i_ino, inode->i_ino, cpg->bdst);
5321+ goto out_parent;
1facf9fc 5322+ }
5323+
5324+ if (dst_inode->i_nlink) {
c2b27bf2 5325+ const int do_dt = au_ftest_cpup(cpg->flags, DTIME);
1facf9fc 5326+
c2b27bf2 5327+ h_src = au_plink_lkup(inode, cpg->bdst);
1facf9fc 5328+ err = PTR_ERR(h_src);
5329+ if (IS_ERR(h_src))
c2b27bf2 5330+ goto out_parent;
1facf9fc 5331+ if (unlikely(!h_src->d_inode)) {
5332+ err = -EIO;
7f2ca4b1 5333+ AuIOErr("i%lu exists on b%d "
027c5e7a 5334+ "but not pseudo-linked\n",
7f2ca4b1 5335+ inode->i_ino, cpg->bdst);
1facf9fc 5336+ dput(h_src);
c2b27bf2 5337+ goto out_parent;
1facf9fc 5338+ }
5339+
5340+ if (do_dt) {
c2b27bf2
AM
5341+ a->h_path.dentry = h_parent;
5342+ au_dtime_store(&a->dt, dst_parent, &a->h_path);
1facf9fc 5343+ }
86dc4139 5344+
c2b27bf2 5345+ a->h_path.dentry = h_dst;
523b37e3
AM
5346+ delegated = NULL;
5347+ err = vfsub_link(h_src, h_dir, &a->h_path, &delegated);
c2b27bf2 5348+ if (!err && au_ftest_cpup(cpg->flags, RENAME))
392086de 5349+ err = au_do_ren_after_cpup(cpg, &a->h_path);
1facf9fc 5350+ if (do_dt)
c2b27bf2 5351+ au_dtime_revert(&a->dt);
523b37e3
AM
5352+ if (unlikely(err == -EWOULDBLOCK)) {
5353+ pr_warn("cannot retry for NFSv4 delegation"
5354+ " for an internal link\n");
5355+ iput(delegated);
5356+ }
1facf9fc 5357+ dput(h_src);
c2b27bf2 5358+ goto out_parent;
1facf9fc 5359+ } else
5360+ /* todo: cpup_wh_file? */
5361+ /* udba work */
4a4d8108 5362+ au_update_ibrange(inode, /*do_put_zero*/1);
1facf9fc 5363+ }
5364+
86dc4139 5365+ isdir = S_ISDIR(inode->i_mode);
1facf9fc 5366+ old_ibstart = au_ibstart(inode);
c2b27bf2 5367+ err = cpup_entry(cpg, dst_parent, &a->h_src_attr);
1facf9fc 5368+ if (unlikely(err))
86dc4139 5369+ goto out_rev;
1facf9fc 5370+ dst_inode = h_dst->d_inode;
5371+ mutex_lock_nested(&dst_inode->i_mutex, AuLsc_I_CHILD2);
86dc4139 5372+ /* todo: necessary? */
c2b27bf2 5373+ /* au_pin_hdir_unlock(cpg->pin); */
1facf9fc 5374+
c2b27bf2 5375+ err = cpup_iattr(cpg->dentry, cpg->bdst, h_src, &a->h_src_attr);
86dc4139
AM
5376+ if (unlikely(err)) {
5377+ /* todo: necessary? */
c2b27bf2 5378+ /* au_pin_hdir_relock(cpg->pin); */ /* ignore an error */
86dc4139
AM
5379+ mutex_unlock(&dst_inode->i_mutex);
5380+ goto out_rev;
5381+ }
5382+
c2b27bf2 5383+ if (cpg->bdst < old_ibstart) {
86dc4139 5384+ if (S_ISREG(inode->i_mode)) {
c2b27bf2 5385+ err = au_dy_iaop(inode, cpg->bdst, dst_inode);
86dc4139 5386+ if (unlikely(err)) {
c2b27bf2
AM
5387+ /* ignore an error */
5388+ /* au_pin_hdir_relock(cpg->pin); */
86dc4139
AM
5389+ mutex_unlock(&dst_inode->i_mutex);
5390+ goto out_rev;
4a4d8108 5391+ }
4a4d8108 5392+ }
c2b27bf2
AM
5393+ au_set_ibstart(inode, cpg->bdst);
5394+ } else
5395+ au_set_ibend(inode, cpg->bdst);
5396+ au_set_h_iptr(inode, cpg->bdst, au_igrab(dst_inode),
86dc4139
AM
5397+ au_hi_flags(inode, isdir));
5398+
5399+ /* todo: necessary? */
c2b27bf2 5400+ /* err = au_pin_hdir_relock(cpg->pin); */
86dc4139
AM
5401+ mutex_unlock(&dst_inode->i_mutex);
5402+ if (unlikely(err))
5403+ goto out_rev;
5404+
5405+ if (!isdir
38d290e6
JR
5406+ && (h_src->d_inode->i_nlink > 1
5407+ || h_src->d_inode->i_state & I_LINKABLE)
86dc4139 5408+ && plink)
c2b27bf2 5409+ au_plink_append(inode, cpg->bdst, h_dst);
86dc4139 5410+
c2b27bf2
AM
5411+ if (au_ftest_cpup(cpg->flags, RENAME)) {
5412+ a->h_path.dentry = h_dst;
392086de 5413+ err = au_do_ren_after_cpup(cpg, &a->h_path);
86dc4139
AM
5414+ }
5415+ if (!err)
c2b27bf2 5416+ goto out_parent; /* success */
1facf9fc 5417+
5418+ /* revert */
4a4d8108 5419+out_rev:
c2b27bf2
AM
5420+ a->h_path.dentry = h_parent;
5421+ au_dtime_store(&a->dt, dst_parent, &a->h_path);
5422+ a->h_path.dentry = h_dst;
86dc4139
AM
5423+ rerr = 0;
5424+ if (h_dst->d_inode) {
523b37e3
AM
5425+ if (!isdir) {
5426+ /* no delegation since it is just created */
5427+ rerr = vfsub_unlink(h_dir, &a->h_path,
5428+ /*delegated*/NULL, /*force*/0);
5429+ } else
c2b27bf2 5430+ rerr = vfsub_rmdir(h_dir, &a->h_path);
86dc4139 5431+ }
c2b27bf2 5432+ au_dtime_revert(&a->dt);
1facf9fc 5433+ if (rerr) {
5434+ AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr);
5435+ err = -EIO;
5436+ }
c2b27bf2 5437+out_parent:
1facf9fc 5438+ dput(dst_parent);
c2b27bf2
AM
5439+ kfree(a);
5440+out:
1facf9fc 5441+ return err;
5442+}
5443+
7f2ca4b1 5444+#if 0 /* reserved */
1facf9fc 5445+struct au_cpup_single_args {
5446+ int *errp;
c2b27bf2 5447+ struct au_cp_generic *cpg;
1facf9fc 5448+ struct dentry *dst_parent;
5449+};
5450+
5451+static void au_call_cpup_single(void *args)
5452+{
5453+ struct au_cpup_single_args *a = args;
86dc4139 5454+
c2b27bf2
AM
5455+ au_pin_hdir_acquire_nest(a->cpg->pin);
5456+ *a->errp = au_cpup_single(a->cpg, a->dst_parent);
5457+ au_pin_hdir_release(a->cpg->pin);
1facf9fc 5458+}
c2b27bf2 5459+#endif
1facf9fc 5460+
53392da6
AM
5461+/*
5462+ * prevent SIGXFSZ in copy-up.
5463+ * testing CAP_MKNOD is for generic fs,
5464+ * but CAP_FSETID is for xfs only, currently.
5465+ */
86dc4139 5466+static int au_cpup_sio_test(struct au_pin *pin, umode_t mode)
53392da6
AM
5467+{
5468+ int do_sio;
86dc4139
AM
5469+ struct super_block *sb;
5470+ struct inode *h_dir;
53392da6
AM
5471+
5472+ do_sio = 0;
86dc4139 5473+ sb = au_pinned_parent(pin)->d_sb;
53392da6
AM
5474+ if (!au_wkq_test()
5475+ && (!au_sbi(sb)->si_plink_maint_pid
5476+ || au_plink_maint(sb, AuLock_NOPLM))) {
5477+ switch (mode & S_IFMT) {
5478+ case S_IFREG:
5479+ /* no condition about RLIMIT_FSIZE and the file size */
5480+ do_sio = 1;
5481+ break;
5482+ case S_IFCHR:
5483+ case S_IFBLK:
5484+ do_sio = !capable(CAP_MKNOD);
5485+ break;
5486+ }
5487+ if (!do_sio)
5488+ do_sio = ((mode & (S_ISUID | S_ISGID))
5489+ && !capable(CAP_FSETID));
86dc4139
AM
5490+ /* this workaround may be removed in the future */
5491+ if (!do_sio) {
5492+ h_dir = au_pinned_h_dir(pin);
5493+ do_sio = h_dir->i_mode & S_ISVTX;
5494+ }
53392da6
AM
5495+ }
5496+
5497+ return do_sio;
5498+}
5499+
7f2ca4b1 5500+#if 0 /* reserved */
c2b27bf2 5501+int au_sio_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
1facf9fc 5502+{
5503+ int err, wkq_err;
1facf9fc 5504+ struct dentry *h_dentry;
5505+
c2b27bf2 5506+ h_dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
86dc4139 5507+ if (!au_cpup_sio_test(pin, h_dentry->d_inode->i_mode))
c2b27bf2 5508+ err = au_cpup_single(cpg, dst_parent);
1facf9fc 5509+ else {
5510+ struct au_cpup_single_args args = {
5511+ .errp = &err,
c2b27bf2
AM
5512+ .cpg = cpg,
5513+ .dst_parent = dst_parent
1facf9fc 5514+ };
5515+ wkq_err = au_wkq_wait(au_call_cpup_single, &args);
5516+ if (unlikely(wkq_err))
5517+ err = wkq_err;
5518+ }
5519+
5520+ return err;
5521+}
c2b27bf2 5522+#endif
1facf9fc 5523+
5524+/*
5525+ * copyup the @dentry from the first active lower branch to @bdst,
5526+ * using au_cpup_single().
5527+ */
c2b27bf2 5528+static int au_cpup_simple(struct au_cp_generic *cpg)
1facf9fc 5529+{
5530+ int err;
c2b27bf2
AM
5531+ unsigned int flags_orig;
5532+ struct dentry *dentry;
5533+
5534+ AuDebugOn(cpg->bsrc < 0);
1facf9fc 5535+
c2b27bf2 5536+ dentry = cpg->dentry;
86dc4139 5537+ DiMustWriteLock(dentry);
1facf9fc 5538+
c2b27bf2 5539+ err = au_lkup_neg(dentry, cpg->bdst, /*wh*/1);
1facf9fc 5540+ if (!err) {
c2b27bf2
AM
5541+ flags_orig = cpg->flags;
5542+ au_fset_cpup(cpg->flags, RENAME);
5543+ err = au_cpup_single(cpg, NULL);
5544+ cpg->flags = flags_orig;
1facf9fc 5545+ if (!err)
5546+ return 0; /* success */
5547+
5548+ /* revert */
c2b27bf2
AM
5549+ au_set_h_dptr(dentry, cpg->bdst, NULL);
5550+ au_set_dbstart(dentry, cpg->bsrc);
1facf9fc 5551+ }
5552+
5553+ return err;
5554+}
5555+
5556+struct au_cpup_simple_args {
5557+ int *errp;
c2b27bf2 5558+ struct au_cp_generic *cpg;
1facf9fc 5559+};
5560+
5561+static void au_call_cpup_simple(void *args)
5562+{
5563+ struct au_cpup_simple_args *a = args;
86dc4139 5564+
c2b27bf2
AM
5565+ au_pin_hdir_acquire_nest(a->cpg->pin);
5566+ *a->errp = au_cpup_simple(a->cpg);
5567+ au_pin_hdir_release(a->cpg->pin);
1facf9fc 5568+}
5569+
c2b27bf2 5570+static int au_do_sio_cpup_simple(struct au_cp_generic *cpg)
1facf9fc 5571+{
5572+ int err, wkq_err;
c2b27bf2
AM
5573+ struct dentry *dentry, *parent;
5574+ struct file *h_file;
1facf9fc 5575+ struct inode *h_dir;
5576+
c2b27bf2
AM
5577+ dentry = cpg->dentry;
5578+ h_file = NULL;
5579+ if (au_ftest_cpup(cpg->flags, HOPEN)) {
5580+ AuDebugOn(cpg->bsrc < 0);
392086de 5581+ h_file = au_h_open_pre(dentry, cpg->bsrc, /*force_wr*/0);
c2b27bf2
AM
5582+ err = PTR_ERR(h_file);
5583+ if (IS_ERR(h_file))
5584+ goto out;
5585+ }
5586+
1facf9fc 5587+ parent = dget_parent(dentry);
c2b27bf2 5588+ h_dir = au_h_iptr(parent->d_inode, cpg->bdst);
53392da6 5589+ if (!au_test_h_perm_sio(h_dir, MAY_EXEC | MAY_WRITE)
c2b27bf2
AM
5590+ && !au_cpup_sio_test(cpg->pin, dentry->d_inode->i_mode))
5591+ err = au_cpup_simple(cpg);
1facf9fc 5592+ else {
5593+ struct au_cpup_simple_args args = {
5594+ .errp = &err,
c2b27bf2 5595+ .cpg = cpg
1facf9fc 5596+ };
5597+ wkq_err = au_wkq_wait(au_call_cpup_simple, &args);
5598+ if (unlikely(wkq_err))
5599+ err = wkq_err;
5600+ }
5601+
5602+ dput(parent);
c2b27bf2
AM
5603+ if (h_file)
5604+ au_h_open_post(dentry, cpg->bsrc, h_file);
5605+
5606+out:
1facf9fc 5607+ return err;
5608+}
5609+
c2b27bf2 5610+int au_sio_cpup_simple(struct au_cp_generic *cpg)
367653fa 5611+{
c2b27bf2
AM
5612+ aufs_bindex_t bsrc, bend;
5613+ struct dentry *dentry, *h_dentry;
367653fa 5614+
c2b27bf2
AM
5615+ if (cpg->bsrc < 0) {
5616+ dentry = cpg->dentry;
5617+ bend = au_dbend(dentry);
5618+ for (bsrc = cpg->bdst + 1; bsrc <= bend; bsrc++) {
5619+ h_dentry = au_h_dptr(dentry, bsrc);
5620+ if (h_dentry) {
5621+ AuDebugOn(!h_dentry->d_inode);
5622+ break;
5623+ }
5624+ }
5625+ AuDebugOn(bsrc > bend);
5626+ cpg->bsrc = bsrc;
367653fa 5627+ }
c2b27bf2
AM
5628+ AuDebugOn(cpg->bsrc <= cpg->bdst);
5629+ return au_do_sio_cpup_simple(cpg);
5630+}
367653fa 5631+
c2b27bf2
AM
5632+int au_sio_cpdown_simple(struct au_cp_generic *cpg)
5633+{
5634+ AuDebugOn(cpg->bdst <= cpg->bsrc);
5635+ return au_do_sio_cpup_simple(cpg);
367653fa
AM
5636+}
5637+
1facf9fc 5638+/* ---------------------------------------------------------------------- */
5639+
5640+/*
5641+ * copyup the deleted file for writing.
5642+ */
c2b27bf2
AM
5643+static int au_do_cpup_wh(struct au_cp_generic *cpg, struct dentry *wh_dentry,
5644+ struct file *file)
1facf9fc 5645+{
5646+ int err;
c2b27bf2
AM
5647+ unsigned int flags_orig;
5648+ aufs_bindex_t bsrc_orig;
1facf9fc 5649+ struct dentry *h_d_dst, *h_d_start;
c2b27bf2 5650+ struct au_dinfo *dinfo;
4a4d8108 5651+ struct au_hdentry *hdp;
1facf9fc 5652+
c2b27bf2 5653+ dinfo = au_di(cpg->dentry);
1308ab2a 5654+ AuRwMustWriteLock(&dinfo->di_rwsem);
5655+
c2b27bf2
AM
5656+ bsrc_orig = cpg->bsrc;
5657+ cpg->bsrc = dinfo->di_bstart;
4a4d8108 5658+ hdp = dinfo->di_hdentry;
c2b27bf2
AM
5659+ h_d_dst = hdp[0 + cpg->bdst].hd_dentry;
5660+ dinfo->di_bstart = cpg->bdst;
5661+ hdp[0 + cpg->bdst].hd_dentry = wh_dentry;
86dc4139 5662+ h_d_start = NULL;
027c5e7a 5663+ if (file) {
c2b27bf2
AM
5664+ h_d_start = hdp[0 + cpg->bsrc].hd_dentry;
5665+ hdp[0 + cpg->bsrc].hd_dentry = au_hf_top(file)->f_dentry;
027c5e7a 5666+ }
c2b27bf2
AM
5667+ flags_orig = cpg->flags;
5668+ cpg->flags = !AuCpup_DTIME;
5669+ err = au_cpup_single(cpg, /*h_parent*/NULL);
5670+ cpg->flags = flags_orig;
027c5e7a
AM
5671+ if (file) {
5672+ if (!err)
5673+ err = au_reopen_nondir(file);
c2b27bf2 5674+ hdp[0 + cpg->bsrc].hd_dentry = h_d_start;
1facf9fc 5675+ }
c2b27bf2
AM
5676+ hdp[0 + cpg->bdst].hd_dentry = h_d_dst;
5677+ dinfo->di_bstart = cpg->bsrc;
5678+ cpg->bsrc = bsrc_orig;
1facf9fc 5679+
5680+ return err;
5681+}
5682+
c2b27bf2 5683+static int au_cpup_wh(struct au_cp_generic *cpg, struct file *file)
1facf9fc 5684+{
5685+ int err;
c2b27bf2 5686+ aufs_bindex_t bdst;
1facf9fc 5687+ struct au_dtime dt;
c2b27bf2 5688+ struct dentry *dentry, *parent, *h_parent, *wh_dentry;
1facf9fc 5689+ struct au_branch *br;
5690+ struct path h_path;
5691+
c2b27bf2
AM
5692+ dentry = cpg->dentry;
5693+ bdst = cpg->bdst;
1facf9fc 5694+ br = au_sbr(dentry->d_sb, bdst);
5695+ parent = dget_parent(dentry);
5696+ h_parent = au_h_dptr(parent, bdst);
5697+ wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
5698+ err = PTR_ERR(wh_dentry);
5699+ if (IS_ERR(wh_dentry))
5700+ goto out;
5701+
5702+ h_path.dentry = h_parent;
86dc4139 5703+ h_path.mnt = au_br_mnt(br);
1facf9fc 5704+ au_dtime_store(&dt, parent, &h_path);
c2b27bf2 5705+ err = au_do_cpup_wh(cpg, wh_dentry, file);
1facf9fc 5706+ if (unlikely(err))
5707+ goto out_wh;
5708+
5709+ dget(wh_dentry);
5710+ h_path.dentry = wh_dentry;
7f2ca4b1 5711+ if (!d_is_dir(wh_dentry)) {
523b37e3
AM
5712+ /* no delegation since it is just created */
5713+ err = vfsub_unlink(h_parent->d_inode, &h_path,
5714+ /*delegated*/NULL, /*force*/0);
5715+ } else
4a4d8108 5716+ err = vfsub_rmdir(h_parent->d_inode, &h_path);
1facf9fc 5717+ if (unlikely(err)) {
523b37e3
AM
5718+ AuIOErr("failed remove copied-up tmp file %pd(%d)\n",
5719+ wh_dentry, err);
1facf9fc 5720+ err = -EIO;
5721+ }
5722+ au_dtime_revert(&dt);
5723+ au_set_hi_wh(dentry->d_inode, bdst, wh_dentry);
5724+
4f0767ce 5725+out_wh:
1facf9fc 5726+ dput(wh_dentry);
4f0767ce 5727+out:
1facf9fc 5728+ dput(parent);
5729+ return err;
5730+}
5731+
5732+struct au_cpup_wh_args {
5733+ int *errp;
c2b27bf2 5734+ struct au_cp_generic *cpg;
1facf9fc 5735+ struct file *file;
5736+};
5737+
5738+static void au_call_cpup_wh(void *args)
5739+{
5740+ struct au_cpup_wh_args *a = args;
86dc4139 5741+
c2b27bf2
AM
5742+ au_pin_hdir_acquire_nest(a->cpg->pin);
5743+ *a->errp = au_cpup_wh(a->cpg, a->file);
5744+ au_pin_hdir_release(a->cpg->pin);
1facf9fc 5745+}
5746+
c2b27bf2 5747+int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file)
1facf9fc 5748+{
5749+ int err, wkq_err;
c2b27bf2 5750+ aufs_bindex_t bdst;
c1595e42 5751+ struct dentry *dentry, *parent, *h_orph, *h_parent;
86dc4139 5752+ struct inode *dir, *h_dir, *h_tmpdir;
1facf9fc 5753+ struct au_wbr *wbr;
c2b27bf2 5754+ struct au_pin wh_pin, *pin_orig;
1facf9fc 5755+
c2b27bf2
AM
5756+ dentry = cpg->dentry;
5757+ bdst = cpg->bdst;
1facf9fc 5758+ parent = dget_parent(dentry);
5759+ dir = parent->d_inode;
5760+ h_orph = NULL;
5761+ h_parent = NULL;
5762+ h_dir = au_igrab(au_h_iptr(dir, bdst));
5763+ h_tmpdir = h_dir;
c2b27bf2 5764+ pin_orig = NULL;
1facf9fc 5765+ if (!h_dir->i_nlink) {
5766+ wbr = au_sbr(dentry->d_sb, bdst)->br_wbr;
5767+ h_orph = wbr->wbr_orph;
5768+
5769+ h_parent = dget(au_h_dptr(parent, bdst));
1facf9fc 5770+ au_set_h_dptr(parent, bdst, dget(h_orph));
5771+ h_tmpdir = h_orph->d_inode;
1facf9fc 5772+ au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0);
5773+
dece6358 5774+ mutex_lock_nested(&h_tmpdir->i_mutex, AuLsc_I_PARENT3);
4a4d8108 5775+ /* todo: au_h_open_pre()? */
86dc4139 5776+
c2b27bf2 5777+ pin_orig = cpg->pin;
86dc4139 5778+ au_pin_init(&wh_pin, dentry, bdst, AuLsc_DI_PARENT,
c2b27bf2
AM
5779+ AuLsc_I_PARENT3, cpg->pin->udba, AuPin_DI_LOCKED);
5780+ cpg->pin = &wh_pin;
1facf9fc 5781+ }
5782+
53392da6 5783+ if (!au_test_h_perm_sio(h_tmpdir, MAY_EXEC | MAY_WRITE)
c2b27bf2
AM
5784+ && !au_cpup_sio_test(cpg->pin, dentry->d_inode->i_mode))
5785+ err = au_cpup_wh(cpg, file);
1facf9fc 5786+ else {
5787+ struct au_cpup_wh_args args = {
5788+ .errp = &err,
c2b27bf2
AM
5789+ .cpg = cpg,
5790+ .file = file
1facf9fc 5791+ };
5792+ wkq_err = au_wkq_wait(au_call_cpup_wh, &args);
5793+ if (unlikely(wkq_err))
5794+ err = wkq_err;
5795+ }
5796+
5797+ if (h_orph) {
5798+ mutex_unlock(&h_tmpdir->i_mutex);
4a4d8108 5799+ /* todo: au_h_open_post()? */
1facf9fc 5800+ au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0);
1facf9fc 5801+ au_set_h_dptr(parent, bdst, h_parent);
c2b27bf2
AM
5802+ AuDebugOn(!pin_orig);
5803+ cpg->pin = pin_orig;
1facf9fc 5804+ }
5805+ iput(h_dir);
5806+ dput(parent);
5807+
5808+ return err;
5809+}
5810+
5811+/* ---------------------------------------------------------------------- */
5812+
5813+/*
5814+ * generic routine for both of copy-up and copy-down.
5815+ */
5816+/* cf. revalidate function in file.c */
5817+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
5818+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 5819+ struct au_pin *pin,
1facf9fc 5820+ struct dentry *h_parent, void *arg),
5821+ void *arg)
5822+{
5823+ int err;
5824+ struct au_pin pin;
5825+ struct dentry *d, *parent, *h_parent, *real_parent;
5826+
5827+ err = 0;
5828+ parent = dget_parent(dentry);
5829+ if (IS_ROOT(parent))
5830+ goto out;
5831+
5832+ au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2,
5833+ au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE);
5834+
5835+ /* do not use au_dpage */
5836+ real_parent = parent;
5837+ while (1) {
5838+ dput(parent);
5839+ parent = dget_parent(dentry);
5840+ h_parent = au_h_dptr(parent, bdst);
5841+ if (h_parent)
5842+ goto out; /* success */
5843+
5844+ /* find top dir which is necessary to cpup */
5845+ do {
5846+ d = parent;
5847+ dput(parent);
5848+ parent = dget_parent(d);
5849+ di_read_lock_parent3(parent, !AuLock_IR);
5850+ h_parent = au_h_dptr(parent, bdst);
5851+ di_read_unlock(parent, !AuLock_IR);
5852+ } while (!h_parent);
5853+
5854+ if (d != real_parent)
5855+ di_write_lock_child3(d);
5856+
5857+ /* somebody else might create while we were sleeping */
5858+ if (!au_h_dptr(d, bdst) || !au_h_dptr(d, bdst)->d_inode) {
5859+ if (au_h_dptr(d, bdst))
5860+ au_update_dbstart(d);
5861+
5862+ au_pin_set_dentry(&pin, d);
5863+ err = au_do_pin(&pin);
5864+ if (!err) {
86dc4139 5865+ err = cp(d, bdst, &pin, h_parent, arg);
1facf9fc 5866+ au_unpin(&pin);
5867+ }
5868+ }
5869+
5870+ if (d != real_parent)
5871+ di_write_unlock(d);
5872+ if (unlikely(err))
5873+ break;
5874+ }
5875+
4f0767ce 5876+out:
1facf9fc 5877+ dput(parent);
5878+ return err;
5879+}
5880+
5881+static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 5882+ struct au_pin *pin,
7f2ca4b1 5883+ struct dentry *h_parent __maybe_unused,
1facf9fc 5884+ void *arg __maybe_unused)
5885+{
c2b27bf2
AM
5886+ struct au_cp_generic cpg = {
5887+ .dentry = dentry,
5888+ .bdst = bdst,
5889+ .bsrc = -1,
5890+ .len = 0,
5891+ .pin = pin,
5892+ .flags = AuCpup_DTIME
5893+ };
5894+ return au_sio_cpup_simple(&cpg);
1facf9fc 5895+}
5896+
5897+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
5898+{
5899+ return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL);
5900+}
5901+
5902+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
5903+{
5904+ int err;
5905+ struct dentry *parent;
5906+ struct inode *dir;
5907+
5908+ parent = dget_parent(dentry);
5909+ dir = parent->d_inode;
5910+ err = 0;
5911+ if (au_h_iptr(dir, bdst))
5912+ goto out;
5913+
5914+ di_read_unlock(parent, AuLock_IR);
5915+ di_write_lock_parent(parent);
5916+ /* someone else might change our inode while we were sleeping */
5917+ if (!au_h_iptr(dir, bdst))
5918+ err = au_cpup_dirs(dentry, bdst);
5919+ di_downgrade_lock(parent, AuLock_IR);
5920+
4f0767ce 5921+out:
1facf9fc 5922+ dput(parent);
5923+ return err;
5924+}
7f207e10
AM
5925diff -urN /usr/share/empty/fs/aufs/cpup.h linux/fs/aufs/cpup.h
5926--- /usr/share/empty/fs/aufs/cpup.h 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 5927+++ linux/fs/aufs/cpup.h 2016-02-28 11:27:01.277245613 +0100
523b37e3 5928@@ -0,0 +1,94 @@
1facf9fc 5929+/*
7f2ca4b1 5930+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 5931+ *
5932+ * This program, aufs is free software; you can redistribute it and/or modify
5933+ * it under the terms of the GNU General Public License as published by
5934+ * the Free Software Foundation; either version 2 of the License, or
5935+ * (at your option) any later version.
dece6358
AM
5936+ *
5937+ * This program is distributed in the hope that it will be useful,
5938+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
5939+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
5940+ * GNU General Public License for more details.
5941+ *
5942+ * You should have received a copy of the GNU General Public License
523b37e3 5943+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 5944+ */
5945+
5946+/*
5947+ * copy-up/down functions
5948+ */
5949+
5950+#ifndef __AUFS_CPUP_H__
5951+#define __AUFS_CPUP_H__
5952+
5953+#ifdef __KERNEL__
5954+
dece6358 5955+#include <linux/path.h>
1facf9fc 5956+
dece6358
AM
5957+struct inode;
5958+struct file;
86dc4139 5959+struct au_pin;
dece6358 5960+
86dc4139 5961+void au_cpup_attr_flags(struct inode *dst, unsigned int iflags);
1facf9fc 5962+void au_cpup_attr_timesizes(struct inode *inode);
5963+void au_cpup_attr_nlink(struct inode *inode, int force);
5964+void au_cpup_attr_changeable(struct inode *inode);
5965+void au_cpup_igen(struct inode *inode, struct inode *h_inode);
5966+void au_cpup_attr_all(struct inode *inode, int force);
5967+
5968+/* ---------------------------------------------------------------------- */
5969+
c2b27bf2
AM
5970+struct au_cp_generic {
5971+ struct dentry *dentry;
5972+ aufs_bindex_t bdst, bsrc;
5973+ loff_t len;
5974+ struct au_pin *pin;
5975+ unsigned int flags;
5976+};
5977+
/* cpup flags, one bit each */
/* do dtime_store/revert */
#define AuCpup_DTIME		(1 << 0)
/* do not clear the lower xino, for link(2) */
#define AuCpup_KEEPLINO		(1 << 1)
/* rename after cpup */
#define AuCpup_RENAME		(1 << 2)
/* call h_open_pre/post() in cpup */
#define AuCpup_HOPEN		(1 << 3)
/* allow overwriting the existing entry */
#define AuCpup_OVERWRITE	(1 << 4)
/* force write target even if the branch is marked as RO */
#define AuCpup_RWDST		(1 << 5)

/* test, set and clear a flag given by its short name, e.g. DTIME */
#define au_ftest_cpup(flags, name)	((flags) & AuCpup_##name)
#define au_fset_cpup(flags, name) \
	do { (flags) |= AuCpup_##name; } while (0)
#define au_fclr_cpup(flags, name) \
	do { (flags) &= ~AuCpup_##name; } while (0)
1facf9fc 5995+
5996+int au_copy_file(struct file *dst, struct file *src, loff_t len);
c2b27bf2
AM
5997+int au_sio_cpup_simple(struct au_cp_generic *cpg);
5998+int au_sio_cpdown_simple(struct au_cp_generic *cpg);
5999+int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file);
1facf9fc 6000+
6001+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
6002+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 6003+ struct au_pin *pin,
1facf9fc 6004+ struct dentry *h_parent, void *arg),
6005+ void *arg);
6006+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
6007+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
6008+
6009+/* ---------------------------------------------------------------------- */
6010+
6011+/* keep timestamps when copyup */
6012+struct au_dtime {
6013+ struct dentry *dt_dentry;
6014+ struct path dt_h_path;
6015+ struct timespec dt_atime, dt_mtime;
6016+};
6017+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
6018+ struct path *h_path);
6019+void au_dtime_revert(struct au_dtime *dt);
6020+
6021+#endif /* __KERNEL__ */
6022+#endif /* __AUFS_CPUP_H__ */
7f207e10
AM
6023diff -urN /usr/share/empty/fs/aufs/dbgaufs.c linux/fs/aufs/dbgaufs.c
6024--- /usr/share/empty/fs/aufs/dbgaufs.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 6025+++ linux/fs/aufs/dbgaufs.c 2016-02-28 11:27:01.277245613 +0100
523b37e3 6026@@ -0,0 +1,432 @@
1facf9fc 6027+/*
7f2ca4b1 6028+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 6029+ *
6030+ * This program, aufs is free software; you can redistribute it and/or modify
6031+ * it under the terms of the GNU General Public License as published by
6032+ * the Free Software Foundation; either version 2 of the License, or
6033+ * (at your option) any later version.
dece6358
AM
6034+ *
6035+ * This program is distributed in the hope that it will be useful,
6036+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6037+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6038+ * GNU General Public License for more details.
6039+ *
6040+ * You should have received a copy of the GNU General Public License
523b37e3 6041+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6042+ */
6043+
6044+/*
6045+ * debugfs interface
6046+ */
6047+
6048+#include <linux/debugfs.h>
6049+#include "aufs.h"
6050+
6051+#ifndef CONFIG_SYSFS
6052+#error DEBUG_FS depends upon SYSFS
6053+#endif
6054+
6055+static struct dentry *dbgaufs;
6056+static const mode_t dbgaufs_mode = S_IRUSR | S_IRGRP | S_IROTH;
6057+
/*
 * Text buffer attached to an opened xino debugfs file.
 * 20 is the max number of decimal digits of a 64bit unsigned long, and at
 * most four numbers are printed into the buffer.
 */
struct dbgaufs_arg {
	int	n;		/* number of valid bytes in a[] */
	char	a[20 * 4];
};
6063+
6064+/*
6065+ * common function for all XINO files
6066+ */
6067+static int dbgaufs_xi_release(struct inode *inode __maybe_unused,
6068+ struct file *file)
6069+{
6070+ kfree(file->private_data);
6071+ return 0;
6072+}
6073+
6074+static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt)
6075+{
6076+ int err;
6077+ struct kstat st;
6078+ struct dbgaufs_arg *p;
6079+
6080+ err = -ENOMEM;
6081+ p = kmalloc(sizeof(*p), GFP_NOFS);
6082+ if (unlikely(!p))
6083+ goto out;
6084+
6085+ err = 0;
6086+ p->n = 0;
6087+ file->private_data = p;
6088+ if (!xf)
6089+ goto out;
6090+
c06a8ce3 6091+ err = vfs_getattr(&xf->f_path, &st);
1facf9fc 6092+ if (!err) {
6093+ if (do_fcnt)
6094+ p->n = snprintf
6095+ (p->a, sizeof(p->a), "%ld, %llux%lu %lld\n",
6096+ (long)file_count(xf), st.blocks, st.blksize,
6097+ (long long)st.size);
6098+ else
6099+ p->n = snprintf(p->a, sizeof(p->a), "%llux%lu %lld\n",
6100+ st.blocks, st.blksize,
6101+ (long long)st.size);
6102+ AuDebugOn(p->n >= sizeof(p->a));
6103+ } else {
6104+ p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err);
6105+ err = 0;
6106+ }
6107+
4f0767ce 6108+out:
1facf9fc 6109+ return err;
6110+
6111+}
6112+
6113+static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf,
6114+ size_t count, loff_t *ppos)
6115+{
6116+ struct dbgaufs_arg *p;
6117+
6118+ p = file->private_data;
6119+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
6120+}
6121+
6122+/* ---------------------------------------------------------------------- */
6123+
86dc4139
AM
/* text buffer of the "plink" debugfs file; lives in a single page */
struct dbgaufs_plink_arg {
	int	n;	/* number of valid bytes in a[] */
	char	a[];	/* occupies the rest of the page */
};
6128+
6129+static int dbgaufs_plink_release(struct inode *inode __maybe_unused,
6130+ struct file *file)
6131+{
6132+ free_page((unsigned long)file->private_data);
6133+ return 0;
6134+}
6135+
6136+static int dbgaufs_plink_open(struct inode *inode, struct file *file)
6137+{
6138+ int err, i, limit;
6139+ unsigned long n, sum;
6140+ struct dbgaufs_plink_arg *p;
6141+ struct au_sbinfo *sbinfo;
6142+ struct super_block *sb;
6143+ struct au_sphlhead *sphl;
6144+
6145+ err = -ENOMEM;
6146+ p = (void *)get_zeroed_page(GFP_NOFS);
6147+ if (unlikely(!p))
6148+ goto out;
6149+
6150+ err = -EFBIG;
6151+ sbinfo = inode->i_private;
6152+ sb = sbinfo->si_sb;
6153+ si_noflush_read_lock(sb);
6154+ if (au_opt_test(au_mntflags(sb), PLINK)) {
6155+ limit = PAGE_SIZE - sizeof(p->n);
6156+
6157+ /* the number of buckets */
6158+ n = snprintf(p->a + p->n, limit, "%d\n", AuPlink_NHASH);
6159+ p->n += n;
6160+ limit -= n;
6161+
6162+ sum = 0;
6163+ for (i = 0, sphl = sbinfo->si_plink;
6164+ i < AuPlink_NHASH;
6165+ i++, sphl++) {
6166+ n = au_sphl_count(sphl);
6167+ sum += n;
6168+
6169+ n = snprintf(p->a + p->n, limit, "%lu ", n);
6170+ p->n += n;
6171+ limit -= n;
6172+ if (unlikely(limit <= 0))
6173+ goto out_free;
6174+ }
6175+ p->a[p->n - 1] = '\n';
6176+
6177+ /* the sum of plinks */
6178+ n = snprintf(p->a + p->n, limit, "%lu\n", sum);
6179+ p->n += n;
6180+ limit -= n;
6181+ if (unlikely(limit <= 0))
6182+ goto out_free;
6183+ } else {
6184+#define str "1\n0\n0\n"
6185+ p->n = sizeof(str) - 1;
6186+ strcpy(p->a, str);
6187+#undef str
6188+ }
6189+ si_read_unlock(sb);
6190+
6191+ err = 0;
6192+ file->private_data = p;
6193+ goto out; /* success */
6194+
6195+out_free:
6196+ free_page((unsigned long)p);
6197+out:
6198+ return err;
6199+}
6200+
6201+static ssize_t dbgaufs_plink_read(struct file *file, char __user *buf,
6202+ size_t count, loff_t *ppos)
6203+{
6204+ struct dbgaufs_plink_arg *p;
6205+
6206+ p = file->private_data;
6207+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
6208+}
6209+
6210+static const struct file_operations dbgaufs_plink_fop = {
6211+ .owner = THIS_MODULE,
6212+ .open = dbgaufs_plink_open,
6213+ .release = dbgaufs_plink_release,
6214+ .read = dbgaufs_plink_read
6215+};
6216+
6217+/* ---------------------------------------------------------------------- */
6218+
1facf9fc 6219+static int dbgaufs_xib_open(struct inode *inode, struct file *file)
6220+{
6221+ int err;
6222+ struct au_sbinfo *sbinfo;
6223+ struct super_block *sb;
6224+
6225+ sbinfo = inode->i_private;
6226+ sb = sbinfo->si_sb;
6227+ si_noflush_read_lock(sb);
6228+ err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0);
6229+ si_read_unlock(sb);
6230+ return err;
6231+}
6232+
6233+static const struct file_operations dbgaufs_xib_fop = {
4a4d8108 6234+ .owner = THIS_MODULE,
1facf9fc 6235+ .open = dbgaufs_xib_open,
6236+ .release = dbgaufs_xi_release,
6237+ .read = dbgaufs_xi_read
6238+};
6239+
6240+/* ---------------------------------------------------------------------- */
6241+
6242+#define DbgaufsXi_PREFIX "xi"
6243+
6244+static int dbgaufs_xino_open(struct inode *inode, struct file *file)
6245+{
6246+ int err;
6247+ long l;
6248+ struct au_sbinfo *sbinfo;
6249+ struct super_block *sb;
6250+ struct file *xf;
6251+ struct qstr *name;
6252+
6253+ err = -ENOENT;
6254+ xf = NULL;
6255+ name = &file->f_dentry->d_name;
6256+ if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX)
6257+ || memcmp(name->name, DbgaufsXi_PREFIX,
6258+ sizeof(DbgaufsXi_PREFIX) - 1)))
6259+ goto out;
9dbd164d 6260+ err = kstrtol(name->name + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l);
1facf9fc 6261+ if (unlikely(err))
6262+ goto out;
6263+
6264+ sbinfo = inode->i_private;
6265+ sb = sbinfo->si_sb;
6266+ si_noflush_read_lock(sb);
6267+ if (l <= au_sbend(sb)) {
6268+ xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file;
6269+ err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1);
6270+ } else
6271+ err = -ENOENT;
6272+ si_read_unlock(sb);
6273+
4f0767ce 6274+out:
1facf9fc 6275+ return err;
6276+}
6277+
6278+static const struct file_operations dbgaufs_xino_fop = {
4a4d8108 6279+ .owner = THIS_MODULE,
1facf9fc 6280+ .open = dbgaufs_xino_open,
6281+ .release = dbgaufs_xi_release,
6282+ .read = dbgaufs_xi_read
6283+};
6284+
6285+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
6286+{
6287+ aufs_bindex_t bend;
6288+ struct au_branch *br;
6289+ struct au_xino_file *xi;
6290+
6291+ if (!au_sbi(sb)->si_dbgaufs)
6292+ return;
6293+
6294+ bend = au_sbend(sb);
6295+ for (; bindex <= bend; bindex++) {
6296+ br = au_sbr(sb, bindex);
6297+ xi = &br->br_xino;
c06a8ce3
AM
6298+ debugfs_remove(xi->xi_dbgaufs);
6299+ xi->xi_dbgaufs = NULL;
1facf9fc 6300+ }
6301+}
6302+
6303+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
6304+{
6305+ struct au_sbinfo *sbinfo;
6306+ struct dentry *parent;
6307+ struct au_branch *br;
6308+ struct au_xino_file *xi;
6309+ aufs_bindex_t bend;
6310+ char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" bindex NULL */
6311+
6312+ sbinfo = au_sbi(sb);
6313+ parent = sbinfo->si_dbgaufs;
6314+ if (!parent)
6315+ return;
6316+
6317+ bend = au_sbend(sb);
6318+ for (; bindex <= bend; bindex++) {
6319+ snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex);
6320+ br = au_sbr(sb, bindex);
6321+ xi = &br->br_xino;
6322+ AuDebugOn(xi->xi_dbgaufs);
6323+ xi->xi_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent,
6324+ sbinfo, &dbgaufs_xino_fop);
6325+ /* ignore an error */
6326+ if (unlikely(!xi->xi_dbgaufs))
6327+ AuWarn1("failed %s under debugfs\n", name);
6328+ }
6329+}
6330+
6331+/* ---------------------------------------------------------------------- */
6332+
6333+#ifdef CONFIG_AUFS_EXPORT
6334+static int dbgaufs_xigen_open(struct inode *inode, struct file *file)
6335+{
6336+ int err;
6337+ struct au_sbinfo *sbinfo;
6338+ struct super_block *sb;
6339+
6340+ sbinfo = inode->i_private;
6341+ sb = sbinfo->si_sb;
6342+ si_noflush_read_lock(sb);
6343+ err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0);
6344+ si_read_unlock(sb);
6345+ return err;
6346+}
6347+
6348+static const struct file_operations dbgaufs_xigen_fop = {
4a4d8108 6349+ .owner = THIS_MODULE,
1facf9fc 6350+ .open = dbgaufs_xigen_open,
6351+ .release = dbgaufs_xi_release,
6352+ .read = dbgaufs_xi_read
6353+};
6354+
6355+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
6356+{
6357+ int err;
6358+
dece6358 6359+ /*
c1595e42 6360+ * This function is a dynamic '__init' function actually,
dece6358
AM
6361+ * so the tiny check for si_rwsem is unnecessary.
6362+ */
6363+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
6364+
1facf9fc 6365+ err = -EIO;
6366+ sbinfo->si_dbgaufs_xigen = debugfs_create_file
6367+ ("xigen", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
6368+ &dbgaufs_xigen_fop);
6369+ if (sbinfo->si_dbgaufs_xigen)
6370+ err = 0;
6371+
6372+ return err;
6373+}
6374+#else
/* without CONFIG_AUFS_EXPORT there is no "xigen" file to create */
static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
{
	return 0; /* always succeeds */
}
6379+#endif /* CONFIG_AUFS_EXPORT */
6380+
6381+/* ---------------------------------------------------------------------- */
6382+
6383+void dbgaufs_si_fin(struct au_sbinfo *sbinfo)
6384+{
dece6358 6385+ /*
7f2ca4b1 6386+ * This function is a dynamic '__fin' function actually,
dece6358
AM
6387+ * so the tiny check for si_rwsem is unnecessary.
6388+ */
6389+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
6390+
1facf9fc 6391+ debugfs_remove_recursive(sbinfo->si_dbgaufs);
6392+ sbinfo->si_dbgaufs = NULL;
6393+ kobject_put(&sbinfo->si_kobj);
6394+}
6395+
6396+int dbgaufs_si_init(struct au_sbinfo *sbinfo)
6397+{
6398+ int err;
6399+ char name[SysaufsSiNameLen];
6400+
dece6358 6401+ /*
c1595e42 6402+ * This function is a dynamic '__init' function actually,
dece6358
AM
6403+ * so the tiny check for si_rwsem is unnecessary.
6404+ */
6405+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
6406+
1facf9fc 6407+ err = -ENOENT;
6408+ if (!dbgaufs) {
6409+ AuErr1("/debug/aufs is uninitialized\n");
6410+ goto out;
6411+ }
6412+
6413+ err = -EIO;
6414+ sysaufs_name(sbinfo, name);
6415+ sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs);
6416+ if (unlikely(!sbinfo->si_dbgaufs))
6417+ goto out;
6418+ kobject_get(&sbinfo->si_kobj);
6419+
6420+ sbinfo->si_dbgaufs_xib = debugfs_create_file
6421+ ("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
6422+ &dbgaufs_xib_fop);
6423+ if (unlikely(!sbinfo->si_dbgaufs_xib))
6424+ goto out_dir;
6425+
86dc4139
AM
6426+ sbinfo->si_dbgaufs_plink = debugfs_create_file
6427+ ("plink", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
6428+ &dbgaufs_plink_fop);
6429+ if (unlikely(!sbinfo->si_dbgaufs_plink))
6430+ goto out_dir;
6431+
1facf9fc 6432+ err = dbgaufs_xigen_init(sbinfo);
6433+ if (!err)
6434+ goto out; /* success */
6435+
4f0767ce 6436+out_dir:
1facf9fc 6437+ dbgaufs_si_fin(sbinfo);
4f0767ce 6438+out:
1facf9fc 6439+ return err;
6440+}
6441+
6442+/* ---------------------------------------------------------------------- */
6443+
6444+void dbgaufs_fin(void)
6445+{
6446+ debugfs_remove(dbgaufs);
6447+}
6448+
6449+int __init dbgaufs_init(void)
6450+{
6451+ int err;
6452+
6453+ err = -EIO;
6454+ dbgaufs = debugfs_create_dir(AUFS_NAME, NULL);
6455+ if (dbgaufs)
6456+ err = 0;
6457+ return err;
6458+}
7f207e10
AM
6459diff -urN /usr/share/empty/fs/aufs/dbgaufs.h linux/fs/aufs/dbgaufs.h
6460--- /usr/share/empty/fs/aufs/dbgaufs.h 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 6461+++ linux/fs/aufs/dbgaufs.h 2016-02-28 11:27:01.277245613 +0100
523b37e3 6462@@ -0,0 +1,48 @@
1facf9fc 6463+/*
7f2ca4b1 6464+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 6465+ *
6466+ * This program, aufs is free software; you can redistribute it and/or modify
6467+ * it under the terms of the GNU General Public License as published by
6468+ * the Free Software Foundation; either version 2 of the License, or
6469+ * (at your option) any later version.
dece6358
AM
6470+ *
6471+ * This program is distributed in the hope that it will be useful,
6472+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6473+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6474+ * GNU General Public License for more details.
6475+ *
6476+ * You should have received a copy of the GNU General Public License
523b37e3 6477+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6478+ */
6479+
6480+/*
6481+ * debugfs interface
6482+ */
6483+
6484+#ifndef __DBGAUFS_H__
6485+#define __DBGAUFS_H__
6486+
6487+#ifdef __KERNEL__
6488+
dece6358 6489+struct super_block;
1facf9fc 6490+struct au_sbinfo;
dece6358 6491+
1facf9fc 6492+#ifdef CONFIG_DEBUG_FS
6493+/* dbgaufs.c */
6494+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
6495+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
6496+void dbgaufs_si_fin(struct au_sbinfo *sbinfo);
6497+int dbgaufs_si_init(struct au_sbinfo *sbinfo);
6498+void dbgaufs_fin(void);
6499+int __init dbgaufs_init(void);
1facf9fc 6500+#else
4a4d8108
AM
6501+AuStubVoid(dbgaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
6502+AuStubVoid(dbgaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
6503+AuStubVoid(dbgaufs_si_fin, struct au_sbinfo *sbinfo)
6504+AuStubInt0(dbgaufs_si_init, struct au_sbinfo *sbinfo)
6505+AuStubVoid(dbgaufs_fin, void)
6506+AuStubInt0(__init dbgaufs_init, void)
1facf9fc 6507+#endif /* CONFIG_DEBUG_FS */
6508+
6509+#endif /* __KERNEL__ */
6510+#endif /* __DBGAUFS_H__ */
7f207e10
AM
6511diff -urN /usr/share/empty/fs/aufs/dcsub.c linux/fs/aufs/dcsub.c
6512--- /usr/share/empty/fs/aufs/dcsub.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 6513+++ linux/fs/aufs/dcsub.c 2016-02-28 11:27:01.277245613 +0100
c1595e42 6514@@ -0,0 +1,224 @@
1facf9fc 6515+/*
7f2ca4b1 6516+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 6517+ *
6518+ * This program, aufs is free software; you can redistribute it and/or modify
6519+ * it under the terms of the GNU General Public License as published by
6520+ * the Free Software Foundation; either version 2 of the License, or
6521+ * (at your option) any later version.
dece6358
AM
6522+ *
6523+ * This program is distributed in the hope that it will be useful,
6524+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6525+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6526+ * GNU General Public License for more details.
6527+ *
6528+ * You should have received a copy of the GNU General Public License
523b37e3 6529+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6530+ */
6531+
6532+/*
6533+ * sub-routines for dentry cache
6534+ */
6535+
6536+#include "aufs.h"
6537+
6538+static void au_dpage_free(struct au_dpage *dpage)
6539+{
6540+ int i;
6541+ struct dentry **p;
6542+
6543+ p = dpage->dentries;
6544+ for (i = 0; i < dpage->ndentry; i++)
6545+ dput(*p++);
6546+ free_page((unsigned long)dpage->dentries);
6547+}
6548+
6549+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp)
6550+{
6551+ int err;
6552+ void *p;
6553+
6554+ err = -ENOMEM;
6555+ dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp);
6556+ if (unlikely(!dpages->dpages))
6557+ goto out;
6558+
6559+ p = (void *)__get_free_page(gfp);
6560+ if (unlikely(!p))
6561+ goto out_dpages;
6562+
6563+ dpages->dpages[0].ndentry = 0;
6564+ dpages->dpages[0].dentries = p;
6565+ dpages->ndpage = 1;
6566+ return 0; /* success */
6567+
4f0767ce 6568+out_dpages:
1facf9fc 6569+ kfree(dpages->dpages);
4f0767ce 6570+out:
1facf9fc 6571+ return err;
6572+}
6573+
6574+void au_dpages_free(struct au_dcsub_pages *dpages)
6575+{
6576+ int i;
6577+ struct au_dpage *p;
6578+
6579+ p = dpages->dpages;
6580+ for (i = 0; i < dpages->ndpage; i++)
6581+ au_dpage_free(p++);
6582+ kfree(dpages->dpages);
6583+}
6584+
6585+static int au_dpages_append(struct au_dcsub_pages *dpages,
6586+ struct dentry *dentry, gfp_t gfp)
6587+{
6588+ int err, sz;
6589+ struct au_dpage *dpage;
6590+ void *p;
6591+
6592+ dpage = dpages->dpages + dpages->ndpage - 1;
6593+ sz = PAGE_SIZE / sizeof(dentry);
6594+ if (unlikely(dpage->ndentry >= sz)) {
6595+ AuLabel(new dpage);
6596+ err = -ENOMEM;
6597+ sz = dpages->ndpage * sizeof(*dpages->dpages);
6598+ p = au_kzrealloc(dpages->dpages, sz,
6599+ sz + sizeof(*dpages->dpages), gfp);
6600+ if (unlikely(!p))
6601+ goto out;
6602+
6603+ dpages->dpages = p;
6604+ dpage = dpages->dpages + dpages->ndpage;
6605+ p = (void *)__get_free_page(gfp);
6606+ if (unlikely(!p))
6607+ goto out;
6608+
6609+ dpage->ndentry = 0;
6610+ dpage->dentries = p;
6611+ dpages->ndpage++;
6612+ }
6613+
c1595e42 6614+ AuDebugOn(au_dcount(dentry) <= 0);
027c5e7a 6615+ dpage->dentries[dpage->ndentry++] = dget_dlock(dentry);
1facf9fc 6616+ return 0; /* success */
6617+
4f0767ce 6618+out:
1facf9fc 6619+ return err;
6620+}
6621+
c1595e42
JR
6622+/* todo: BAD approach */
6623+/* copied from linux/fs/dcache.c */
/* return values of the 'enter' callback given to d_walk() */
enum d_walk_ret {
	D_WALK_CONTINUE = 0,
	D_WALK_QUIT,
	D_WALK_NORETRY,
	D_WALK_SKIP,
};
6630+
6631+extern void d_walk(struct dentry *parent, void *data,
6632+ enum d_walk_ret (*enter)(void *, struct dentry *),
6633+ void (*finish)(void *));
6634+
6635+struct ac_dpages_arg {
1facf9fc 6636+ int err;
c1595e42
JR
6637+ struct au_dcsub_pages *dpages;
6638+ struct super_block *sb;
6639+ au_dpages_test test;
6640+ void *arg;
6641+};
1facf9fc 6642+
c1595e42
JR
6643+static enum d_walk_ret au_call_dpages_append(void *_arg, struct dentry *dentry)
6644+{
6645+ enum d_walk_ret ret;
6646+ struct ac_dpages_arg *arg = _arg;
1facf9fc 6647+
c1595e42
JR
6648+ ret = D_WALK_CONTINUE;
6649+ if (dentry->d_sb == arg->sb
6650+ && !IS_ROOT(dentry)
6651+ && au_dcount(dentry) > 0
6652+ && au_di(dentry)
6653+ && (!arg->test || arg->test(dentry, arg->arg))) {
6654+ arg->err = au_dpages_append(arg->dpages, dentry, GFP_ATOMIC);
6655+ if (unlikely(arg->err))
6656+ ret = D_WALK_QUIT;
1facf9fc 6657+ }
6658+
c1595e42
JR
6659+ return ret;
6660+}
027c5e7a 6661+
c1595e42
JR
6662+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
6663+ au_dpages_test test, void *arg)
6664+{
6665+ struct ac_dpages_arg args = {
6666+ .err = 0,
6667+ .dpages = dpages,
6668+ .sb = root->d_sb,
6669+ .test = test,
6670+ .arg = arg
6671+ };
027c5e7a 6672+
c1595e42
JR
6673+ d_walk(root, &args, au_call_dpages_append, NULL);
6674+
6675+ return args.err;
1facf9fc 6676+}
6677+
6678+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
6679+ int do_include, au_dpages_test test, void *arg)
6680+{
6681+ int err;
6682+
6683+ err = 0;
027c5e7a
AM
6684+ write_seqlock(&rename_lock);
6685+ spin_lock(&dentry->d_lock);
6686+ if (do_include
c1595e42 6687+ && au_dcount(dentry) > 0
027c5e7a 6688+ && (!test || test(dentry, arg)))
1facf9fc 6689+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
027c5e7a
AM
6690+ spin_unlock(&dentry->d_lock);
6691+ if (unlikely(err))
6692+ goto out;
6693+
6694+ /*
523b37e3 6695+ * RCU for vfsmount is unnecessary since this is a traverse in a single
027c5e7a
AM
6696+ * mount
6697+ */
1facf9fc 6698+ while (!IS_ROOT(dentry)) {
027c5e7a
AM
6699+ dentry = dentry->d_parent; /* rename_lock is locked */
6700+ spin_lock(&dentry->d_lock);
c1595e42 6701+ if (au_dcount(dentry) > 0
027c5e7a 6702+ && (!test || test(dentry, arg)))
1facf9fc 6703+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
027c5e7a
AM
6704+ spin_unlock(&dentry->d_lock);
6705+ if (unlikely(err))
6706+ break;
1facf9fc 6707+ }
6708+
4f0767ce 6709+out:
027c5e7a 6710+ write_sequnlock(&rename_lock);
1facf9fc 6711+ return err;
6712+}
6713+
027c5e7a
AM
6714+static inline int au_dcsub_dpages_aufs(struct dentry *dentry, void *arg)
6715+{
6716+ return au_di(dentry) && dentry->d_sb == arg;
6717+}
6718+
6719+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
6720+ struct dentry *dentry, int do_include)
6721+{
6722+ return au_dcsub_pages_rev(dpages, dentry, do_include,
6723+ au_dcsub_dpages_aufs, dentry->d_sb);
6724+}
6725+
4a4d8108 6726+int au_test_subdir(struct dentry *d1, struct dentry *d2)
1facf9fc 6727+{
4a4d8108
AM
6728+ struct path path[2] = {
6729+ {
6730+ .dentry = d1
6731+ },
6732+ {
6733+ .dentry = d2
6734+ }
6735+ };
1facf9fc 6736+
4a4d8108 6737+ return path_is_under(path + 0, path + 1);
1facf9fc 6738+}
7f207e10
AM
6739diff -urN /usr/share/empty/fs/aufs/dcsub.h linux/fs/aufs/dcsub.h
6740--- /usr/share/empty/fs/aufs/dcsub.h 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
6741+++ linux/fs/aufs/dcsub.h 2016-02-28 11:27:01.277245613 +0100
6742@@ -0,0 +1,123 @@
1facf9fc 6743+/*
7f2ca4b1 6744+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 6745+ *
6746+ * This program, aufs is free software; you can redistribute it and/or modify
6747+ * it under the terms of the GNU General Public License as published by
6748+ * the Free Software Foundation; either version 2 of the License, or
6749+ * (at your option) any later version.
dece6358
AM
6750+ *
6751+ * This program is distributed in the hope that it will be useful,
6752+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6753+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6754+ * GNU General Public License for more details.
6755+ *
6756+ * You should have received a copy of the GNU General Public License
523b37e3 6757+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6758+ */
6759+
6760+/*
6761+ * sub-routines for dentry cache
6762+ */
6763+
6764+#ifndef __AUFS_DCSUB_H__
6765+#define __AUFS_DCSUB_H__
6766+
6767+#ifdef __KERNEL__
6768+
7f207e10 6769+#include <linux/dcache.h>
027c5e7a 6770+#include <linux/fs.h>
dece6358 6771+
/* one page worth of dentry pointers */
struct au_dpage {
	int		ndentry;	/* number of used slots in dentries[] */
	struct dentry	**dentries;	/* the page holding the pointers */
};
6776+
/* growable array of dentry pages */
struct au_dcsub_pages {
	int		ndpage;		/* number of elements in dpages[] */
	struct au_dpage	*dpages;
};
6781+
6782+/* ---------------------------------------------------------------------- */
6783+
7f207e10 6784+/* dcsub.c */
1facf9fc 6785+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp);
6786+void au_dpages_free(struct au_dcsub_pages *dpages);
6787+typedef int (*au_dpages_test)(struct dentry *dentry, void *arg);
6788+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
6789+ au_dpages_test test, void *arg);
6790+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
6791+ int do_include, au_dpages_test test, void *arg);
027c5e7a
AM
6792+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
6793+ struct dentry *dentry, int do_include);
4a4d8108 6794+int au_test_subdir(struct dentry *d1, struct dentry *d2);
1facf9fc 6795+
7f207e10
AM
6796+/* ---------------------------------------------------------------------- */
6797+
523b37e3
AM
6798+/*
6799+ * todo: in linux-3.13, several similar (but faster) helpers are added to
6800+ * include/linux/dcache.h. Try them (in the future).
6801+ */
6802+
027c5e7a
AM
6803+static inline int au_d_hashed_positive(struct dentry *d)
6804+{
6805+ int err;
6806+ struct inode *inode = d->d_inode;
076b876e 6807+
027c5e7a
AM
6808+ err = 0;
6809+ if (unlikely(d_unhashed(d) || !inode || !inode->i_nlink))
6810+ err = -ENOENT;
6811+ return err;
6812+}
6813+
38d290e6
JR
6814+static inline int au_d_linkable(struct dentry *d)
6815+{
6816+ int err;
6817+ struct inode *inode = d->d_inode;
076b876e 6818+
38d290e6
JR
6819+ err = au_d_hashed_positive(d);
6820+ if (err
6821+ && inode
6822+ && (inode->i_state & I_LINKABLE))
6823+ err = 0;
6824+ return err;
6825+}
6826+
027c5e7a
AM
6827+static inline int au_d_alive(struct dentry *d)
6828+{
6829+ int err;
6830+ struct inode *inode;
076b876e 6831+
027c5e7a
AM
6832+ err = 0;
6833+ if (!IS_ROOT(d))
6834+ err = au_d_hashed_positive(d);
6835+ else {
6836+ inode = d->d_inode;
6837+ if (unlikely(d_unlinked(d) || !inode || !inode->i_nlink))
6838+ err = -ENOENT;
6839+ }
6840+ return err;
6841+}
6842+
6843+static inline int au_alive_dir(struct dentry *d)
7f207e10 6844+{
027c5e7a 6845+ int err;
076b876e 6846+
027c5e7a
AM
6847+ err = au_d_alive(d);
6848+ if (unlikely(err || IS_DEADDIR(d->d_inode)))
6849+ err = -ENOENT;
6850+ return err;
7f207e10
AM
6851+}
6852+
38d290e6
JR
6853+static inline int au_qstreq(struct qstr *a, struct qstr *b)
6854+{
6855+ return a->len == b->len
6856+ && !memcmp(a->name, b->name, a->len);
6857+}
6858+
c1595e42
JR
6859+static inline int au_dcount(struct dentry *d)
6860+{
6861+ return (int)d_count(d);
6862+}
6863+
1facf9fc 6864+#endif /* __KERNEL__ */
6865+#endif /* __AUFS_DCSUB_H__ */
7f207e10
AM
6866diff -urN /usr/share/empty/fs/aufs/debug.c linux/fs/aufs/debug.c
6867--- /usr/share/empty/fs/aufs/debug.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
6868+++ linux/fs/aufs/debug.c 2016-02-28 12:40:45.724388517 +0100
6869@@ -0,0 +1,436 @@
1facf9fc 6870+/*
7f2ca4b1 6871+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 6872+ *
6873+ * This program, aufs is free software; you can redistribute it and/or modify
6874+ * it under the terms of the GNU General Public License as published by
6875+ * the Free Software Foundation; either version 2 of the License, or
6876+ * (at your option) any later version.
dece6358
AM
6877+ *
6878+ * This program is distributed in the hope that it will be useful,
6879+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6880+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6881+ * GNU General Public License for more details.
6882+ *
6883+ * You should have received a copy of the GNU General Public License
523b37e3 6884+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 6885+ */
6886+
6887+/*
6888+ * debug print functions
6889+ */
6890+
6891+#include "aufs.h"
6892+
392086de
AM
6893+/* Returns 0, or -errno. arg is in kp->arg. */
6894+static int param_atomic_t_set(const char *val, const struct kernel_param *kp)
6895+{
6896+ int err, n;
6897+
6898+ err = kstrtoint(val, 0, &n);
6899+ if (!err) {
6900+ if (n > 0)
6901+ au_debug_on();
6902+ else
6903+ au_debug_off();
6904+ }
6905+ return err;
6906+}
6907+
6908+/* Returns length written or -errno. Buffer is 4k (ie. be short!) */
6909+static int param_atomic_t_get(char *buffer, const struct kernel_param *kp)
6910+{
6911+ atomic_t *a;
6912+
6913+ a = kp->arg;
6914+ return sprintf(buffer, "%d", atomic_read(a));
6915+}
6916+
6917+static struct kernel_param_ops param_ops_atomic_t = {
6918+ .set = param_atomic_t_set,
6919+ .get = param_atomic_t_get
6920+ /* void (*free)(void *arg) */
6921+};
6922+
6923+atomic_t aufs_debug = ATOMIC_INIT(0);
1facf9fc 6924+MODULE_PARM_DESC(debug, "debug print");
392086de 6925+module_param_named(debug, aufs_debug, atomic_t, S_IRUGO | S_IWUSR | S_IWGRP);
1facf9fc 6926+
c1595e42 6927+DEFINE_MUTEX(au_dbg_mtx); /* just to serialize the dbg msgs */
1facf9fc 6928+char *au_plevel = KERN_DEBUG;
e49829fe
JR
6929+#define dpri(fmt, ...) do { \
6930+ if ((au_plevel \
6931+ && strcmp(au_plevel, KERN_DEBUG)) \
6932+ || au_debug_test()) \
6933+ printk("%s" fmt, au_plevel, ##__VA_ARGS__); \
1facf9fc 6934+} while (0)
6935+
6936+/* ---------------------------------------------------------------------- */
6937+
6938+void au_dpri_whlist(struct au_nhash *whlist)
6939+{
6940+ unsigned long ul, n;
6941+ struct hlist_head *head;
c06a8ce3 6942+ struct au_vdir_wh *pos;
1facf9fc 6943+
6944+ n = whlist->nh_num;
6945+ head = whlist->nh_head;
6946+ for (ul = 0; ul < n; ul++) {
c06a8ce3 6947+ hlist_for_each_entry(pos, head, wh_hash)
1facf9fc 6948+ dpri("b%d, %.*s, %d\n",
c06a8ce3
AM
6949+ pos->wh_bindex,
6950+ pos->wh_str.len, pos->wh_str.name,
6951+ pos->wh_str.len);
1facf9fc 6952+ head++;
6953+ }
6954+}
6955+
6956+void au_dpri_vdir(struct au_vdir *vdir)
6957+{
6958+ unsigned long ul;
6959+ union au_vdir_deblk_p p;
6960+ unsigned char *o;
6961+
6962+ if (!vdir || IS_ERR(vdir)) {
6963+ dpri("err %ld\n", PTR_ERR(vdir));
6964+ return;
6965+ }
6966+
6967+ dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %lu\n",
6968+ vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk,
6969+ vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version);
6970+ for (ul = 0; ul < vdir->vd_nblk; ul++) {
6971+ p.deblk = vdir->vd_deblk[ul];
6972+ o = p.deblk;
6973+ dpri("[%lu]: %p\n", ul, o);
6974+ }
6975+}
6976+
53392da6 6977+static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode, int hn,
1facf9fc 6978+ struct dentry *wh)
6979+{
6980+ char *n = NULL;
6981+ int l = 0;
6982+
6983+ if (!inode || IS_ERR(inode)) {
6984+ dpri("i%d: err %ld\n", bindex, PTR_ERR(inode));
6985+ return -1;
6986+ }
6987+
c2b27bf2 6988+ /* the type of i_blocks depends upon CONFIG_LBDAF */
1facf9fc 6989+ BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long)
6990+ && sizeof(inode->i_blocks) != sizeof(u64));
6991+ if (wh) {
6992+ n = (void *)wh->d_name.name;
6993+ l = wh->d_name.len;
6994+ }
6995+
53392da6
AM
6996+ dpri("i%d: %p, i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu,"
6997+ " hn %d, ct %lld, np %lu, st 0x%lx, f 0x%x, v %llu, g %x%s%.*s\n",
6998+ bindex, inode,
1facf9fc 6999+ inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??",
7000+ atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode,
7001+ i_size_read(inode), (unsigned long long)inode->i_blocks,
53392da6 7002+ hn, (long long)timespec_to_ns(&inode->i_ctime) & 0x0ffff,
1facf9fc 7003+ inode->i_mapping ? inode->i_mapping->nrpages : 0,
b752ccd1
AM
7004+ inode->i_state, inode->i_flags, inode->i_version,
7005+ inode->i_generation,
1facf9fc 7006+ l ? ", wh " : "", l, n);
7007+ return 0;
7008+}
7009+
7010+void au_dpri_inode(struct inode *inode)
7011+{
7012+ struct au_iinfo *iinfo;
7013+ aufs_bindex_t bindex;
53392da6 7014+ int err, hn;
1facf9fc 7015+
53392da6 7016+ err = do_pri_inode(-1, inode, -1, NULL);
1facf9fc 7017+ if (err || !au_test_aufs(inode->i_sb))
7018+ return;
7019+
7020+ iinfo = au_ii(inode);
7021+ if (!iinfo)
7022+ return;
7023+ dpri("i-1: bstart %d, bend %d, gen %d\n",
537831f9 7024+ iinfo->ii_bstart, iinfo->ii_bend, au_iigen(inode, NULL));
1facf9fc 7025+ if (iinfo->ii_bstart < 0)
7026+ return;
53392da6
AM
7027+ hn = 0;
7028+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; bindex++) {
7029+ hn = !!au_hn(iinfo->ii_hinode + bindex);
7030+ do_pri_inode(bindex, iinfo->ii_hinode[0 + bindex].hi_inode, hn,
1facf9fc 7031+ iinfo->ii_hinode[0 + bindex].hi_whdentry);
53392da6 7032+ }
1facf9fc 7033+}
7034+
2cbb1c4b
JR
7035+void au_dpri_dalias(struct inode *inode)
7036+{
7037+ struct dentry *d;
7038+
7039+ spin_lock(&inode->i_lock);
c1595e42 7040+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias)
2cbb1c4b
JR
7041+ au_dpri_dentry(d);
7042+ spin_unlock(&inode->i_lock);
7043+}
7044+
1facf9fc 7045+static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry)
7046+{
7047+ struct dentry *wh = NULL;
53392da6 7048+ int hn;
076b876e 7049+ struct au_iinfo *iinfo;
1facf9fc 7050+
7051+ if (!dentry || IS_ERR(dentry)) {
7052+ dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry));
7053+ return -1;
7054+ }
7055+ /* do not call dget_parent() here */
027c5e7a 7056+ /* note: access d_xxx without d_lock */
523b37e3
AM
7057+ dpri("d%d: %p, %pd2?, %s, cnt %d, flags 0x%x, %shashed\n",
7058+ bindex, dentry, dentry,
1facf9fc 7059+ dentry->d_sb ? au_sbtype(dentry->d_sb) : "??",
c1595e42 7060+ au_dcount(dentry), dentry->d_flags,
523b37e3 7061+ d_unhashed(dentry) ? "un" : "");
53392da6 7062+ hn = -1;
1facf9fc 7063+ if (bindex >= 0 && dentry->d_inode && au_test_aufs(dentry->d_sb)) {
076b876e 7064+ iinfo = au_ii(dentry->d_inode);
53392da6
AM
7065+ if (iinfo) {
7066+ hn = !!au_hn(iinfo->ii_hinode + bindex);
1facf9fc 7067+ wh = iinfo->ii_hinode[0 + bindex].hi_whdentry;
53392da6 7068+ }
1facf9fc 7069+ }
53392da6 7070+ do_pri_inode(bindex, dentry->d_inode, hn, wh);
1facf9fc 7071+ return 0;
7072+}
7073+
7074+void au_dpri_dentry(struct dentry *dentry)
7075+{
7076+ struct au_dinfo *dinfo;
7077+ aufs_bindex_t bindex;
7078+ int err;
4a4d8108 7079+ struct au_hdentry *hdp;
1facf9fc 7080+
7081+ err = do_pri_dentry(-1, dentry);
7082+ if (err || !au_test_aufs(dentry->d_sb))
7083+ return;
7084+
7085+ dinfo = au_di(dentry);
7086+ if (!dinfo)
7087+ return;
38d290e6 7088+ dpri("d-1: bstart %d, bend %d, bwh %d, bdiropq %d, gen %d, tmp %d\n",
1facf9fc 7089+ dinfo->di_bstart, dinfo->di_bend,
38d290e6
JR
7090+ dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry),
7091+ dinfo->di_tmpfile);
1facf9fc 7092+ if (dinfo->di_bstart < 0)
7093+ return;
4a4d8108 7094+ hdp = dinfo->di_hdentry;
1facf9fc 7095+ for (bindex = dinfo->di_bstart; bindex <= dinfo->di_bend; bindex++)
4a4d8108 7096+ do_pri_dentry(bindex, hdp[0 + bindex].hd_dentry);
1facf9fc 7097+}
7098+
7099+static int do_pri_file(aufs_bindex_t bindex, struct file *file)
7100+{
7101+ char a[32];
7102+
7103+ if (!file || IS_ERR(file)) {
7104+ dpri("f%d: err %ld\n", bindex, PTR_ERR(file));
7105+ return -1;
7106+ }
7107+ a[0] = 0;
7108+ if (bindex < 0
7f2ca4b1 7109+ && !IS_ERR_OR_NULL(file->f_dentry)
1facf9fc 7110+ && au_test_aufs(file->f_dentry->d_sb)
7111+ && au_fi(file))
e49829fe 7112+ snprintf(a, sizeof(a), ", gen %d, mmapped %d",
2cbb1c4b 7113+ au_figen(file), atomic_read(&au_fi(file)->fi_mmapped));
b752ccd1 7114+ dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, v %llu, pos %llu%s\n",
1facf9fc 7115+ bindex, file->f_mode, file->f_flags, (long)file_count(file),
b752ccd1 7116+ file->f_version, file->f_pos, a);
7f2ca4b1 7117+ if (!IS_ERR_OR_NULL(file->f_dentry))
1facf9fc 7118+ do_pri_dentry(bindex, file->f_dentry);
7119+ return 0;
7120+}
7121+
7122+void au_dpri_file(struct file *file)
7123+{
7124+ struct au_finfo *finfo;
4a4d8108
AM
7125+ struct au_fidir *fidir;
7126+ struct au_hfile *hfile;
1facf9fc 7127+ aufs_bindex_t bindex;
7128+ int err;
7129+
7130+ err = do_pri_file(-1, file);
7f2ca4b1
JR
7131+ if (err
7132+ || IS_ERR_OR_NULL(file->f_dentry)
7133+ || !au_test_aufs(file->f_dentry->d_sb))
1facf9fc 7134+ return;
7135+
7136+ finfo = au_fi(file);
7137+ if (!finfo)
7138+ return;
4a4d8108 7139+ if (finfo->fi_btop < 0)
1facf9fc 7140+ return;
4a4d8108
AM
7141+ fidir = finfo->fi_hdir;
7142+ if (!fidir)
7143+ do_pri_file(finfo->fi_btop, finfo->fi_htop.hf_file);
7144+ else
e49829fe
JR
7145+ for (bindex = finfo->fi_btop;
7146+ bindex >= 0 && bindex <= fidir->fd_bbot;
4a4d8108
AM
7147+ bindex++) {
7148+ hfile = fidir->fd_hfile + bindex;
7149+ do_pri_file(bindex, hfile ? hfile->hf_file : NULL);
7150+ }
1facf9fc 7151+}
7152+
7153+static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br)
7154+{
7155+ struct vfsmount *mnt;
7156+ struct super_block *sb;
7157+
7158+ if (!br || IS_ERR(br))
7159+ goto out;
86dc4139 7160+ mnt = au_br_mnt(br);
1facf9fc 7161+ if (!mnt || IS_ERR(mnt))
7162+ goto out;
7163+ sb = mnt->mnt_sb;
7164+ if (!sb || IS_ERR(sb))
7165+ goto out;
7166+
1e00d052 7167+ dpri("s%d: {perm 0x%x, id %d, cnt %d, wbr %p}, "
b752ccd1 7168+ "%s, dev 0x%02x%02x, flags 0x%lx, cnt %d, active %d, "
1facf9fc 7169+ "xino %d\n",
1e00d052
AM
7170+ bindex, br->br_perm, br->br_id, atomic_read(&br->br_count),
7171+ br->br_wbr, au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev),
b752ccd1 7172+ sb->s_flags, sb->s_count,
1facf9fc 7173+ atomic_read(&sb->s_active), !!br->br_xino.xi_file);
7174+ return 0;
7175+
4f0767ce 7176+out:
1facf9fc 7177+ dpri("s%d: err %ld\n", bindex, PTR_ERR(br));
7178+ return -1;
7179+}
7180+
7181+void au_dpri_sb(struct super_block *sb)
7182+{
7183+ struct au_sbinfo *sbinfo;
7184+ aufs_bindex_t bindex;
7185+ int err;
7186+ /* to reduce stack size */
7187+ struct {
7188+ struct vfsmount mnt;
7189+ struct au_branch fake;
7190+ } *a;
7191+
7192+ /* this function can be called from magic sysrq */
7193+ a = kzalloc(sizeof(*a), GFP_ATOMIC);
7194+ if (unlikely(!a)) {
7195+ dpri("no memory\n");
7196+ return;
7197+ }
7198+
7199+ a->mnt.mnt_sb = sb;
86dc4139 7200+ a->fake.br_path.mnt = &a->mnt;
1facf9fc 7201+ atomic_set(&a->fake.br_count, 0);
7202+ smp_mb(); /* atomic_set */
7203+ err = do_pri_br(-1, &a->fake);
7204+ kfree(a);
7205+ dpri("dev 0x%x\n", sb->s_dev);
7206+ if (err || !au_test_aufs(sb))
7207+ return;
7208+
7209+ sbinfo = au_sbi(sb);
7210+ if (!sbinfo)
7211+ return;
7212+ dpri("nw %d, gen %u, kobj %d\n",
7213+ atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation,
7214+ atomic_read(&sbinfo->si_kobj.kref.refcount));
7215+ for (bindex = 0; bindex <= sbinfo->si_bend; bindex++)
7216+ do_pri_br(bindex, sbinfo->si_branch[0 + bindex]);
7217+}
7218+
7219+/* ---------------------------------------------------------------------- */
7220+
027c5e7a
AM
7221+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line)
7222+{
7223+ struct inode *h_inode, *inode = dentry->d_inode;
7224+ struct dentry *h_dentry;
7225+ aufs_bindex_t bindex, bend, bi;
7226+
7227+ if (!inode /* || au_di(dentry)->di_lsc == AuLsc_DI_TMP */)
7228+ return;
7229+
7230+ bend = au_dbend(dentry);
7231+ bi = au_ibend(inode);
7232+ if (bi < bend)
7233+ bend = bi;
7234+ bindex = au_dbstart(dentry);
7235+ bi = au_ibstart(inode);
7236+ if (bi > bindex)
7237+ bindex = bi;
7238+
7239+ for (; bindex <= bend; bindex++) {
7240+ h_dentry = au_h_dptr(dentry, bindex);
7241+ if (!h_dentry)
7242+ continue;
7243+ h_inode = au_h_iptr(inode, bindex);
7244+ if (unlikely(h_inode != h_dentry->d_inode)) {
392086de 7245+ au_debug_on();
027c5e7a
AM
7246+ AuDbg("b%d, %s:%d\n", bindex, func, line);
7247+ AuDbgDentry(dentry);
7248+ AuDbgInode(inode);
392086de 7249+ au_debug_off();
027c5e7a
AM
7250+ BUG();
7251+ }
7252+ }
7253+}
7254+
1facf9fc 7255+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen)
7256+{
7257+ int err, i, j;
7258+ struct au_dcsub_pages dpages;
7259+ struct au_dpage *dpage;
7260+ struct dentry **dentries;
7261+
7262+ err = au_dpages_init(&dpages, GFP_NOFS);
7263+ AuDebugOn(err);
027c5e7a 7264+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/1);
1facf9fc 7265+ AuDebugOn(err);
7266+ for (i = dpages.ndpage - 1; !err && i >= 0; i--) {
7267+ dpage = dpages.dpages + i;
7268+ dentries = dpage->dentries;
7269+ for (j = dpage->ndentry - 1; !err && j >= 0; j--)
027c5e7a 7270+ AuDebugOn(au_digen_test(dentries[j], sigen));
1facf9fc 7271+ }
7272+ au_dpages_free(&dpages);
7273+}
7274+
1facf9fc 7275+void au_dbg_verify_kthread(void)
7276+{
53392da6 7277+ if (au_wkq_test()) {
1facf9fc 7278+ au_dbg_blocked();
1e00d052
AM
7279+ /*
7280+ * It may be recursive, but udba=notify between two aufs mounts,
7281+ * where a single ro branch is shared, is not a problem.
7282+ */
7283+ /* WARN_ON(1); */
1facf9fc 7284+ }
7285+}
7286+
7287+/* ---------------------------------------------------------------------- */
7288+
1facf9fc 7289+int __init au_debug_init(void)
7290+{
7291+ aufs_bindex_t bindex;
7292+ struct au_vdir_destr destr;
7293+
7294+ bindex = -1;
7295+ AuDebugOn(bindex >= 0);
7296+
7297+ destr.len = -1;
7298+ AuDebugOn(destr.len < NAME_MAX);
7299+
7300+#ifdef CONFIG_4KSTACKS
0c3ec466 7301+ pr_warn("CONFIG_4KSTACKS is defined.\n");
1facf9fc 7302+#endif
7303+
1facf9fc 7304+ return 0;
7305+}
7f207e10
AM
7306diff -urN /usr/share/empty/fs/aufs/debug.h linux/fs/aufs/debug.h
7307--- /usr/share/empty/fs/aufs/debug.h 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
7308+++ linux/fs/aufs/debug.h 2016-02-28 11:27:01.277245613 +0100
7309@@ -0,0 +1,228 @@
1facf9fc 7310+/*
7f2ca4b1 7311+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 7312+ *
7313+ * This program, aufs is free software; you can redistribute it and/or modify
7314+ * it under the terms of the GNU General Public License as published by
7315+ * the Free Software Foundation; either version 2 of the License, or
7316+ * (at your option) any later version.
dece6358
AM
7317+ *
7318+ * This program is distributed in the hope that it will be useful,
7319+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7320+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7321+ * GNU General Public License for more details.
7322+ *
7323+ * You should have received a copy of the GNU General Public License
523b37e3 7324+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 7325+ */
7326+
7327+/*
7328+ * debug print functions
7329+ */
7330+
7331+#ifndef __AUFS_DEBUG_H__
7332+#define __AUFS_DEBUG_H__
7333+
7334+#ifdef __KERNEL__
7335+
392086de 7336+#include <linux/atomic.h>
4a4d8108
AM
7337+#include <linux/module.h>
7338+#include <linux/kallsyms.h>
1facf9fc 7339+#include <linux/sysrq.h>
4a4d8108 7340+
1facf9fc 7341+#ifdef CONFIG_AUFS_DEBUG
7342+#define AuDebugOn(a) BUG_ON(a)
7343+
7344+/* module parameter */
392086de
AM
7345+extern atomic_t aufs_debug;
7346+static inline void au_debug_on(void)
1facf9fc 7347+{
392086de
AM
7348+ atomic_inc(&aufs_debug);
7349+}
7350+static inline void au_debug_off(void)
7351+{
7352+ atomic_dec_if_positive(&aufs_debug);
1facf9fc 7353+}
7354+
7355+static inline int au_debug_test(void)
7356+{
392086de 7357+ return atomic_read(&aufs_debug) > 0;
1facf9fc 7358+}
7359+#else
7360+#define AuDebugOn(a) do {} while (0)
392086de
AM
7361+AuStubVoid(au_debug_on, void)
7362+AuStubVoid(au_debug_off, void)
4a4d8108 7363+AuStubInt0(au_debug_test, void)
1facf9fc 7364+#endif /* CONFIG_AUFS_DEBUG */
7365+
392086de
AM
7366+#define param_check_atomic_t(name, p) __param_check(name, p, atomic_t)
7367+
1facf9fc 7368+/* ---------------------------------------------------------------------- */
7369+
7370+/* debug print */
7371+
4a4d8108 7372+#define AuDbg(fmt, ...) do { \
1facf9fc 7373+ if (au_debug_test()) \
4a4d8108 7374+ pr_debug("DEBUG: " fmt, ##__VA_ARGS__); \
1facf9fc 7375+} while (0)
4a4d8108
AM
7376+#define AuLabel(l) AuDbg(#l "\n")
7377+#define AuIOErr(fmt, ...) pr_err("I/O Error, " fmt, ##__VA_ARGS__)
7378+#define AuWarn1(fmt, ...) do { \
1facf9fc 7379+ static unsigned char _c; \
7380+ if (!_c++) \
0c3ec466 7381+ pr_warn(fmt, ##__VA_ARGS__); \
1facf9fc 7382+} while (0)
7383+
4a4d8108 7384+#define AuErr1(fmt, ...) do { \
1facf9fc 7385+ static unsigned char _c; \
7386+ if (!_c++) \
4a4d8108 7387+ pr_err(fmt, ##__VA_ARGS__); \
1facf9fc 7388+} while (0)
7389+
4a4d8108 7390+#define AuIOErr1(fmt, ...) do { \
1facf9fc 7391+ static unsigned char _c; \
7392+ if (!_c++) \
4a4d8108 7393+ AuIOErr(fmt, ##__VA_ARGS__); \
1facf9fc 7394+} while (0)
7395+
7396+#define AuUnsupportMsg "This operation is not supported." \
7397+ " Please report this application to aufs-users ML."
4a4d8108
AM
7398+#define AuUnsupport(fmt, ...) do { \
7399+ pr_err(AuUnsupportMsg "\n" fmt, ##__VA_ARGS__); \
1facf9fc 7400+ dump_stack(); \
7401+} while (0)
7402+
7403+#define AuTraceErr(e) do { \
7404+ if (unlikely((e) < 0)) \
7405+ AuDbg("err %d\n", (int)(e)); \
7406+} while (0)
7407+
7408+#define AuTraceErrPtr(p) do { \
7409+ if (IS_ERR(p)) \
7410+ AuDbg("err %ld\n", PTR_ERR(p)); \
7411+} while (0)
7412+
7413+/* dirty macros for debug print, use with "%.*s" and caution */
7414+#define AuLNPair(qstr) (qstr)->len, (qstr)->name
1facf9fc 7415+
7416+/* ---------------------------------------------------------------------- */
7417+
dece6358 7418+struct dentry;
1facf9fc 7419+#ifdef CONFIG_AUFS_DEBUG
c1595e42 7420+extern struct mutex au_dbg_mtx;
1facf9fc 7421+extern char *au_plevel;
7422+struct au_nhash;
7423+void au_dpri_whlist(struct au_nhash *whlist);
7424+struct au_vdir;
7425+void au_dpri_vdir(struct au_vdir *vdir);
dece6358 7426+struct inode;
1facf9fc 7427+void au_dpri_inode(struct inode *inode);
2cbb1c4b 7428+void au_dpri_dalias(struct inode *inode);
1facf9fc 7429+void au_dpri_dentry(struct dentry *dentry);
dece6358 7430+struct file;
1facf9fc 7431+void au_dpri_file(struct file *filp);
dece6358 7432+struct super_block;
1facf9fc 7433+void au_dpri_sb(struct super_block *sb);
7434+
027c5e7a
AM
7435+#define au_dbg_verify_dinode(d) __au_dbg_verify_dinode(d, __func__, __LINE__)
7436+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line);
1facf9fc 7437+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen);
1facf9fc 7438+void au_dbg_verify_kthread(void);
7439+
7440+int __init au_debug_init(void);
7f2ca4b1 7441+
1facf9fc 7442+#define AuDbgWhlist(w) do { \
c1595e42 7443+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7444+ AuDbg(#w "\n"); \
7445+ au_dpri_whlist(w); \
c1595e42 7446+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7447+} while (0)
7448+
7449+#define AuDbgVdir(v) do { \
c1595e42 7450+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7451+ AuDbg(#v "\n"); \
7452+ au_dpri_vdir(v); \
c1595e42 7453+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7454+} while (0)
7455+
7456+#define AuDbgInode(i) do { \
c1595e42 7457+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7458+ AuDbg(#i "\n"); \
7459+ au_dpri_inode(i); \
c1595e42 7460+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7461+} while (0)
7462+
2cbb1c4b 7463+#define AuDbgDAlias(i) do { \
c1595e42 7464+ mutex_lock(&au_dbg_mtx); \
2cbb1c4b
JR
7465+ AuDbg(#i "\n"); \
7466+ au_dpri_dalias(i); \
c1595e42 7467+ mutex_unlock(&au_dbg_mtx); \
2cbb1c4b
JR
7468+} while (0)
7469+
1facf9fc 7470+#define AuDbgDentry(d) do { \
c1595e42 7471+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7472+ AuDbg(#d "\n"); \
7473+ au_dpri_dentry(d); \
c1595e42 7474+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7475+} while (0)
7476+
7477+#define AuDbgFile(f) do { \
c1595e42 7478+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7479+ AuDbg(#f "\n"); \
7480+ au_dpri_file(f); \
c1595e42 7481+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7482+} while (0)
7483+
7484+#define AuDbgSb(sb) do { \
c1595e42 7485+ mutex_lock(&au_dbg_mtx); \
1facf9fc 7486+ AuDbg(#sb "\n"); \
7487+ au_dpri_sb(sb); \
c1595e42 7488+ mutex_unlock(&au_dbg_mtx); \
1facf9fc 7489+} while (0)
7490+
4a4d8108
AM
7491+#define AuDbgSym(addr) do { \
7492+ char sym[KSYM_SYMBOL_LEN]; \
7493+ sprint_symbol(sym, (unsigned long)addr); \
7494+ AuDbg("%s\n", sym); \
7495+} while (0)
1facf9fc 7496+#else
027c5e7a 7497+AuStubVoid(au_dbg_verify_dinode, struct dentry *dentry)
4a4d8108
AM
7498+AuStubVoid(au_dbg_verify_dir_parent, struct dentry *dentry, unsigned int sigen)
7499+AuStubVoid(au_dbg_verify_nondir_parent, struct dentry *dentry,
7500+ unsigned int sigen)
7501+AuStubVoid(au_dbg_verify_gen, struct dentry *parent, unsigned int sigen)
7502+AuStubVoid(au_dbg_verify_kthread, void)
7503+AuStubInt0(__init au_debug_init, void)
1facf9fc 7504+
1facf9fc 7505+#define AuDbgWhlist(w) do {} while (0)
7506+#define AuDbgVdir(v) do {} while (0)
7507+#define AuDbgInode(i) do {} while (0)
2cbb1c4b 7508+#define AuDbgDAlias(i) do {} while (0)
1facf9fc 7509+#define AuDbgDentry(d) do {} while (0)
7510+#define AuDbgFile(f) do {} while (0)
7511+#define AuDbgSb(sb) do {} while (0)
4a4d8108 7512+#define AuDbgSym(addr) do {} while (0)
1facf9fc 7513+#endif /* CONFIG_AUFS_DEBUG */
7514+
7515+/* ---------------------------------------------------------------------- */
7516+
7517+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
7518+int __init au_sysrq_init(void);
7519+void au_sysrq_fin(void);
7520+
7521+#ifdef CONFIG_HW_CONSOLE
7522+#define au_dbg_blocked() do { \
7523+ WARN_ON(1); \
0c5527e5 7524+ handle_sysrq('w'); \
1facf9fc 7525+} while (0)
7526+#else
4a4d8108 7527+AuStubVoid(au_dbg_blocked, void)
1facf9fc 7528+#endif
7529+
7530+#else
4a4d8108
AM
7531+AuStubInt0(__init au_sysrq_init, void)
7532+AuStubVoid(au_sysrq_fin, void)
7533+AuStubVoid(au_dbg_blocked, void)
1facf9fc 7534+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
7535+
7536+#endif /* __KERNEL__ */
7537+#endif /* __AUFS_DEBUG_H__ */
7f207e10
AM
7538diff -urN /usr/share/empty/fs/aufs/dentry.c linux/fs/aufs/dentry.c
7539--- /usr/share/empty/fs/aufs/dentry.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
7540+++ linux/fs/aufs/dentry.c 2016-02-28 11:27:01.277245613 +0100
7541@@ -0,0 +1,1129 @@
1facf9fc 7542+/*
7f2ca4b1 7543+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 7544+ *
7545+ * This program, aufs is free software; you can redistribute it and/or modify
7546+ * it under the terms of the GNU General Public License as published by
7547+ * the Free Software Foundation; either version 2 of the License, or
7548+ * (at your option) any later version.
dece6358
AM
7549+ *
7550+ * This program is distributed in the hope that it will be useful,
7551+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7552+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7553+ * GNU General Public License for more details.
7554+ *
7555+ * You should have received a copy of the GNU General Public License
523b37e3 7556+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 7557+ */
7558+
7559+/*
7560+ * lookup and dentry operations
7561+ */
7562+
dece6358 7563+#include <linux/namei.h>
1facf9fc 7564+#include "aufs.h"
7565+
1facf9fc 7566+#define AuLkup_ALLOW_NEG 1
076b876e 7567+#define AuLkup_IGNORE_PERM (1 << 1)
1facf9fc 7568+#define au_ftest_lkup(flags, name) ((flags) & AuLkup_##name)
7f207e10
AM
7569+#define au_fset_lkup(flags, name) \
7570+ do { (flags) |= AuLkup_##name; } while (0)
7571+#define au_fclr_lkup(flags, name) \
7572+ do { (flags) &= ~AuLkup_##name; } while (0)
1facf9fc 7573+
7574+struct au_do_lookup_args {
7575+ unsigned int flags;
7576+ mode_t type;
1facf9fc 7577+};
7578+
7579+/*
7580+ * returns positive/negative dentry, NULL or an error.
7581+ * NULL means whiteout-ed or not-found.
7582+ */
7583+static struct dentry*
7584+au_do_lookup(struct dentry *h_parent, struct dentry *dentry,
7585+ aufs_bindex_t bindex, struct qstr *wh_name,
7586+ struct au_do_lookup_args *args)
7587+{
7588+ struct dentry *h_dentry;
7f2ca4b1 7589+ struct inode *h_inode;
1facf9fc 7590+ struct au_branch *br;
7591+ int wh_found, opq;
7592+ unsigned char wh_able;
7593+ const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG);
076b876e
AM
7594+ const unsigned char ignore_perm = !!au_ftest_lkup(args->flags,
7595+ IGNORE_PERM);
1facf9fc 7596+
1facf9fc 7597+ wh_found = 0;
7598+ br = au_sbr(dentry->d_sb, bindex);
7599+ wh_able = !!au_br_whable(br->br_perm);
7600+ if (wh_able)
076b876e 7601+ wh_found = au_wh_test(h_parent, wh_name, /*try_sio*/0);
1facf9fc 7602+ h_dentry = ERR_PTR(wh_found);
7603+ if (!wh_found)
7604+ goto real_lookup;
7605+ if (unlikely(wh_found < 0))
7606+ goto out;
7607+
7608+ /* We found a whiteout */
7609+ /* au_set_dbend(dentry, bindex); */
7610+ au_set_dbwh(dentry, bindex);
7611+ if (!allow_neg)
7612+ return NULL; /* success */
7613+
4f0767ce 7614+real_lookup:
076b876e
AM
7615+ if (!ignore_perm)
7616+ h_dentry = vfsub_lkup_one(&dentry->d_name, h_parent);
7617+ else
7618+ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent);
7f2ca4b1
JR
7619+ if (IS_ERR(h_dentry)) {
7620+ if (PTR_ERR(h_dentry) == -ENAMETOOLONG
7621+ && !allow_neg)
7622+ h_dentry = NULL;
1facf9fc 7623+ goto out;
7f2ca4b1 7624+ }
1facf9fc 7625+
7626+ h_inode = h_dentry->d_inode;
7627+ if (!h_inode) {
7628+ if (!allow_neg)
7629+ goto out_neg;
7630+ } else if (wh_found
7631+ || (args->type && args->type != (h_inode->i_mode & S_IFMT)))
7632+ goto out_neg;
7633+
7634+ if (au_dbend(dentry) <= bindex)
7635+ au_set_dbend(dentry, bindex);
7636+ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
7637+ au_set_dbstart(dentry, bindex);
7638+ au_set_h_dptr(dentry, bindex, h_dentry);
7639+
7f2ca4b1
JR
7640+ if (!d_is_dir(h_dentry)
7641+ || !wh_able
7642+ || (d_is_positive(dentry) && !d_is_dir(dentry)))
1facf9fc 7643+ goto out; /* success */
7644+
7645+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
076b876e 7646+ opq = au_diropq_test(h_dentry);
1facf9fc 7647+ mutex_unlock(&h_inode->i_mutex);
7648+ if (opq > 0)
7649+ au_set_dbdiropq(dentry, bindex);
7650+ else if (unlikely(opq < 0)) {
7651+ au_set_h_dptr(dentry, bindex, NULL);
7652+ h_dentry = ERR_PTR(opq);
7653+ }
7654+ goto out;
7655+
4f0767ce 7656+out_neg:
1facf9fc 7657+ dput(h_dentry);
7658+ h_dentry = NULL;
4f0767ce 7659+out:
1facf9fc 7660+ return h_dentry;
7661+}
7662+
dece6358
AM
7663+static int au_test_shwh(struct super_block *sb, const struct qstr *name)
7664+{
7665+ if (unlikely(!au_opt_test(au_mntflags(sb), SHWH)
7666+ && !strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)))
7667+ return -EPERM;
7668+ return 0;
7669+}
7670+
1facf9fc 7671+/*
7672+ * returns the number of lower positive dentries,
7673+ * otherwise an error.
7674+ * can be called at unlinking with @type set to zero.
7675+ */
537831f9 7676+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type)
1facf9fc 7677+{
7678+ int npositive, err;
7679+ aufs_bindex_t bindex, btail, bdiropq;
076b876e 7680+ unsigned char isdir, dirperm1;
1facf9fc 7681+ struct qstr whname;
7682+ struct au_do_lookup_args args = {
b4510431 7683+ .flags = 0,
537831f9 7684+ .type = type
1facf9fc 7685+ };
7686+ const struct qstr *name = &dentry->d_name;
7687+ struct dentry *parent;
7688+ struct inode *inode;
076b876e 7689+ struct super_block *sb;
1facf9fc 7690+
076b876e
AM
7691+ sb = dentry->d_sb;
7692+ err = au_test_shwh(sb, name);
dece6358 7693+ if (unlikely(err))
1facf9fc 7694+ goto out;
7695+
7696+ err = au_wh_name_alloc(&whname, name);
7697+ if (unlikely(err))
7698+ goto out;
7699+
7700+ inode = dentry->d_inode;
7f2ca4b1 7701+ isdir = !!d_is_dir(dentry);
1facf9fc 7702+ if (!type)
7703+ au_fset_lkup(args.flags, ALLOW_NEG);
076b876e 7704+ dirperm1 = !!au_opt_test(au_mntflags(sb), DIRPERM1);
1facf9fc 7705+
7706+ npositive = 0;
4a4d8108 7707+ parent = dget_parent(dentry);
1facf9fc 7708+ btail = au_dbtaildir(parent);
7709+ for (bindex = bstart; bindex <= btail; bindex++) {
7710+ struct dentry *h_parent, *h_dentry;
7711+ struct inode *h_inode, *h_dir;
7712+
7713+ h_dentry = au_h_dptr(dentry, bindex);
7714+ if (h_dentry) {
7715+ if (h_dentry->d_inode)
7716+ npositive++;
7717+ if (type != S_IFDIR)
7718+ break;
7719+ continue;
7720+ }
7721+ h_parent = au_h_dptr(parent, bindex);
7f2ca4b1 7722+ if (!h_parent || !d_is_dir(h_parent))
1facf9fc 7723+ continue;
7724+
7f2ca4b1 7725+ h_dir = h_parent->d_inode;
1facf9fc 7726+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
7727+ h_dentry = au_do_lookup(h_parent, dentry, bindex, &whname,
7728+ &args);
7729+ mutex_unlock(&h_dir->i_mutex);
7730+ err = PTR_ERR(h_dentry);
7731+ if (IS_ERR(h_dentry))
4a4d8108 7732+ goto out_parent;
7f2ca4b1
JR
7733+ if (h_dentry)
7734+ au_fclr_lkup(args.flags, ALLOW_NEG);
076b876e
AM
7735+ if (dirperm1)
7736+ au_fset_lkup(args.flags, IGNORE_PERM);
1facf9fc 7737+
7f2ca4b1 7738+ if (au_dbwh(dentry) == bindex)
1facf9fc 7739+ break;
7740+ if (!h_dentry)
7741+ continue;
7742+ h_inode = h_dentry->d_inode;
7743+ if (!h_inode)
7744+ continue;
7745+ npositive++;
7746+ if (!args.type)
7747+ args.type = h_inode->i_mode & S_IFMT;
7748+ if (args.type != S_IFDIR)
7749+ break;
7750+ else if (isdir) {
7751+ /* the type of lower may be different */
7752+ bdiropq = au_dbdiropq(dentry);
7753+ if (bdiropq >= 0 && bdiropq <= bindex)
7754+ break;
7755+ }
7756+ }
7757+
7758+ if (npositive) {
7759+ AuLabel(positive);
7760+ au_update_dbstart(dentry);
7761+ }
7762+ err = npositive;
076b876e 7763+ if (unlikely(!au_opt_test(au_mntflags(sb), UDBA_NONE)
027c5e7a 7764+ && au_dbstart(dentry) < 0)) {
1facf9fc 7765+ err = -EIO;
523b37e3
AM
7766+ AuIOErr("both of real entry and whiteout found, %pd, err %d\n",
7767+ dentry, err);
027c5e7a 7768+ }
1facf9fc 7769+
4f0767ce 7770+out_parent:
4a4d8108 7771+ dput(parent);
1facf9fc 7772+ kfree(whname.name);
4f0767ce 7773+out:
1facf9fc 7774+ return err;
7775+}
7776+
076b876e 7777+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent)
1facf9fc 7778+{
7779+ struct dentry *dentry;
7780+ int wkq_err;
7781+
7782+ if (!au_test_h_perm_sio(parent->d_inode, MAY_EXEC))
b4510431 7783+ dentry = vfsub_lkup_one(name, parent);
1facf9fc 7784+ else {
b4510431
AM
7785+ struct vfsub_lkup_one_args args = {
7786+ .errp = &dentry,
7787+ .name = name,
7788+ .parent = parent
1facf9fc 7789+ };
7790+
b4510431 7791+ wkq_err = au_wkq_wait(vfsub_call_lkup_one, &args);
1facf9fc 7792+ if (unlikely(wkq_err))
7793+ dentry = ERR_PTR(wkq_err);
7794+ }
7795+
7796+ return dentry;
7797+}
7798+
7799+/*
7800+ * lookup @dentry on @bindex which should be negative.
7801+ */
86dc4139 7802+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh)
1facf9fc 7803+{
7804+ int err;
7805+ struct dentry *parent, *h_parent, *h_dentry;
86dc4139 7806+ struct au_branch *br;
1facf9fc 7807+
1facf9fc 7808+ parent = dget_parent(dentry);
7809+ h_parent = au_h_dptr(parent, bindex);
86dc4139
AM
7810+ br = au_sbr(dentry->d_sb, bindex);
7811+ if (wh)
7812+ h_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
7813+ else
076b876e 7814+ h_dentry = au_sio_lkup_one(&dentry->d_name, h_parent);
1facf9fc 7815+ err = PTR_ERR(h_dentry);
7816+ if (IS_ERR(h_dentry))
7817+ goto out;
7818+ if (unlikely(h_dentry->d_inode)) {
7819+ err = -EIO;
523b37e3 7820+ AuIOErr("%pd should be negative on b%d.\n", h_dentry, bindex);
1facf9fc 7821+ dput(h_dentry);
7822+ goto out;
7823+ }
7824+
4a4d8108 7825+ err = 0;
1facf9fc 7826+ if (bindex < au_dbstart(dentry))
7827+ au_set_dbstart(dentry, bindex);
7828+ if (au_dbend(dentry) < bindex)
7829+ au_set_dbend(dentry, bindex);
7830+ au_set_h_dptr(dentry, bindex, h_dentry);
1facf9fc 7831+
4f0767ce 7832+out:
1facf9fc 7833+ dput(parent);
7834+ return err;
7835+}
7836+
7837+/* ---------------------------------------------------------------------- */
7838+
7839+/* subset of struct inode */
7840+struct au_iattr {
7841+ unsigned long i_ino;
7842+ /* unsigned int i_nlink; */
0c3ec466
AM
7843+ kuid_t i_uid;
7844+ kgid_t i_gid;
1facf9fc 7845+ u64 i_version;
7846+/*
7847+ loff_t i_size;
7848+ blkcnt_t i_blocks;
7849+*/
7850+ umode_t i_mode;
7851+};
7852+
7853+static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode)
7854+{
7855+ ia->i_ino = h_inode->i_ino;
7856+ /* ia->i_nlink = h_inode->i_nlink; */
7857+ ia->i_uid = h_inode->i_uid;
7858+ ia->i_gid = h_inode->i_gid;
7859+ ia->i_version = h_inode->i_version;
7860+/*
7861+ ia->i_size = h_inode->i_size;
7862+ ia->i_blocks = h_inode->i_blocks;
7863+*/
7864+ ia->i_mode = (h_inode->i_mode & S_IFMT);
7865+}
7866+
7867+static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode)
7868+{
7869+ return ia->i_ino != h_inode->i_ino
7870+ /* || ia->i_nlink != h_inode->i_nlink */
0c3ec466 7871+ || !uid_eq(ia->i_uid, h_inode->i_uid)
2dfbb274 7872+ || !gid_eq(ia->i_gid, h_inode->i_gid)
1facf9fc 7873+ || ia->i_version != h_inode->i_version
7874+/*
7875+ || ia->i_size != h_inode->i_size
7876+ || ia->i_blocks != h_inode->i_blocks
7877+*/
7878+ || ia->i_mode != (h_inode->i_mode & S_IFMT);
7879+}
7880+
7881+static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent,
7882+ struct au_branch *br)
7883+{
7884+ int err;
7885+ struct au_iattr ia;
7886+ struct inode *h_inode;
7887+ struct dentry *h_d;
7888+ struct super_block *h_sb;
7889+
7890+ err = 0;
7891+ memset(&ia, -1, sizeof(ia));
7892+ h_sb = h_dentry->d_sb;
7893+ h_inode = h_dentry->d_inode;
7894+ if (h_inode)
7895+ au_iattr_save(&ia, h_inode);
7896+ else if (au_test_nfs(h_sb) || au_test_fuse(h_sb))
7897+ /* nfs d_revalidate may return 0 for negative dentry */
7898+ /* fuse d_revalidate always return 0 for negative dentry */
7899+ goto out;
7900+
7901+ /* main purpose is namei.c:cached_lookup() and d_revalidate */
b4510431 7902+ h_d = vfsub_lkup_one(&h_dentry->d_name, h_parent);
1facf9fc 7903+ err = PTR_ERR(h_d);
7904+ if (IS_ERR(h_d))
7905+ goto out;
7906+
7907+ err = 0;
7908+ if (unlikely(h_d != h_dentry
7909+ || h_d->d_inode != h_inode
7910+ || (h_inode && au_iattr_test(&ia, h_inode))))
7911+ err = au_busy_or_stale();
7912+ dput(h_d);
7913+
4f0767ce 7914+out:
1facf9fc 7915+ AuTraceErr(err);
7916+ return err;
7917+}
7918+
7919+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
7920+ struct dentry *h_parent, struct au_branch *br)
7921+{
7922+ int err;
7923+
7924+ err = 0;
027c5e7a
AM
7925+ if (udba == AuOpt_UDBA_REVAL
7926+ && !au_test_fs_remote(h_dentry->d_sb)) {
1facf9fc 7927+ IMustLock(h_dir);
7928+ err = (h_dentry->d_parent->d_inode != h_dir);
027c5e7a 7929+ } else if (udba != AuOpt_UDBA_NONE)
1facf9fc 7930+ err = au_h_verify_dentry(h_dentry, h_parent, br);
7931+
7932+ return err;
7933+}
7934+
7935+/* ---------------------------------------------------------------------- */
7936+
027c5e7a 7937+static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent)
1facf9fc 7938+{
027c5e7a 7939+ int err;
1facf9fc 7940+ aufs_bindex_t new_bindex, bindex, bend, bwh, bdiropq;
027c5e7a
AM
7941+ struct au_hdentry tmp, *p, *q;
7942+ struct au_dinfo *dinfo;
7943+ struct super_block *sb;
1facf9fc 7944+
027c5e7a 7945+ DiMustWriteLock(dentry);
1308ab2a 7946+
027c5e7a
AM
7947+ sb = dentry->d_sb;
7948+ dinfo = au_di(dentry);
1facf9fc 7949+ bend = dinfo->di_bend;
7950+ bwh = dinfo->di_bwh;
7951+ bdiropq = dinfo->di_bdiropq;
027c5e7a 7952+ p = dinfo->di_hdentry + dinfo->di_bstart;
1facf9fc 7953+ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++, p++) {
027c5e7a 7954+ if (!p->hd_dentry)
1facf9fc 7955+ continue;
7956+
027c5e7a
AM
7957+ new_bindex = au_br_index(sb, p->hd_id);
7958+ if (new_bindex == bindex)
1facf9fc 7959+ continue;
1facf9fc 7960+
1facf9fc 7961+ if (dinfo->di_bwh == bindex)
7962+ bwh = new_bindex;
7963+ if (dinfo->di_bdiropq == bindex)
7964+ bdiropq = new_bindex;
7965+ if (new_bindex < 0) {
7966+ au_hdput(p);
7967+ p->hd_dentry = NULL;
7968+ continue;
7969+ }
7970+
7971+ /* swap two lower dentries, and loop again */
7972+ q = dinfo->di_hdentry + new_bindex;
7973+ tmp = *q;
7974+ *q = *p;
7975+ *p = tmp;
7976+ if (tmp.hd_dentry) {
7977+ bindex--;
7978+ p--;
7979+ }
7980+ }
7981+
1facf9fc 7982+ dinfo->di_bwh = -1;
7983+ if (bwh >= 0 && bwh <= au_sbend(sb) && au_sbr_whable(sb, bwh))
7984+ dinfo->di_bwh = bwh;
7985+
7986+ dinfo->di_bdiropq = -1;
7987+ if (bdiropq >= 0
7988+ && bdiropq <= au_sbend(sb)
7989+ && au_sbr_whable(sb, bdiropq))
7990+ dinfo->di_bdiropq = bdiropq;
7991+
027c5e7a
AM
7992+ err = -EIO;
7993+ dinfo->di_bstart = -1;
7994+ dinfo->di_bend = -1;
1facf9fc 7995+ bend = au_dbend(parent);
7996+ p = dinfo->di_hdentry;
7997+ for (bindex = 0; bindex <= bend; bindex++, p++)
7998+ if (p->hd_dentry) {
7999+ dinfo->di_bstart = bindex;
8000+ break;
8001+ }
8002+
027c5e7a
AM
8003+ if (dinfo->di_bstart >= 0) {
8004+ p = dinfo->di_hdentry + bend;
8005+ for (bindex = bend; bindex >= 0; bindex--, p--)
8006+ if (p->hd_dentry) {
8007+ dinfo->di_bend = bindex;
8008+ err = 0;
8009+ break;
8010+ }
8011+ }
8012+
8013+ return err;
1facf9fc 8014+}
8015+
027c5e7a 8016+static void au_do_hide(struct dentry *dentry)
1facf9fc 8017+{
027c5e7a 8018+ struct inode *inode;
1facf9fc 8019+
027c5e7a
AM
8020+ inode = dentry->d_inode;
8021+ if (inode) {
8022+ if (!S_ISDIR(inode->i_mode)) {
8023+ if (inode->i_nlink && !d_unhashed(dentry))
8024+ drop_nlink(inode);
8025+ } else {
8026+ clear_nlink(inode);
8027+ /* stop next lookup */
8028+ inode->i_flags |= S_DEAD;
8029+ }
8030+ smp_mb(); /* necessary? */
8031+ }
8032+ d_drop(dentry);
8033+}
1308ab2a 8034+
027c5e7a
AM
8035+static int au_hide_children(struct dentry *parent)
8036+{
8037+ int err, i, j, ndentry;
8038+ struct au_dcsub_pages dpages;
8039+ struct au_dpage *dpage;
8040+ struct dentry *dentry;
1facf9fc 8041+
027c5e7a 8042+ err = au_dpages_init(&dpages, GFP_NOFS);
1facf9fc 8043+ if (unlikely(err))
8044+ goto out;
027c5e7a
AM
8045+ err = au_dcsub_pages(&dpages, parent, NULL, NULL);
8046+ if (unlikely(err))
8047+ goto out_dpages;
1facf9fc 8048+
027c5e7a
AM
8049+ /* in reverse order */
8050+ for (i = dpages.ndpage - 1; i >= 0; i--) {
8051+ dpage = dpages.dpages + i;
8052+ ndentry = dpage->ndentry;
8053+ for (j = ndentry - 1; j >= 0; j--) {
8054+ dentry = dpage->dentries[j];
8055+ if (dentry != parent)
8056+ au_do_hide(dentry);
8057+ }
8058+ }
1facf9fc 8059+
027c5e7a
AM
8060+out_dpages:
8061+ au_dpages_free(&dpages);
4f0767ce 8062+out:
027c5e7a 8063+ return err;
1facf9fc 8064+}
8065+
/*
 * Hide @dentry; for a directory its children are hidden first.
 * A failure while hiding the children is only logged, @dentry itself is
 * hidden regardless.
 */
static void au_hide(struct dentry *dentry)
{
	AuDbgDentry(dentry);
	if (d_is_dir(dentry)) {
		int err;

		/* shrink_dcache_parent(dentry); */
		err = au_hide_children(dentry);
		if (unlikely(err))
			AuIOErr("%pd, failed hiding children, ignored %d\n",
				dentry, err);
	}
	au_do_hide(dentry);
}
1facf9fc 8080+
027c5e7a
AM
8081+/*
8082+ * By adding a dirty branch, a cached dentry may be affected in various ways.
8083+ *
8084+ * a dirty branch is added
8085+ * - on the top of layers
8086+ * - in the middle of layers
8087+ * - to the bottom of layers
8088+ *
8089+ * on the added branch there exists
8090+ * - a whiteout
8091+ * - a diropq
8092+ * - a same named entry
8093+ * + exist
8094+ * * negative --> positive
8095+ * * positive --> positive
8096+ * - type is unchanged
8097+ * - type is changed
8098+ * + doesn't exist
8099+ * * negative --> negative
8100+ * * positive --> negative (rejected by au_br_del() for non-dir case)
8101+ * - none
8102+ */
8103+static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo,
8104+ struct au_dinfo *tmp)
8105+{
8106+ int err;
8107+ aufs_bindex_t bindex, bend;
8108+ struct {
8109+ struct dentry *dentry;
8110+ struct inode *inode;
8111+ mode_t mode;
7f2ca4b1
JR
8112+ } orig_h, tmp_h = {
8113+ .dentry = NULL
8114+ };
027c5e7a
AM
8115+ struct au_hdentry *hd;
8116+ struct inode *inode, *h_inode;
8117+ struct dentry *h_dentry;
8118+
8119+ err = 0;
8120+ AuDebugOn(dinfo->di_bstart < 0);
8121+ orig_h.dentry = dinfo->di_hdentry[dinfo->di_bstart].hd_dentry;
8122+ orig_h.inode = orig_h.dentry->d_inode;
8123+ orig_h.mode = 0;
8124+ if (orig_h.inode)
8125+ orig_h.mode = orig_h.inode->i_mode & S_IFMT;
027c5e7a
AM
8126+ if (tmp->di_bstart >= 0) {
8127+ tmp_h.dentry = tmp->di_hdentry[tmp->di_bstart].hd_dentry;
8128+ tmp_h.inode = tmp_h.dentry->d_inode;
8129+ if (tmp_h.inode)
8130+ tmp_h.mode = tmp_h.inode->i_mode & S_IFMT;
8131+ }
8132+
8133+ inode = dentry->d_inode;
8134+ if (!orig_h.inode) {
8135+ AuDbg("nagative originally\n");
8136+ if (inode) {
8137+ au_hide(dentry);
8138+ goto out;
8139+ }
8140+ AuDebugOn(inode);
8141+ AuDebugOn(dinfo->di_bstart != dinfo->di_bend);
8142+ AuDebugOn(dinfo->di_bdiropq != -1);
8143+
8144+ if (!tmp_h.inode) {
8145+ AuDbg("negative --> negative\n");
8146+ /* should have only one negative lower */
8147+ if (tmp->di_bstart >= 0
8148+ && tmp->di_bstart < dinfo->di_bstart) {
8149+ AuDebugOn(tmp->di_bstart != tmp->di_bend);
8150+ AuDebugOn(dinfo->di_bstart != dinfo->di_bend);
8151+ au_set_h_dptr(dentry, dinfo->di_bstart, NULL);
8152+ au_di_cp(dinfo, tmp);
8153+ hd = tmp->di_hdentry + tmp->di_bstart;
8154+ au_set_h_dptr(dentry, tmp->di_bstart,
8155+ dget(hd->hd_dentry));
8156+ }
8157+ au_dbg_verify_dinode(dentry);
8158+ } else {
8159+ AuDbg("negative --> positive\n");
8160+ /*
8161+ * similar to the behaviour of creating with bypassing
8162+ * aufs.
8163+ * unhash it in order to force an error in the
8164+ * succeeding create operation.
8165+ * we should not set S_DEAD here.
8166+ */
8167+ d_drop(dentry);
8168+ /* au_di_swap(tmp, dinfo); */
8169+ au_dbg_verify_dinode(dentry);
8170+ }
8171+ } else {
8172+ AuDbg("positive originally\n");
8173+ /* inode may be NULL */
8174+ AuDebugOn(inode && (inode->i_mode & S_IFMT) != orig_h.mode);
8175+ if (!tmp_h.inode) {
8176+ AuDbg("positive --> negative\n");
8177+ /* or bypassing aufs */
8178+ au_hide(dentry);
8179+ if (tmp->di_bwh >= 0 && tmp->di_bwh <= dinfo->di_bstart)
8180+ dinfo->di_bwh = tmp->di_bwh;
8181+ if (inode)
8182+ err = au_refresh_hinode_self(inode);
8183+ au_dbg_verify_dinode(dentry);
8184+ } else if (orig_h.mode == tmp_h.mode) {
8185+ AuDbg("positive --> positive, same type\n");
8186+ if (!S_ISDIR(orig_h.mode)
8187+ && dinfo->di_bstart > tmp->di_bstart) {
8188+ /*
8189+ * similar to the behaviour of removing and
8190+ * creating.
8191+ */
8192+ au_hide(dentry);
8193+ if (inode)
8194+ err = au_refresh_hinode_self(inode);
8195+ au_dbg_verify_dinode(dentry);
8196+ } else {
8197+ /* fill empty slots */
8198+ if (dinfo->di_bstart > tmp->di_bstart)
8199+ dinfo->di_bstart = tmp->di_bstart;
8200+ if (dinfo->di_bend < tmp->di_bend)
8201+ dinfo->di_bend = tmp->di_bend;
8202+ dinfo->di_bwh = tmp->di_bwh;
8203+ dinfo->di_bdiropq = tmp->di_bdiropq;
8204+ hd = tmp->di_hdentry;
8205+ bend = dinfo->di_bend;
8206+ for (bindex = tmp->di_bstart; bindex <= bend;
8207+ bindex++) {
8208+ if (au_h_dptr(dentry, bindex))
8209+ continue;
8210+ h_dentry = hd[bindex].hd_dentry;
8211+ if (!h_dentry)
8212+ continue;
8213+ h_inode = h_dentry->d_inode;
8214+ AuDebugOn(!h_inode);
8215+ AuDebugOn(orig_h.mode
8216+ != (h_inode->i_mode
8217+ & S_IFMT));
8218+ au_set_h_dptr(dentry, bindex,
8219+ dget(h_dentry));
8220+ }
8221+ err = au_refresh_hinode(inode, dentry);
8222+ au_dbg_verify_dinode(dentry);
8223+ }
8224+ } else {
8225+ AuDbg("positive --> positive, different type\n");
8226+ /* similar to the behaviour of removing and creating */
8227+ au_hide(dentry);
8228+ if (inode)
8229+ err = au_refresh_hinode_self(inode);
8230+ au_dbg_verify_dinode(dentry);
8231+ }
8232+ }
8233+
8234+out:
8235+ return err;
8236+}
8237+
7f2ca4b1
JR
8238+void au_refresh_dop(struct dentry *dentry, int force_reval)
8239+{
8240+ const struct dentry_operations *dop
8241+ = force_reval ? &aufs_dop : dentry->d_sb->s_d_op;
8242+ static const unsigned int mask
8243+ = DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE;
8244+
8245+ BUILD_BUG_ON(sizeof(mask) != sizeof(dentry->d_flags));
8246+
8247+ if (dentry->d_op == dop)
8248+ return;
8249+
8250+ AuDbg("%pd\n", dentry);
8251+ spin_lock(&dentry->d_lock);
8252+ if (dop == &aufs_dop)
8253+ dentry->d_flags |= mask;
8254+ else
8255+ dentry->d_flags &= ~mask;
8256+ dentry->d_op = dop;
8257+ spin_unlock(&dentry->d_lock);
8258+}
8259+
027c5e7a
AM
8260+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent)
8261+{
8262+ int err, ebrange;
8263+ unsigned int sigen;
8264+ struct au_dinfo *dinfo, *tmp;
8265+ struct super_block *sb;
8266+ struct inode *inode;
8267+
8268+ DiMustWriteLock(dentry);
8269+ AuDebugOn(IS_ROOT(dentry));
8270+ AuDebugOn(!parent->d_inode);
8271+
8272+ sb = dentry->d_sb;
8273+ inode = dentry->d_inode;
8274+ sigen = au_sigen(sb);
8275+ err = au_digen_test(parent, sigen);
8276+ if (unlikely(err))
8277+ goto out;
8278+
8279+ dinfo = au_di(dentry);
8280+ err = au_di_realloc(dinfo, au_sbend(sb) + 1);
8281+ if (unlikely(err))
8282+ goto out;
8283+ ebrange = au_dbrange_test(dentry);
8284+ if (!ebrange)
8285+ ebrange = au_do_refresh_hdentry(dentry, parent);
8286+
38d290e6 8287+ if (d_unhashed(dentry) || ebrange /* || dinfo->di_tmpfile */) {
027c5e7a
AM
8288+ AuDebugOn(au_dbstart(dentry) < 0 && au_dbend(dentry) >= 0);
8289+ if (inode)
8290+ err = au_refresh_hinode_self(inode);
8291+ au_dbg_verify_dinode(dentry);
8292+ if (!err)
8293+ goto out_dgen; /* success */
8294+ goto out;
8295+ }
8296+
8297+ /* temporary dinfo */
8298+ AuDbgDentry(dentry);
8299+ err = -ENOMEM;
8300+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
8301+ if (unlikely(!tmp))
8302+ goto out;
8303+ au_di_swap(tmp, dinfo);
8304+ /* returns the number of positive dentries */
8305+ /*
8306+ * if current working dir is removed, it returns an error.
8307+ * but the dentry is legal.
8308+ */
537831f9 8309+ err = au_lkup_dentry(dentry, /*bstart*/0, /*type*/0);
027c5e7a
AM
8310+ AuDbgDentry(dentry);
8311+ au_di_swap(tmp, dinfo);
8312+ if (err == -ENOENT)
8313+ err = 0;
8314+ if (err >= 0) {
8315+ /* compare/refresh by dinfo */
8316+ AuDbgDentry(dentry);
8317+ err = au_refresh_by_dinfo(dentry, dinfo, tmp);
8318+ au_dbg_verify_dinode(dentry);
8319+ AuTraceErr(err);
8320+ }
8321+ au_rw_write_unlock(&tmp->di_rwsem);
8322+ au_di_free(tmp);
8323+ if (unlikely(err))
8324+ goto out;
8325+
8326+out_dgen:
8327+ au_update_digen(dentry);
8328+out:
8329+ if (unlikely(err && !(dentry->d_flags & DCACHE_NFSFS_RENAMED))) {
523b37e3 8330+ AuIOErr("failed refreshing %pd, %d\n", dentry, err);
027c5e7a
AM
8331+ AuDbgDentry(dentry);
8332+ }
8333+ AuTraceErr(err);
8334+ return err;
8335+}
8336+
b4510431
AM
8337+static int au_do_h_d_reval(struct dentry *h_dentry, unsigned int flags,
8338+ struct dentry *dentry, aufs_bindex_t bindex)
027c5e7a
AM
8339+{
8340+ int err, valid;
027c5e7a
AM
8341+
8342+ err = 0;
8343+ if (!(h_dentry->d_flags & DCACHE_OP_REVALIDATE))
8344+ goto out;
027c5e7a
AM
8345+
8346+ AuDbg("b%d\n", bindex);
b4510431
AM
8347+ /*
8348+ * gave up supporting LOOKUP_CREATE/OPEN for lower fs,
8349+ * due to whiteout and branch permission.
8350+ */
8351+ flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE
8352+ | LOOKUP_FOLLOW | LOOKUP_EXCL);
8353+ /* it may return tri-state */
8354+ valid = h_dentry->d_op->d_revalidate(h_dentry, flags);
1facf9fc 8355+
8356+ if (unlikely(valid < 0))
8357+ err = valid;
8358+ else if (!valid)
8359+ err = -EINVAL;
8360+
4f0767ce 8361+out:
1facf9fc 8362+ AuTraceErr(err);
8363+ return err;
8364+}
8365+
8366+/* todo: remove this */
8367+static int h_d_revalidate(struct dentry *dentry, struct inode *inode,
b4510431 8368+ unsigned int flags, int do_udba)
1facf9fc 8369+{
8370+ int err;
8371+ umode_t mode, h_mode;
8372+ aufs_bindex_t bindex, btail, bstart, ibs, ibe;
38d290e6 8373+ unsigned char plus, unhashed, is_root, h_plus, h_nfs, tmpfile;
4a4d8108 8374+ struct inode *h_inode, *h_cached_inode;
1facf9fc 8375+ struct dentry *h_dentry;
8376+ struct qstr *name, *h_name;
8377+
8378+ err = 0;
8379+ plus = 0;
8380+ mode = 0;
1facf9fc 8381+ ibs = -1;
8382+ ibe = -1;
8383+ unhashed = !!d_unhashed(dentry);
8384+ is_root = !!IS_ROOT(dentry);
8385+ name = &dentry->d_name;
38d290e6 8386+ tmpfile = au_di(dentry)->di_tmpfile;
1facf9fc 8387+
8388+ /*
7f207e10
AM
8389+ * Theoretically, REVAL test should be unnecessary in case of
8390+ * {FS,I}NOTIFY.
8391+ * But {fs,i}notify doesn't fire some necessary events,
1facf9fc 8392+ * IN_ATTRIB for atime/nlink/pageio
1facf9fc 8393+ * Let's do REVAL test too.
8394+ */
8395+ if (do_udba && inode) {
8396+ mode = (inode->i_mode & S_IFMT);
8397+ plus = (inode->i_nlink > 0);
1facf9fc 8398+ ibs = au_ibstart(inode);
8399+ ibe = au_ibend(inode);
8400+ }
8401+
8402+ bstart = au_dbstart(dentry);
8403+ btail = bstart;
8404+ if (inode && S_ISDIR(inode->i_mode))
8405+ btail = au_dbtaildir(dentry);
8406+ for (bindex = bstart; bindex <= btail; bindex++) {
8407+ h_dentry = au_h_dptr(dentry, bindex);
8408+ if (!h_dentry)
8409+ continue;
8410+
523b37e3
AM
8411+ AuDbg("b%d, %pd\n", bindex, h_dentry);
8412+ h_nfs = !!au_test_nfs(h_dentry->d_sb);
027c5e7a 8413+ spin_lock(&h_dentry->d_lock);
1facf9fc 8414+ h_name = &h_dentry->d_name;
8415+ if (unlikely(do_udba
8416+ && !is_root
523b37e3
AM
8417+ && ((!h_nfs
8418+ && (unhashed != !!d_unhashed(h_dentry)
38d290e6
JR
8419+ || (!tmpfile
8420+ && !au_qstreq(name, h_name))
8421+ ))
523b37e3
AM
8422+ || (h_nfs
8423+ && !(flags & LOOKUP_OPEN)
8424+ && (h_dentry->d_flags
8425+ & DCACHE_NFSFS_RENAMED)))
1facf9fc 8426+ )) {
38d290e6
JR
8427+ int h_unhashed;
8428+
8429+ h_unhashed = d_unhashed(h_dentry);
027c5e7a 8430+ spin_unlock(&h_dentry->d_lock);
38d290e6
JR
8431+ AuDbg("unhash 0x%x 0x%x, %pd %pd\n",
8432+ unhashed, h_unhashed, dentry, h_dentry);
1facf9fc 8433+ goto err;
8434+ }
027c5e7a 8435+ spin_unlock(&h_dentry->d_lock);
1facf9fc 8436+
b4510431 8437+ err = au_do_h_d_reval(h_dentry, flags, dentry, bindex);
1facf9fc 8438+ if (unlikely(err))
8439+ /* do not goto err, to keep the errno */
8440+ break;
8441+
8442+ /* todo: plink too? */
8443+ if (!do_udba)
8444+ continue;
8445+
8446+ /* UDBA tests */
8447+ h_inode = h_dentry->d_inode;
8448+ if (unlikely(!!inode != !!h_inode))
8449+ goto err;
8450+
8451+ h_plus = plus;
8452+ h_mode = mode;
8453+ h_cached_inode = h_inode;
8454+ if (h_inode) {
8455+ h_mode = (h_inode->i_mode & S_IFMT);
8456+ h_plus = (h_inode->i_nlink > 0);
8457+ }
8458+ if (inode && ibs <= bindex && bindex <= ibe)
8459+ h_cached_inode = au_h_iptr(inode, bindex);
8460+
523b37e3 8461+ if (!h_nfs) {
38d290e6 8462+ if (unlikely(plus != h_plus && !tmpfile))
523b37e3
AM
8463+ goto err;
8464+ } else {
8465+ if (unlikely(!(h_dentry->d_flags & DCACHE_NFSFS_RENAMED)
8466+ && !is_root
8467+ && !IS_ROOT(h_dentry)
8468+ && unhashed != d_unhashed(h_dentry)))
8469+ goto err;
8470+ }
8471+ if (unlikely(mode != h_mode
1facf9fc 8472+ || h_cached_inode != h_inode))
8473+ goto err;
8474+ continue;
8475+
f6b6e03d 8476+err:
1facf9fc 8477+ err = -EINVAL;
8478+ break;
8479+ }
8480+
523b37e3 8481+ AuTraceErr(err);
1facf9fc 8482+ return err;
8483+}
8484+
027c5e7a 8485+/* todo: consolidate with do_refresh() and au_reval_for_attr() */
1facf9fc 8486+static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen)
8487+{
8488+ int err;
8489+ struct dentry *parent;
1facf9fc 8490+
027c5e7a 8491+ if (!au_digen_test(dentry, sigen))
1facf9fc 8492+ return 0;
8493+
8494+ parent = dget_parent(dentry);
8495+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 8496+ AuDebugOn(au_digen_test(parent, sigen));
1facf9fc 8497+ au_dbg_verify_gen(parent, sigen);
027c5e7a 8498+ err = au_refresh_dentry(dentry, parent);
1facf9fc 8499+ di_read_unlock(parent, AuLock_IR);
8500+ dput(parent);
027c5e7a 8501+ AuTraceErr(err);
1facf9fc 8502+ return err;
8503+}
8504+
8505+int au_reval_dpath(struct dentry *dentry, unsigned int sigen)
8506+{
8507+ int err;
8508+ struct dentry *d, *parent;
8509+ struct inode *inode;
8510+
027c5e7a 8511+ if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR))
1facf9fc 8512+ return simple_reval_dpath(dentry, sigen);
8513+
8514+ /* slow loop, keep it simple and stupid */
8515+ /* cf: au_cpup_dirs() */
8516+ err = 0;
8517+ parent = NULL;
027c5e7a 8518+ while (au_digen_test(dentry, sigen)) {
1facf9fc 8519+ d = dentry;
8520+ while (1) {
8521+ dput(parent);
8522+ parent = dget_parent(d);
027c5e7a 8523+ if (!au_digen_test(parent, sigen))
1facf9fc 8524+ break;
8525+ d = parent;
8526+ }
8527+
8528+ inode = d->d_inode;
8529+ if (d != dentry)
027c5e7a 8530+ di_write_lock_child2(d);
1facf9fc 8531+
8532+ /* someone might update our dentry while we were sleeping */
027c5e7a
AM
8533+ if (au_digen_test(d, sigen)) {
8534+ /*
8535+ * todo: consolidate with simple_reval_dpath(),
8536+ * do_refresh() and au_reval_for_attr().
8537+ */
1facf9fc 8538+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 8539+ err = au_refresh_dentry(d, parent);
1facf9fc 8540+ di_read_unlock(parent, AuLock_IR);
8541+ }
8542+
8543+ if (d != dentry)
8544+ di_write_unlock(d);
8545+ dput(parent);
8546+ if (unlikely(err))
8547+ break;
8548+ }
8549+
8550+ return err;
8551+}
8552+
8553+/*
8554+ * if valid returns 1, otherwise 0.
8555+ */
b4510431 8556+static int aufs_d_revalidate(struct dentry *dentry, unsigned int flags)
1facf9fc 8557+{
8558+ int valid, err;
8559+ unsigned int sigen;
8560+ unsigned char do_udba;
8561+ struct super_block *sb;
8562+ struct inode *inode;
8563+
027c5e7a 8564+ /* todo: support rcu-walk? */
b4510431 8565+ if (flags & LOOKUP_RCU)
027c5e7a
AM
8566+ return -ECHILD;
8567+
8568+ valid = 0;
8569+ if (unlikely(!au_di(dentry)))
8570+ goto out;
8571+
e49829fe 8572+ valid = 1;
1facf9fc 8573+ sb = dentry->d_sb;
e49829fe
JR
8574+ /*
8575+ * todo: very ugly
8576+ * i_mutex of parent dir may be held,
8577+ * but we should not return 'invalid' due to busy.
8578+ */
8579+ err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW | AuLock_NOPLM);
8580+ if (unlikely(err)) {
8581+ valid = err;
027c5e7a 8582+ AuTraceErr(err);
e49829fe
JR
8583+ goto out;
8584+ }
c1595e42
JR
8585+ inode = dentry->d_inode;
8586+ if (unlikely(inode && is_bad_inode(inode))) {
8587+ err = -EINVAL;
8588+ AuTraceErr(err);
8589+ goto out_dgrade;
8590+ }
027c5e7a
AM
8591+ if (unlikely(au_dbrange_test(dentry))) {
8592+ err = -EINVAL;
8593+ AuTraceErr(err);
8594+ goto out_dgrade;
1facf9fc 8595+ }
027c5e7a
AM
8596+
8597+ sigen = au_sigen(sb);
8598+ if (au_digen_test(dentry, sigen)) {
1facf9fc 8599+ AuDebugOn(IS_ROOT(dentry));
027c5e7a
AM
8600+ err = au_reval_dpath(dentry, sigen);
8601+ if (unlikely(err)) {
8602+ AuTraceErr(err);
1facf9fc 8603+ goto out_dgrade;
027c5e7a 8604+ }
1facf9fc 8605+ }
8606+ di_downgrade_lock(dentry, AuLock_IR);
8607+
1facf9fc 8608+ err = -EINVAL;
c1595e42 8609+ if (!(flags & (LOOKUP_OPEN | LOOKUP_EMPTY))
523b37e3 8610+ && inode
38d290e6 8611+ && !(inode->i_state && I_LINKABLE)
7f2ca4b1
JR
8612+ && (IS_DEADDIR(inode) || !inode->i_nlink)) {
8613+ AuTraceErr(err);
027c5e7a 8614+ goto out_inval;
7f2ca4b1 8615+ }
027c5e7a 8616+
1facf9fc 8617+ do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE);
8618+ if (do_udba && inode) {
8619+ aufs_bindex_t bstart = au_ibstart(inode);
027c5e7a 8620+ struct inode *h_inode;
1facf9fc 8621+
027c5e7a
AM
8622+ if (bstart >= 0) {
8623+ h_inode = au_h_iptr(inode, bstart);
7f2ca4b1
JR
8624+ if (h_inode && au_test_higen(inode, h_inode)) {
8625+ AuTraceErr(err);
027c5e7a 8626+ goto out_inval;
7f2ca4b1 8627+ }
027c5e7a 8628+ }
1facf9fc 8629+ }
8630+
b4510431 8631+ err = h_d_revalidate(dentry, inode, flags, do_udba);
027c5e7a 8632+ if (unlikely(!err && do_udba && au_dbstart(dentry) < 0)) {
1facf9fc 8633+ err = -EIO;
523b37e3
AM
8634+ AuDbg("both of real entry and whiteout found, %p, err %d\n",
8635+ dentry, err);
027c5e7a 8636+ }
e49829fe 8637+ goto out_inval;
1facf9fc 8638+
4f0767ce 8639+out_dgrade:
1facf9fc 8640+ di_downgrade_lock(dentry, AuLock_IR);
e49829fe 8641+out_inval:
1facf9fc 8642+ aufs_read_unlock(dentry, AuLock_IR);
8643+ AuTraceErr(err);
8644+ valid = !err;
e49829fe 8645+out:
027c5e7a 8646+ if (!valid) {
523b37e3 8647+ AuDbg("%pd invalid, %d\n", dentry, valid);
027c5e7a
AM
8648+ d_drop(dentry);
8649+ }
1facf9fc 8650+ return valid;
8651+}
8652+
/* ->d_release(): tear down the aufs private data of @dentry, if it has any */
static void aufs_d_release(struct dentry *dentry)
{
	if (!au_di(dentry))
		return;

	au_di_fin(dentry);
	au_hn_di_reinit(dentry);
}
8660+
4a4d8108 8661+const struct dentry_operations aufs_dop = {
c06a8ce3
AM
8662+ .d_revalidate = aufs_d_revalidate,
8663+ .d_weak_revalidate = aufs_d_revalidate,
8664+ .d_release = aufs_d_release
1facf9fc 8665+};
7f2ca4b1
JR
8666+
8667+/* aufs_dop without d_revalidate */
8668+const struct dentry_operations aufs_dop_noreval = {
8669+ .d_release = aufs_d_release
8670+};
7f207e10
AM
8671diff -urN /usr/share/empty/fs/aufs/dentry.h linux/fs/aufs/dentry.h
8672--- /usr/share/empty/fs/aufs/dentry.h 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
8673+++ linux/fs/aufs/dentry.h 2016-02-28 11:27:01.277245613 +0100
8674@@ -0,0 +1,234 @@
1facf9fc 8675+/*
7f2ca4b1 8676+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 8677+ *
8678+ * This program, aufs is free software; you can redistribute it and/or modify
8679+ * it under the terms of the GNU General Public License as published by
8680+ * the Free Software Foundation; either version 2 of the License, or
8681+ * (at your option) any later version.
dece6358
AM
8682+ *
8683+ * This program is distributed in the hope that it will be useful,
8684+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8685+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8686+ * GNU General Public License for more details.
8687+ *
8688+ * You should have received a copy of the GNU General Public License
523b37e3 8689+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 8690+ */
8691+
8692+/*
8693+ * lookup and dentry operations
8694+ */
8695+
8696+#ifndef __AUFS_DENTRY_H__
8697+#define __AUFS_DENTRY_H__
8698+
8699+#ifdef __KERNEL__
8700+
dece6358 8701+#include <linux/dcache.h>
1facf9fc 8702+#include "rwsem.h"
8703+
1facf9fc 8704+struct au_hdentry {
8705+ struct dentry *hd_dentry;
027c5e7a 8706+ aufs_bindex_t hd_id;
1facf9fc 8707+};
8708+
8709+struct au_dinfo {
8710+ atomic_t di_generation;
8711+
dece6358 8712+ struct au_rwsem di_rwsem;
1facf9fc 8713+ aufs_bindex_t di_bstart, di_bend, di_bwh, di_bdiropq;
38d290e6 8714+ unsigned char di_tmpfile; /* to allow the different name */
1facf9fc 8715+ struct au_hdentry *di_hdentry;
4a4d8108 8716+} ____cacheline_aligned_in_smp;
1facf9fc 8717+
8718+/* ---------------------------------------------------------------------- */
8719+
8720+/* dentry.c */
7f2ca4b1 8721+extern const struct dentry_operations aufs_dop, aufs_dop_noreval;
1facf9fc 8722+struct au_branch;
076b876e 8723+struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent);
1facf9fc 8724+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
8725+ struct dentry *h_parent, struct au_branch *br);
8726+
537831f9 8727+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type);
86dc4139 8728+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh);
027c5e7a 8729+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent);
1facf9fc 8730+int au_reval_dpath(struct dentry *dentry, unsigned int sigen);
7f2ca4b1 8731+void au_refresh_dop(struct dentry *dentry, int force_reval);
1facf9fc 8732+
8733+/* dinfo.c */
4a4d8108 8734+void au_di_init_once(void *_di);
027c5e7a
AM
8735+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc);
8736+void au_di_free(struct au_dinfo *dinfo);
8737+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b);
8738+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src);
4a4d8108
AM
8739+int au_di_init(struct dentry *dentry);
8740+void au_di_fin(struct dentry *dentry);
1facf9fc 8741+int au_di_realloc(struct au_dinfo *dinfo, int nbr);
8742+
8743+void di_read_lock(struct dentry *d, int flags, unsigned int lsc);
8744+void di_read_unlock(struct dentry *d, int flags);
8745+void di_downgrade_lock(struct dentry *d, int flags);
8746+void di_write_lock(struct dentry *d, unsigned int lsc);
8747+void di_write_unlock(struct dentry *d);
8748+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir);
8749+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir);
8750+void di_write_unlock2(struct dentry *d1, struct dentry *d2);
8751+
8752+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex);
2cbb1c4b 8753+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex);
1facf9fc 8754+aufs_bindex_t au_dbtail(struct dentry *dentry);
8755+aufs_bindex_t au_dbtaildir(struct dentry *dentry);
8756+
8757+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
8758+ struct dentry *h_dentry);
027c5e7a
AM
8759+int au_digen_test(struct dentry *dentry, unsigned int sigen);
8760+int au_dbrange_test(struct dentry *dentry);
1facf9fc 8761+void au_update_digen(struct dentry *dentry);
8762+void au_update_dbrange(struct dentry *dentry, int do_put_zero);
8763+void au_update_dbstart(struct dentry *dentry);
8764+void au_update_dbend(struct dentry *dentry);
8765+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry);
8766+
8767+/* ---------------------------------------------------------------------- */
8768+
8769+static inline struct au_dinfo *au_di(struct dentry *dentry)
8770+{
8771+ return dentry->d_fsdata;
8772+}
8773+
8774+/* ---------------------------------------------------------------------- */
8775+
8776+/* lock subclass for dinfo */
8777+enum {
8778+ AuLsc_DI_CHILD, /* child first */
4a4d8108 8779+ AuLsc_DI_CHILD2, /* rename(2), link(2), and cpup at hnotify */
1facf9fc 8780+ AuLsc_DI_CHILD3, /* copyup dirs */
8781+ AuLsc_DI_PARENT,
8782+ AuLsc_DI_PARENT2,
027c5e7a
AM
8783+ AuLsc_DI_PARENT3,
8784+ AuLsc_DI_TMP /* temp for replacing dinfo */
1facf9fc 8785+};
8786+
8787+/*
8788+ * di_read_lock_child, di_write_lock_child,
8789+ * di_read_lock_child2, di_write_lock_child2,
8790+ * di_read_lock_child3, di_write_lock_child3,
8791+ * di_read_lock_parent, di_write_lock_parent,
8792+ * di_read_lock_parent2, di_write_lock_parent2,
8793+ * di_read_lock_parent3, di_write_lock_parent3,
8794+ */
8795+#define AuReadLockFunc(name, lsc) \
8796+static inline void di_read_lock_##name(struct dentry *d, int flags) \
8797+{ di_read_lock(d, flags, AuLsc_DI_##lsc); }
8798+
8799+#define AuWriteLockFunc(name, lsc) \
8800+static inline void di_write_lock_##name(struct dentry *d) \
8801+{ di_write_lock(d, AuLsc_DI_##lsc); }
8802+
8803+#define AuRWLockFuncs(name, lsc) \
8804+ AuReadLockFunc(name, lsc) \
8805+ AuWriteLockFunc(name, lsc)
8806+
8807+AuRWLockFuncs(child, CHILD);
8808+AuRWLockFuncs(child2, CHILD2);
8809+AuRWLockFuncs(child3, CHILD3);
8810+AuRWLockFuncs(parent, PARENT);
8811+AuRWLockFuncs(parent2, PARENT2);
8812+AuRWLockFuncs(parent3, PARENT3);
8813+
8814+#undef AuReadLockFunc
8815+#undef AuWriteLockFunc
8816+#undef AuRWLockFuncs
8817+
8818+#define DiMustNoWaiters(d) AuRwMustNoWaiters(&au_di(d)->di_rwsem)
dece6358
AM
8819+#define DiMustAnyLock(d) AuRwMustAnyLock(&au_di(d)->di_rwsem)
8820+#define DiMustWriteLock(d) AuRwMustWriteLock(&au_di(d)->di_rwsem)
1facf9fc 8821+
8822+/* ---------------------------------------------------------------------- */
8823+
8824+/* todo: memory barrier? */
8825+static inline unsigned int au_digen(struct dentry *d)
8826+{
8827+ return atomic_read(&au_di(d)->di_generation);
8828+}
8829+
8830+static inline void au_h_dentry_init(struct au_hdentry *hdentry)
8831+{
8832+ hdentry->hd_dentry = NULL;
8833+}
8834+
8835+static inline void au_hdput(struct au_hdentry *hd)
8836+{
4a4d8108
AM
8837+ if (hd)
8838+ dput(hd->hd_dentry);
1facf9fc 8839+}
8840+
8841+static inline aufs_bindex_t au_dbstart(struct dentry *dentry)
8842+{
1308ab2a 8843+ DiMustAnyLock(dentry);
1facf9fc 8844+ return au_di(dentry)->di_bstart;
8845+}
8846+
8847+static inline aufs_bindex_t au_dbend(struct dentry *dentry)
8848+{
1308ab2a 8849+ DiMustAnyLock(dentry);
1facf9fc 8850+ return au_di(dentry)->di_bend;
8851+}
8852+
8853+static inline aufs_bindex_t au_dbwh(struct dentry *dentry)
8854+{
1308ab2a 8855+ DiMustAnyLock(dentry);
1facf9fc 8856+ return au_di(dentry)->di_bwh;
8857+}
8858+
8859+static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry)
8860+{
1308ab2a 8861+ DiMustAnyLock(dentry);
1facf9fc 8862+ return au_di(dentry)->di_bdiropq;
8863+}
8864+
8865+/* todo: hard/soft set? */
8866+static inline void au_set_dbstart(struct dentry *dentry, aufs_bindex_t bindex)
8867+{
1308ab2a 8868+ DiMustWriteLock(dentry);
1facf9fc 8869+ au_di(dentry)->di_bstart = bindex;
8870+}
8871+
8872+static inline void au_set_dbend(struct dentry *dentry, aufs_bindex_t bindex)
8873+{
1308ab2a 8874+ DiMustWriteLock(dentry);
1facf9fc 8875+ au_di(dentry)->di_bend = bindex;
8876+}
8877+
8878+static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex)
8879+{
1308ab2a 8880+ DiMustWriteLock(dentry);
1facf9fc 8881+ /* dbwh can be outside of bstart - bend range */
8882+ au_di(dentry)->di_bwh = bindex;
8883+}
8884+
8885+static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex)
8886+{
1308ab2a 8887+ DiMustWriteLock(dentry);
1facf9fc 8888+ au_di(dentry)->di_bdiropq = bindex;
8889+}
8890+
8891+/* ---------------------------------------------------------------------- */
8892+
4a4d8108 8893+#ifdef CONFIG_AUFS_HNOTIFY
1facf9fc 8894+static inline void au_digen_dec(struct dentry *d)
8895+{
e49829fe 8896+ atomic_dec(&au_di(d)->di_generation);
1facf9fc 8897+}
8898+
4a4d8108 8899+static inline void au_hn_di_reinit(struct dentry *dentry)
1facf9fc 8900+{
8901+ dentry->d_fsdata = NULL;
8902+}
8903+#else
4a4d8108
AM
8904+AuStubVoid(au_hn_di_reinit, struct dentry *dentry __maybe_unused)
8905+#endif /* CONFIG_AUFS_HNOTIFY */
1facf9fc 8906+
8907+#endif /* __KERNEL__ */
8908+#endif /* __AUFS_DENTRY_H__ */
7f207e10
AM
8909diff -urN /usr/share/empty/fs/aufs/dinfo.c linux/fs/aufs/dinfo.c
8910--- /usr/share/empty/fs/aufs/dinfo.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 8911+++ linux/fs/aufs/dinfo.c 2016-02-28 11:27:01.277245613 +0100
38d290e6 8912@@ -0,0 +1,544 @@
1facf9fc 8913+/*
7f2ca4b1 8914+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 8915+ *
8916+ * This program, aufs is free software; you can redistribute it and/or modify
8917+ * it under the terms of the GNU General Public License as published by
8918+ * the Free Software Foundation; either version 2 of the License, or
8919+ * (at your option) any later version.
dece6358
AM
8920+ *
8921+ * This program is distributed in the hope that it will be useful,
8922+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
8923+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
8924+ * GNU General Public License for more details.
8925+ *
8926+ * You should have received a copy of the GNU General Public License
523b37e3 8927+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 8928+ */
8929+
8930+/*
8931+ * dentry private data
8932+ */
8933+
8934+#include "aufs.h"
8935+
e49829fe 8936+void au_di_init_once(void *_dinfo)
4a4d8108 8937+{
e49829fe
JR
8938+ struct au_dinfo *dinfo = _dinfo;
8939+ static struct lock_class_key aufs_di;
4a4d8108 8940+
e49829fe
JR
8941+ au_rw_init(&dinfo->di_rwsem);
8942+ au_rw_class(&dinfo->di_rwsem, &aufs_di);
4a4d8108
AM
8943+}
8944+
027c5e7a 8945+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc)
1facf9fc 8946+{
8947+ struct au_dinfo *dinfo;
027c5e7a 8948+ int nbr, i;
1facf9fc 8949+
8950+ dinfo = au_cache_alloc_dinfo();
8951+ if (unlikely(!dinfo))
8952+ goto out;
8953+
1facf9fc 8954+ nbr = au_sbend(sb) + 1;
8955+ if (nbr <= 0)
8956+ nbr = 1;
8957+ dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS);
027c5e7a
AM
8958+ if (dinfo->di_hdentry) {
8959+ au_rw_write_lock_nested(&dinfo->di_rwsem, lsc);
8960+ dinfo->di_bstart = -1;
8961+ dinfo->di_bend = -1;
8962+ dinfo->di_bwh = -1;
8963+ dinfo->di_bdiropq = -1;
38d290e6 8964+ dinfo->di_tmpfile = 0;
027c5e7a
AM
8965+ for (i = 0; i < nbr; i++)
8966+ dinfo->di_hdentry[i].hd_id = -1;
8967+ goto out;
8968+ }
1facf9fc 8969+
1facf9fc 8970+ au_cache_free_dinfo(dinfo);
027c5e7a
AM
8971+ dinfo = NULL;
8972+
4f0767ce 8973+out:
027c5e7a 8974+ return dinfo;
1facf9fc 8975+}
8976+
027c5e7a 8977+void au_di_free(struct au_dinfo *dinfo)
4a4d8108 8978+{
4a4d8108
AM
8979+ struct au_hdentry *p;
8980+ aufs_bindex_t bend, bindex;
8981+
8982+ /* dentry may not be revalidated */
027c5e7a 8983+ bindex = dinfo->di_bstart;
4a4d8108 8984+ if (bindex >= 0) {
027c5e7a
AM
8985+ bend = dinfo->di_bend;
8986+ p = dinfo->di_hdentry + bindex;
4a4d8108
AM
8987+ while (bindex++ <= bend)
8988+ au_hdput(p++);
8989+ }
027c5e7a
AM
8990+ kfree(dinfo->di_hdentry);
8991+ au_cache_free_dinfo(dinfo);
8992+}
8993+
8994+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b)
8995+{
8996+ struct au_hdentry *p;
8997+ aufs_bindex_t bi;
8998+
8999+ AuRwMustWriteLock(&a->di_rwsem);
9000+ AuRwMustWriteLock(&b->di_rwsem);
9001+
9002+#define DiSwap(v, name) \
9003+ do { \
9004+ v = a->di_##name; \
9005+ a->di_##name = b->di_##name; \
9006+ b->di_##name = v; \
9007+ } while (0)
9008+
9009+ DiSwap(p, hdentry);
9010+ DiSwap(bi, bstart);
9011+ DiSwap(bi, bend);
9012+ DiSwap(bi, bwh);
9013+ DiSwap(bi, bdiropq);
9014+ /* smp_mb(); */
9015+
9016+#undef DiSwap
9017+}
9018+
9019+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src)
9020+{
9021+ AuRwMustWriteLock(&dst->di_rwsem);
9022+ AuRwMustWriteLock(&src->di_rwsem);
9023+
9024+ dst->di_bstart = src->di_bstart;
9025+ dst->di_bend = src->di_bend;
9026+ dst->di_bwh = src->di_bwh;
9027+ dst->di_bdiropq = src->di_bdiropq;
9028+ /* smp_mb(); */
9029+}
9030+
9031+int au_di_init(struct dentry *dentry)
9032+{
9033+ int err;
9034+ struct super_block *sb;
9035+ struct au_dinfo *dinfo;
9036+
9037+ err = 0;
9038+ sb = dentry->d_sb;
9039+ dinfo = au_di_alloc(sb, AuLsc_DI_CHILD);
9040+ if (dinfo) {
9041+ atomic_set(&dinfo->di_generation, au_sigen(sb));
9042+ /* smp_mb(); */ /* atomic_set */
9043+ dentry->d_fsdata = dinfo;
9044+ } else
9045+ err = -ENOMEM;
9046+
9047+ return err;
9048+}
9049+
9050+void au_di_fin(struct dentry *dentry)
9051+{
9052+ struct au_dinfo *dinfo;
9053+
9054+ dinfo = au_di(dentry);
9055+ AuRwDestroy(&dinfo->di_rwsem);
9056+ au_di_free(dinfo);
4a4d8108
AM
9057+}
9058+
1facf9fc 9059+int au_di_realloc(struct au_dinfo *dinfo, int nbr)
9060+{
9061+ int err, sz;
9062+ struct au_hdentry *hdp;
9063+
1308ab2a 9064+ AuRwMustWriteLock(&dinfo->di_rwsem);
9065+
1facf9fc 9066+ err = -ENOMEM;
9067+ sz = sizeof(*hdp) * (dinfo->di_bend + 1);
9068+ if (!sz)
9069+ sz = sizeof(*hdp);
9070+ hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS);
9071+ if (hdp) {
9072+ dinfo->di_hdentry = hdp;
9073+ err = 0;
9074+ }
9075+
9076+ return err;
9077+}
9078+
9079+/* ---------------------------------------------------------------------- */
9080+
9081+static void do_ii_write_lock(struct inode *inode, unsigned int lsc)
9082+{
9083+ switch (lsc) {
9084+ case AuLsc_DI_CHILD:
9085+ ii_write_lock_child(inode);
9086+ break;
9087+ case AuLsc_DI_CHILD2:
9088+ ii_write_lock_child2(inode);
9089+ break;
9090+ case AuLsc_DI_CHILD3:
9091+ ii_write_lock_child3(inode);
9092+ break;
9093+ case AuLsc_DI_PARENT:
9094+ ii_write_lock_parent(inode);
9095+ break;
9096+ case AuLsc_DI_PARENT2:
9097+ ii_write_lock_parent2(inode);
9098+ break;
9099+ case AuLsc_DI_PARENT3:
9100+ ii_write_lock_parent3(inode);
9101+ break;
9102+ default:
9103+ BUG();
9104+ }
9105+}
9106+
9107+static void do_ii_read_lock(struct inode *inode, unsigned int lsc)
9108+{
9109+ switch (lsc) {
9110+ case AuLsc_DI_CHILD:
9111+ ii_read_lock_child(inode);
9112+ break;
9113+ case AuLsc_DI_CHILD2:
9114+ ii_read_lock_child2(inode);
9115+ break;
9116+ case AuLsc_DI_CHILD3:
9117+ ii_read_lock_child3(inode);
9118+ break;
9119+ case AuLsc_DI_PARENT:
9120+ ii_read_lock_parent(inode);
9121+ break;
9122+ case AuLsc_DI_PARENT2:
9123+ ii_read_lock_parent2(inode);
9124+ break;
9125+ case AuLsc_DI_PARENT3:
9126+ ii_read_lock_parent3(inode);
9127+ break;
9128+ default:
9129+ BUG();
9130+ }
9131+}
9132+
9133+void di_read_lock(struct dentry *d, int flags, unsigned int lsc)
9134+{
dece6358 9135+ au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc);
1facf9fc 9136+ if (d->d_inode) {
9137+ if (au_ftest_lock(flags, IW))
9138+ do_ii_write_lock(d->d_inode, lsc);
9139+ else if (au_ftest_lock(flags, IR))
9140+ do_ii_read_lock(d->d_inode, lsc);
9141+ }
9142+}
9143+
9144+void di_read_unlock(struct dentry *d, int flags)
9145+{
9146+ if (d->d_inode) {
027c5e7a
AM
9147+ if (au_ftest_lock(flags, IW)) {
9148+ au_dbg_verify_dinode(d);
1facf9fc 9149+ ii_write_unlock(d->d_inode);
027c5e7a
AM
9150+ } else if (au_ftest_lock(flags, IR)) {
9151+ au_dbg_verify_dinode(d);
1facf9fc 9152+ ii_read_unlock(d->d_inode);
027c5e7a 9153+ }
1facf9fc 9154+ }
dece6358 9155+ au_rw_read_unlock(&au_di(d)->di_rwsem);
1facf9fc 9156+}
9157+
9158+void di_downgrade_lock(struct dentry *d, int flags)
9159+{
1facf9fc 9160+ if (d->d_inode && au_ftest_lock(flags, IR))
9161+ ii_downgrade_lock(d->d_inode);
dece6358 9162+ au_rw_dgrade_lock(&au_di(d)->di_rwsem);
1facf9fc 9163+}
9164+
9165+void di_write_lock(struct dentry *d, unsigned int lsc)
9166+{
dece6358 9167+ au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc);
1facf9fc 9168+ if (d->d_inode)
9169+ do_ii_write_lock(d->d_inode, lsc);
9170+}
9171+
9172+void di_write_unlock(struct dentry *d)
9173+{
027c5e7a 9174+ au_dbg_verify_dinode(d);
1facf9fc 9175+ if (d->d_inode)
9176+ ii_write_unlock(d->d_inode);
dece6358 9177+ au_rw_write_unlock(&au_di(d)->di_rwsem);
1facf9fc 9178+}
9179+
9180+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir)
9181+{
9182+ AuDebugOn(d1 == d2
9183+ || d1->d_inode == d2->d_inode
9184+ || d1->d_sb != d2->d_sb);
9185+
9186+ if (isdir && au_test_subdir(d1, d2)) {
9187+ di_write_lock_child(d1);
9188+ di_write_lock_child2(d2);
9189+ } else {
9190+ /* there should be no races */
9191+ di_write_lock_child(d2);
9192+ di_write_lock_child2(d1);
9193+ }
9194+}
9195+
9196+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir)
9197+{
9198+ AuDebugOn(d1 == d2
9199+ || d1->d_inode == d2->d_inode
9200+ || d1->d_sb != d2->d_sb);
9201+
9202+ if (isdir && au_test_subdir(d1, d2)) {
9203+ di_write_lock_parent(d1);
9204+ di_write_lock_parent2(d2);
9205+ } else {
9206+ /* there should be no races */
9207+ di_write_lock_parent(d2);
9208+ di_write_lock_parent2(d1);
9209+ }
9210+}
9211+
9212+void di_write_unlock2(struct dentry *d1, struct dentry *d2)
9213+{
9214+ di_write_unlock(d1);
9215+ if (d1->d_inode == d2->d_inode)
dece6358 9216+ au_rw_write_unlock(&au_di(d2)->di_rwsem);
1facf9fc 9217+ else
9218+ di_write_unlock(d2);
9219+}
9220+
9221+/* ---------------------------------------------------------------------- */
9222+
9223+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex)
9224+{
9225+ struct dentry *d;
9226+
1308ab2a 9227+ DiMustAnyLock(dentry);
9228+
1facf9fc 9229+ if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
9230+ return NULL;
9231+ AuDebugOn(bindex < 0);
9232+ d = au_di(dentry)->di_hdentry[0 + bindex].hd_dentry;
c1595e42 9233+ AuDebugOn(d && au_dcount(d) <= 0);
1facf9fc 9234+ return d;
9235+}
9236+
2cbb1c4b
JR
9237+/*
9238+ * extended version of au_h_dptr().
38d290e6
JR
9239+ * returns a hashed and positive (or linkable) h_dentry in bindex, NULL, or
9240+ * error.
2cbb1c4b
JR
9241+ */
9242+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex)
9243+{
9244+ struct dentry *h_dentry;
9245+ struct inode *inode, *h_inode;
9246+
9247+ inode = dentry->d_inode;
9248+ AuDebugOn(!inode);
9249+
9250+ h_dentry = NULL;
9251+ if (au_dbstart(dentry) <= bindex
9252+ && bindex <= au_dbend(dentry))
9253+ h_dentry = au_h_dptr(dentry, bindex);
38d290e6 9254+ if (h_dentry && !au_d_linkable(h_dentry)) {
2cbb1c4b
JR
9255+ dget(h_dentry);
9256+ goto out; /* success */
9257+ }
9258+
9259+ AuDebugOn(bindex < au_ibstart(inode));
9260+ AuDebugOn(au_ibend(inode) < bindex);
9261+ h_inode = au_h_iptr(inode, bindex);
9262+ h_dentry = d_find_alias(h_inode);
9263+ if (h_dentry) {
9264+ if (!IS_ERR(h_dentry)) {
38d290e6 9265+ if (!au_d_linkable(h_dentry))
2cbb1c4b
JR
9266+ goto out; /* success */
9267+ dput(h_dentry);
9268+ } else
9269+ goto out;
9270+ }
9271+
9272+ if (au_opt_test(au_mntflags(dentry->d_sb), PLINK)) {
9273+ h_dentry = au_plink_lkup(inode, bindex);
9274+ AuDebugOn(!h_dentry);
9275+ if (!IS_ERR(h_dentry)) {
9276+ if (!au_d_hashed_positive(h_dentry))
9277+ goto out; /* success */
9278+ dput(h_dentry);
9279+ h_dentry = NULL;
9280+ }
9281+ }
9282+
9283+out:
9284+ AuDbgDentry(h_dentry);
9285+ return h_dentry;
9286+}
9287+
1facf9fc 9288+aufs_bindex_t au_dbtail(struct dentry *dentry)
9289+{
9290+ aufs_bindex_t bend, bwh;
9291+
9292+ bend = au_dbend(dentry);
9293+ if (0 <= bend) {
9294+ bwh = au_dbwh(dentry);
9295+ if (!bwh)
9296+ return bwh;
9297+ if (0 < bwh && bwh < bend)
9298+ return bwh - 1;
9299+ }
9300+ return bend;
9301+}
9302+
9303+aufs_bindex_t au_dbtaildir(struct dentry *dentry)
9304+{
9305+ aufs_bindex_t bend, bopq;
9306+
9307+ bend = au_dbtail(dentry);
9308+ if (0 <= bend) {
9309+ bopq = au_dbdiropq(dentry);
9310+ if (0 <= bopq && bopq < bend)
9311+ bend = bopq;
9312+ }
9313+ return bend;
9314+}
9315+
9316+/* ---------------------------------------------------------------------- */
9317+
9318+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
9319+ struct dentry *h_dentry)
9320+{
9321+ struct au_hdentry *hd = au_di(dentry)->di_hdentry + bindex;
027c5e7a 9322+ struct au_branch *br;
1facf9fc 9323+
1308ab2a 9324+ DiMustWriteLock(dentry);
9325+
4a4d8108 9326+ au_hdput(hd);
1facf9fc 9327+ hd->hd_dentry = h_dentry;
027c5e7a
AM
9328+ if (h_dentry) {
9329+ br = au_sbr(dentry->d_sb, bindex);
9330+ hd->hd_id = br->br_id;
9331+ }
9332+}
9333+
9334+int au_dbrange_test(struct dentry *dentry)
9335+{
9336+ int err;
9337+ aufs_bindex_t bstart, bend;
9338+
9339+ err = 0;
9340+ bstart = au_dbstart(dentry);
9341+ bend = au_dbend(dentry);
9342+ if (bstart >= 0)
9343+ AuDebugOn(bend < 0 && bstart > bend);
9344+ else {
9345+ err = -EIO;
9346+ AuDebugOn(bend >= 0);
9347+ }
9348+
9349+ return err;
9350+}
9351+
9352+int au_digen_test(struct dentry *dentry, unsigned int sigen)
9353+{
9354+ int err;
9355+
9356+ err = 0;
9357+ if (unlikely(au_digen(dentry) != sigen
9358+ || au_iigen_test(dentry->d_inode, sigen)))
9359+ err = -EIO;
9360+
9361+ return err;
1facf9fc 9362+}
9363+
9364+void au_update_digen(struct dentry *dentry)
9365+{
9366+ atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb));
9367+ /* smp_mb(); */ /* atomic_set */
9368+}
9369+
9370+void au_update_dbrange(struct dentry *dentry, int do_put_zero)
9371+{
9372+ struct au_dinfo *dinfo;
9373+ struct dentry *h_d;
4a4d8108 9374+ struct au_hdentry *hdp;
1facf9fc 9375+
1308ab2a 9376+ DiMustWriteLock(dentry);
9377+
1facf9fc 9378+ dinfo = au_di(dentry);
9379+ if (!dinfo || dinfo->di_bstart < 0)
9380+ return;
9381+
4a4d8108 9382+ hdp = dinfo->di_hdentry;
1facf9fc 9383+ if (do_put_zero) {
9384+ aufs_bindex_t bindex, bend;
9385+
9386+ bend = dinfo->di_bend;
9387+ for (bindex = dinfo->di_bstart; bindex <= bend; bindex++) {
4a4d8108 9388+ h_d = hdp[0 + bindex].hd_dentry;
1facf9fc 9389+ if (h_d && !h_d->d_inode)
9390+ au_set_h_dptr(dentry, bindex, NULL);
9391+ }
9392+ }
9393+
9394+ dinfo->di_bstart = -1;
9395+ while (++dinfo->di_bstart <= dinfo->di_bend)
4a4d8108 9396+ if (hdp[0 + dinfo->di_bstart].hd_dentry)
1facf9fc 9397+ break;
9398+ if (dinfo->di_bstart > dinfo->di_bend) {
9399+ dinfo->di_bstart = -1;
9400+ dinfo->di_bend = -1;
9401+ return;
9402+ }
9403+
9404+ dinfo->di_bend++;
9405+ while (0 <= --dinfo->di_bend)
4a4d8108 9406+ if (hdp[0 + dinfo->di_bend].hd_dentry)
1facf9fc 9407+ break;
9408+ AuDebugOn(dinfo->di_bstart > dinfo->di_bend || dinfo->di_bend < 0);
9409+}
9410+
9411+void au_update_dbstart(struct dentry *dentry)
9412+{
9413+ aufs_bindex_t bindex, bend;
9414+ struct dentry *h_dentry;
9415+
9416+ bend = au_dbend(dentry);
9417+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
9418+ h_dentry = au_h_dptr(dentry, bindex);
9419+ if (!h_dentry)
9420+ continue;
9421+ if (h_dentry->d_inode) {
9422+ au_set_dbstart(dentry, bindex);
9423+ return;
9424+ }
9425+ au_set_h_dptr(dentry, bindex, NULL);
9426+ }
9427+}
9428+
9429+void au_update_dbend(struct dentry *dentry)
9430+{
9431+ aufs_bindex_t bindex, bstart;
9432+ struct dentry *h_dentry;
9433+
9434+ bstart = au_dbstart(dentry);
7f207e10 9435+ for (bindex = au_dbend(dentry); bindex >= bstart; bindex--) {
1facf9fc 9436+ h_dentry = au_h_dptr(dentry, bindex);
9437+ if (!h_dentry)
9438+ continue;
9439+ if (h_dentry->d_inode) {
9440+ au_set_dbend(dentry, bindex);
9441+ return;
9442+ }
9443+ au_set_h_dptr(dentry, bindex, NULL);
9444+ }
9445+}
9446+
9447+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry)
9448+{
9449+ aufs_bindex_t bindex, bend;
9450+
9451+ bend = au_dbend(dentry);
9452+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++)
9453+ if (au_h_dptr(dentry, bindex) == h_dentry)
9454+ return bindex;
9455+ return -1;
9456+}
7f207e10
AM
9457diff -urN /usr/share/empty/fs/aufs/dir.c linux/fs/aufs/dir.c
9458--- /usr/share/empty/fs/aufs/dir.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
9459+++ linux/fs/aufs/dir.c 2016-02-28 11:27:01.277245613 +0100
9460@@ -0,0 +1,756 @@
1facf9fc 9461+/*
7f2ca4b1 9462+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 9463+ *
9464+ * This program, aufs is free software; you can redistribute it and/or modify
9465+ * it under the terms of the GNU General Public License as published by
9466+ * the Free Software Foundation; either version 2 of the License, or
9467+ * (at your option) any later version.
dece6358
AM
9468+ *
9469+ * This program is distributed in the hope that it will be useful,
9470+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9471+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9472+ * GNU General Public License for more details.
9473+ *
9474+ * You should have received a copy of the GNU General Public License
523b37e3 9475+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 9476+ */
9477+
9478+/*
9479+ * directory operations
9480+ */
9481+
9482+#include <linux/fs_stack.h>
9483+#include "aufs.h"
9484+
9485+void au_add_nlink(struct inode *dir, struct inode *h_dir)
9486+{
9dbd164d
AM
9487+ unsigned int nlink;
9488+
1facf9fc 9489+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
9490+
9dbd164d
AM
9491+ nlink = dir->i_nlink;
9492+ nlink += h_dir->i_nlink - 2;
1facf9fc 9493+ if (h_dir->i_nlink < 2)
9dbd164d 9494+ nlink += 2;
f6b6e03d 9495+ smp_mb(); /* for i_nlink */
7eafdf33 9496+ /* 0 can happen in revaliding */
92d182d2 9497+ set_nlink(dir, nlink);
1facf9fc 9498+}
9499+
9500+void au_sub_nlink(struct inode *dir, struct inode *h_dir)
9501+{
9dbd164d
AM
9502+ unsigned int nlink;
9503+
1facf9fc 9504+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
9505+
9dbd164d
AM
9506+ nlink = dir->i_nlink;
9507+ nlink -= h_dir->i_nlink - 2;
1facf9fc 9508+ if (h_dir->i_nlink < 2)
9dbd164d 9509+ nlink -= 2;
f6b6e03d 9510+ smp_mb(); /* for i_nlink */
92d182d2 9511+ /* nlink == 0 means the branch-fs is broken */
9dbd164d 9512+ set_nlink(dir, nlink);
1facf9fc 9513+}
9514+
1308ab2a 9515+loff_t au_dir_size(struct file *file, struct dentry *dentry)
9516+{
9517+ loff_t sz;
9518+ aufs_bindex_t bindex, bend;
9519+ struct file *h_file;
9520+ struct dentry *h_dentry;
9521+
9522+ sz = 0;
9523+ if (file) {
7f2ca4b1 9524+ AuDebugOn(!d_is_dir(file->f_path.dentry));
1308ab2a 9525+
4a4d8108 9526+ bend = au_fbend_dir(file);
1308ab2a 9527+ for (bindex = au_fbstart(file);
9528+ bindex <= bend && sz < KMALLOC_MAX_SIZE;
9529+ bindex++) {
4a4d8108 9530+ h_file = au_hf_dir(file, bindex);
c06a8ce3
AM
9531+ if (h_file && file_inode(h_file))
9532+ sz += vfsub_f_size_read(h_file);
1308ab2a 9533+ }
9534+ } else {
9535+ AuDebugOn(!dentry);
7f2ca4b1 9536+ AuDebugOn(!d_is_dir(dentry));
1308ab2a 9537+
9538+ bend = au_dbtaildir(dentry);
9539+ for (bindex = au_dbstart(dentry);
9540+ bindex <= bend && sz < KMALLOC_MAX_SIZE;
9541+ bindex++) {
9542+ h_dentry = au_h_dptr(dentry, bindex);
9543+ if (h_dentry && h_dentry->d_inode)
9544+ sz += i_size_read(h_dentry->d_inode);
9545+ }
9546+ }
9547+ if (sz < KMALLOC_MAX_SIZE)
9548+ sz = roundup_pow_of_two(sz);
9549+ if (sz > KMALLOC_MAX_SIZE)
9550+ sz = KMALLOC_MAX_SIZE;
9551+ else if (sz < NAME_MAX) {
9552+ BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX);
9553+ sz = AUFS_RDBLK_DEF;
9554+ }
9555+ return sz;
9556+}
9557+
7f2ca4b1
JR
9558+struct au_dir_ts_arg {
9559+ struct dentry *dentry;
9560+ aufs_bindex_t brid;
9561+};
9562+
9563+static void au_do_dir_ts(void *arg)
9564+{
9565+ struct au_dir_ts_arg *a = arg;
9566+ struct au_dtime dt;
9567+ struct path h_path;
9568+ struct inode *dir, *h_dir;
9569+ struct super_block *sb;
9570+ struct au_branch *br;
9571+ struct au_hinode *hdir;
9572+ int err;
9573+ aufs_bindex_t bstart, bindex;
9574+
9575+ sb = a->dentry->d_sb;
9576+ dir = a->dentry->d_inode;
9577+ if (!dir)
9578+ goto out;
9579+ /* no dir->i_mutex lock */
9580+ aufs_read_lock(a->dentry, AuLock_DW); /* noflush */
9581+
9582+ bstart = au_ibstart(dir);
9583+ bindex = au_br_index(sb, a->brid);
9584+ if (bindex < bstart)
9585+ goto out_unlock;
9586+
9587+ br = au_sbr(sb, bindex);
9588+ h_path.dentry = au_h_dptr(a->dentry, bindex);
9589+ if (!h_path.dentry)
9590+ goto out_unlock;
9591+ h_path.mnt = au_br_mnt(br);
9592+ au_dtime_store(&dt, a->dentry, &h_path);
9593+
9594+ br = au_sbr(sb, bstart);
9595+ if (!au_br_writable(br->br_perm))
9596+ goto out_unlock;
9597+ h_path.dentry = au_h_dptr(a->dentry, bstart);
9598+ h_path.mnt = au_br_mnt(br);
9599+ err = vfsub_mnt_want_write(h_path.mnt);
9600+ if (err)
9601+ goto out_unlock;
9602+ hdir = au_hi(dir, bstart);
9603+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
9604+ h_dir = au_h_iptr(dir, bstart);
9605+ if (h_dir->i_nlink
9606+ && timespec_compare(&h_dir->i_mtime, &dt.dt_mtime) < 0) {
9607+ dt.dt_h_path = h_path;
9608+ au_dtime_revert(&dt);
9609+ }
9610+ au_hn_imtx_unlock(hdir);
9611+ vfsub_mnt_drop_write(h_path.mnt);
9612+ au_cpup_attr_timesizes(dir);
9613+
9614+out_unlock:
9615+ aufs_read_unlock(a->dentry, AuLock_DW);
9616+out:
9617+ dput(a->dentry);
9618+ au_nwt_done(&au_sbi(sb)->si_nowait);
9619+ kfree(arg);
9620+}
9621+
9622+void au_dir_ts(struct inode *dir, aufs_bindex_t bindex)
9623+{
9624+ int perm, wkq_err;
9625+ aufs_bindex_t bstart;
9626+ struct au_dir_ts_arg *arg;
9627+ struct dentry *dentry;
9628+ struct super_block *sb;
9629+
9630+ IMustLock(dir);
9631+
9632+ dentry = d_find_any_alias(dir);
9633+ AuDebugOn(!dentry);
9634+ sb = dentry->d_sb;
9635+ bstart = au_ibstart(dir);
9636+ if (bstart == bindex) {
9637+ au_cpup_attr_timesizes(dir);
9638+ goto out;
9639+ }
9640+
9641+ perm = au_sbr_perm(sb, bstart);
9642+ if (!au_br_writable(perm))
9643+ goto out;
9644+
9645+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
9646+ if (!arg)
9647+ goto out;
9648+
9649+ arg->dentry = dget(dentry); /* will be dput-ted by au_do_dir_ts() */
9650+ arg->brid = au_sbr_id(sb, bindex);
9651+ wkq_err = au_wkq_nowait(au_do_dir_ts, arg, sb, /*flags*/0);
9652+ if (unlikely(wkq_err)) {
9653+ pr_err("wkq %d\n", wkq_err);
9654+ dput(dentry);
9655+ kfree(arg);
9656+ }
9657+
9658+out:
9659+ dput(dentry);
9660+}
9661+
1facf9fc 9662+/* ---------------------------------------------------------------------- */
9663+
9664+static int reopen_dir(struct file *file)
9665+{
9666+ int err;
9667+ unsigned int flags;
9668+ aufs_bindex_t bindex, btail, bstart;
9669+ struct dentry *dentry, *h_dentry;
9670+ struct file *h_file;
9671+
9672+ /* open all lower dirs */
9673+ dentry = file->f_dentry;
9674+ bstart = au_dbstart(dentry);
9675+ for (bindex = au_fbstart(file); bindex < bstart; bindex++)
9676+ au_set_h_fptr(file, bindex, NULL);
9677+ au_set_fbstart(file, bstart);
9678+
9679+ btail = au_dbtaildir(dentry);
4a4d8108 9680+ for (bindex = au_fbend_dir(file); btail < bindex; bindex--)
1facf9fc 9681+ au_set_h_fptr(file, bindex, NULL);
4a4d8108 9682+ au_set_fbend_dir(file, btail);
1facf9fc 9683+
4a4d8108 9684+ flags = vfsub_file_flags(file);
1facf9fc 9685+ for (bindex = bstart; bindex <= btail; bindex++) {
9686+ h_dentry = au_h_dptr(dentry, bindex);
9687+ if (!h_dentry)
9688+ continue;
4a4d8108 9689+ h_file = au_hf_dir(file, bindex);
1facf9fc 9690+ if (h_file)
9691+ continue;
9692+
392086de 9693+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
1facf9fc 9694+ err = PTR_ERR(h_file);
9695+ if (IS_ERR(h_file))
9696+ goto out; /* close all? */
9697+ au_set_h_fptr(file, bindex, h_file);
9698+ }
9699+ au_update_figen(file);
9700+ /* todo: necessary? */
9701+ /* file->f_ra = h_file->f_ra; */
9702+ err = 0;
9703+
4f0767ce 9704+out:
1facf9fc 9705+ return err;
9706+}
9707+
7f2ca4b1 9708+static int do_open_dir(struct file *file, int flags, struct file *h_file)
1facf9fc 9709+{
9710+ int err;
9711+ aufs_bindex_t bindex, btail;
9712+ struct dentry *dentry, *h_dentry;
7f2ca4b1 9713+ struct vfsmount *mnt;
1facf9fc 9714+
1308ab2a 9715+ FiMustWriteLock(file);
7f2ca4b1 9716+ AuDebugOn(h_file);
1308ab2a 9717+
523b37e3 9718+ err = 0;
7f2ca4b1 9719+ mnt = file->f_path.mnt;
1facf9fc 9720+ dentry = file->f_dentry;
1facf9fc 9721+ file->f_version = dentry->d_inode->i_version;
9722+ bindex = au_dbstart(dentry);
9723+ au_set_fbstart(file, bindex);
9724+ btail = au_dbtaildir(dentry);
4a4d8108 9725+ au_set_fbend_dir(file, btail);
1facf9fc 9726+ for (; !err && bindex <= btail; bindex++) {
9727+ h_dentry = au_h_dptr(dentry, bindex);
9728+ if (!h_dentry)
9729+ continue;
9730+
7f2ca4b1
JR
9731+ err = vfsub_test_mntns(mnt, h_dentry->d_sb);
9732+ if (unlikely(err))
9733+ break;
392086de 9734+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
1facf9fc 9735+ if (IS_ERR(h_file)) {
9736+ err = PTR_ERR(h_file);
9737+ break;
9738+ }
9739+ au_set_h_fptr(file, bindex, h_file);
9740+ }
9741+ au_update_figen(file);
9742+ /* todo: necessary? */
9743+ /* file->f_ra = h_file->f_ra; */
9744+ if (!err)
9745+ return 0; /* success */
9746+
9747+ /* close all */
9748+ for (bindex = au_fbstart(file); bindex <= btail; bindex++)
9749+ au_set_h_fptr(file, bindex, NULL);
9750+ au_set_fbstart(file, -1);
4a4d8108
AM
9751+ au_set_fbend_dir(file, -1);
9752+
1facf9fc 9753+ return err;
9754+}
9755+
9756+static int aufs_open_dir(struct inode *inode __maybe_unused,
9757+ struct file *file)
9758+{
4a4d8108
AM
9759+ int err;
9760+ struct super_block *sb;
9761+ struct au_fidir *fidir;
9762+
9763+ err = -ENOMEM;
9764+ sb = file->f_dentry->d_sb;
9765+ si_read_lock(sb, AuLock_FLUSH);
e49829fe 9766+ fidir = au_fidir_alloc(sb);
4a4d8108 9767+ if (fidir) {
7f2ca4b1
JR
9768+ struct au_do_open_args args = {
9769+ .open = do_open_dir,
9770+ .fidir = fidir
9771+ };
9772+ err = au_do_open(file, &args);
4a4d8108
AM
9773+ if (unlikely(err))
9774+ kfree(fidir);
9775+ }
9776+ si_read_unlock(sb);
9777+ return err;
1facf9fc 9778+}
9779+
9780+static int aufs_release_dir(struct inode *inode __maybe_unused,
9781+ struct file *file)
9782+{
9783+ struct au_vdir *vdir_cache;
4a4d8108
AM
9784+ struct au_finfo *finfo;
9785+ struct au_fidir *fidir;
9786+ aufs_bindex_t bindex, bend;
1facf9fc 9787+
4a4d8108
AM
9788+ finfo = au_fi(file);
9789+ fidir = finfo->fi_hdir;
9790+ if (fidir) {
076b876e
AM
9791+ au_sphl_del(&finfo->fi_hlist,
9792+ &au_sbi(file->f_dentry->d_sb)->si_files);
4a4d8108
AM
9793+ vdir_cache = fidir->fd_vdir_cache; /* lock-free */
9794+ if (vdir_cache)
9795+ au_vdir_free(vdir_cache);
9796+
9797+ bindex = finfo->fi_btop;
9798+ if (bindex >= 0) {
9799+ /*
9800+ * calls fput() instead of filp_close(),
9801+ * since no dnotify or lock for the lower file.
9802+ */
9803+ bend = fidir->fd_bbot;
9804+ for (; bindex <= bend; bindex++)
9805+ au_set_h_fptr(file, bindex, NULL);
9806+ }
9807+ kfree(fidir);
9808+ finfo->fi_hdir = NULL;
1facf9fc 9809+ }
1facf9fc 9810+ au_finfo_fin(file);
1facf9fc 9811+ return 0;
9812+}
9813+
9814+/* ---------------------------------------------------------------------- */
9815+
4a4d8108
AM
9816+static int au_do_flush_dir(struct file *file, fl_owner_t id)
9817+{
9818+ int err;
9819+ aufs_bindex_t bindex, bend;
9820+ struct file *h_file;
9821+
9822+ err = 0;
9823+ bend = au_fbend_dir(file);
9824+ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
9825+ h_file = au_hf_dir(file, bindex);
9826+ if (h_file)
9827+ err = vfsub_flush(h_file, id);
9828+ }
9829+ return err;
9830+}
9831+
9832+static int aufs_flush_dir(struct file *file, fl_owner_t id)
9833+{
9834+ return au_do_flush(file, id, au_do_flush_dir);
9835+}
9836+
9837+/* ---------------------------------------------------------------------- */
9838+
1facf9fc 9839+static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync)
9840+{
9841+ int err;
9842+ aufs_bindex_t bend, bindex;
9843+ struct inode *inode;
9844+ struct super_block *sb;
9845+
9846+ err = 0;
9847+ sb = dentry->d_sb;
9848+ inode = dentry->d_inode;
9849+ IMustLock(inode);
9850+ bend = au_dbend(dentry);
9851+ for (bindex = au_dbstart(dentry); !err && bindex <= bend; bindex++) {
9852+ struct path h_path;
1facf9fc 9853+
9854+ if (au_test_ro(sb, bindex, inode))
9855+ continue;
9856+ h_path.dentry = au_h_dptr(dentry, bindex);
9857+ if (!h_path.dentry)
9858+ continue;
1facf9fc 9859+
1facf9fc 9860+ h_path.mnt = au_sbr_mnt(sb, bindex);
53392da6 9861+ err = vfsub_fsync(NULL, &h_path, datasync);
1facf9fc 9862+ }
9863+
9864+ return err;
9865+}
9866+
9867+static int au_do_fsync_dir(struct file *file, int datasync)
9868+{
9869+ int err;
9870+ aufs_bindex_t bend, bindex;
9871+ struct file *h_file;
9872+ struct super_block *sb;
9873+ struct inode *inode;
1facf9fc 9874+
9875+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
9876+ if (unlikely(err))
9877+ goto out;
9878+
c06a8ce3 9879+ inode = file_inode(file);
7f2ca4b1 9880+ sb = inode->i_sb;
4a4d8108 9881+ bend = au_fbend_dir(file);
1facf9fc 9882+ for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
4a4d8108 9883+ h_file = au_hf_dir(file, bindex);
1facf9fc 9884+ if (!h_file || au_test_ro(sb, bindex, inode))
9885+ continue;
9886+
53392da6 9887+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
1facf9fc 9888+ }
9889+
4f0767ce 9890+out:
1facf9fc 9891+ return err;
9892+}
9893+
9894+/*
9895+ * @file must not be NULL: it is dereferenced before the NULL checks below
9896+ */
1e00d052
AM
9897+static int aufs_fsync_dir(struct file *file, loff_t start, loff_t end,
9898+ int datasync)
1facf9fc 9899+{
9900+ int err;
b752ccd1 9901+ struct dentry *dentry;
1facf9fc 9902+ struct super_block *sb;
1e00d052 9903+ struct mutex *mtx;
1facf9fc 9904+
9905+ err = 0;
1e00d052
AM
9906+ dentry = file->f_dentry;
9907+ mtx = &dentry->d_inode->i_mutex;
9908+ mutex_lock(mtx);
1facf9fc 9909+ sb = dentry->d_sb;
9910+ si_noflush_read_lock(sb);
9911+ if (file)
9912+ err = au_do_fsync_dir(file, datasync);
9913+ else {
9914+ di_write_lock_child(dentry);
9915+ err = au_do_fsync_dir_no_file(dentry, datasync);
9916+ }
9917+ au_cpup_attr_timesizes(dentry->d_inode);
9918+ di_write_unlock(dentry);
9919+ if (file)
9920+ fi_write_unlock(file);
9921+
9922+ si_read_unlock(sb);
1e00d052 9923+ mutex_unlock(mtx);
1facf9fc 9924+ return err;
9925+}
9926+
9927+/* ---------------------------------------------------------------------- */
9928+
392086de 9929+static int aufs_iterate(struct file *file, struct dir_context *ctx)
1facf9fc 9930+{
9931+ int err;
9932+ struct dentry *dentry;
9dbd164d 9933+ struct inode *inode, *h_inode;
1facf9fc 9934+ struct super_block *sb;
9935+
523b37e3 9936+ AuDbg("%pD, ctx{%pf, %llu}\n", file, ctx->actor, ctx->pos);
392086de 9937+
1facf9fc 9938+ dentry = file->f_dentry;
9939+ inode = dentry->d_inode;
9940+ IMustLock(inode);
9941+
9942+ sb = dentry->d_sb;
9943+ si_read_lock(sb, AuLock_FLUSH);
9944+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1);
9945+ if (unlikely(err))
9946+ goto out;
027c5e7a
AM
9947+ err = au_alive_dir(dentry);
9948+ if (!err)
9949+ err = au_vdir_init(file);
1facf9fc 9950+ di_downgrade_lock(dentry, AuLock_IR);
9951+ if (unlikely(err))
9952+ goto out_unlock;
9953+
9dbd164d 9954+ h_inode = au_h_iptr(inode, au_ibstart(inode));
b752ccd1 9955+ if (!au_test_nfsd()) {
392086de 9956+ err = au_vdir_fill_de(file, ctx);
9dbd164d 9957+ fsstack_copy_attr_atime(inode, h_inode);
1facf9fc 9958+ } else {
9959+ /*
9960+ * nfsd filldir may call lookup_one_len(), vfs_getattr(),
9961+ * encode_fh() and others.
9962+ */
9dbd164d 9963+ atomic_inc(&h_inode->i_count);
1facf9fc 9964+ di_read_unlock(dentry, AuLock_IR);
9965+ si_read_unlock(sb);
392086de 9966+ err = au_vdir_fill_de(file, ctx);
1facf9fc 9967+ fsstack_copy_attr_atime(inode, h_inode);
9968+ fi_write_unlock(file);
9dbd164d 9969+ iput(h_inode);
1facf9fc 9970+
9971+ AuTraceErr(err);
9972+ return err;
9973+ }
9974+
4f0767ce 9975+out_unlock:
1facf9fc 9976+ di_read_unlock(dentry, AuLock_IR);
9977+ fi_write_unlock(file);
4f0767ce 9978+out:
1facf9fc 9979+ si_read_unlock(sb);
9980+ return err;
9981+}
9982+
9983+/* ---------------------------------------------------------------------- */
9984+
9985+#define AuTestEmpty_WHONLY 1
dece6358
AM
9986+#define AuTestEmpty_CALLED (1 << 1)
9987+#define AuTestEmpty_SHWH (1 << 2)
1facf9fc 9988+#define au_ftest_testempty(flags, name) ((flags) & AuTestEmpty_##name)
7f207e10
AM
9989+#define au_fset_testempty(flags, name) \
9990+ do { (flags) |= AuTestEmpty_##name; } while (0)
9991+#define au_fclr_testempty(flags, name) \
9992+ do { (flags) &= ~AuTestEmpty_##name; } while (0)
1facf9fc 9993+
dece6358
AM
9994+#ifndef CONFIG_AUFS_SHWH
9995+#undef AuTestEmpty_SHWH
9996+#define AuTestEmpty_SHWH 0
9997+#endif
9998+
1facf9fc 9999+struct test_empty_arg {
392086de 10000+ struct dir_context ctx;
1308ab2a 10001+ struct au_nhash *whlist;
1facf9fc 10002+ unsigned int flags;
10003+ int err;
10004+ aufs_bindex_t bindex;
10005+};
10006+
392086de
AM
10007+static int test_empty_cb(struct dir_context *ctx, const char *__name,
10008+ int namelen, loff_t offset __maybe_unused, u64 ino,
dece6358 10009+ unsigned int d_type)
1facf9fc 10010+{
392086de
AM
10011+ struct test_empty_arg *arg = container_of(ctx, struct test_empty_arg,
10012+ ctx);
1facf9fc 10013+ char *name = (void *)__name;
10014+
10015+ arg->err = 0;
10016+ au_fset_testempty(arg->flags, CALLED);
10017+ /* smp_mb(); */
10018+ if (name[0] == '.'
10019+ && (namelen == 1 || (name[1] == '.' && namelen == 2)))
10020+ goto out; /* success */
10021+
10022+ if (namelen <= AUFS_WH_PFX_LEN
10023+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
10024+ if (au_ftest_testempty(arg->flags, WHONLY)
1308ab2a 10025+ && !au_nhash_test_known_wh(arg->whlist, name, namelen))
1facf9fc 10026+ arg->err = -ENOTEMPTY;
10027+ goto out;
10028+ }
10029+
10030+ name += AUFS_WH_PFX_LEN;
10031+ namelen -= AUFS_WH_PFX_LEN;
1308ab2a 10032+ if (!au_nhash_test_known_wh(arg->whlist, name, namelen))
1facf9fc 10033+ arg->err = au_nhash_append_wh
1308ab2a 10034+ (arg->whlist, name, namelen, ino, d_type, arg->bindex,
dece6358 10035+ au_ftest_testempty(arg->flags, SHWH));
1facf9fc 10036+
4f0767ce 10037+out:
1facf9fc 10038+ /* smp_mb(); */
10039+ AuTraceErr(arg->err);
10040+ return arg->err;
10041+}
10042+
10043+static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
10044+{
10045+ int err;
10046+ struct file *h_file;
10047+
10048+ h_file = au_h_open(dentry, arg->bindex,
10049+ O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE,
392086de 10050+ /*file*/NULL, /*force_wr*/0);
1facf9fc 10051+ err = PTR_ERR(h_file);
10052+ if (IS_ERR(h_file))
10053+ goto out;
10054+
10055+ err = 0;
10056+ if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
c06a8ce3 10057+ && !file_inode(h_file)->i_nlink)
1facf9fc 10058+ goto out_put;
10059+
10060+ do {
10061+ arg->err = 0;
10062+ au_fclr_testempty(arg->flags, CALLED);
10063+ /* smp_mb(); */
392086de 10064+ err = vfsub_iterate_dir(h_file, &arg->ctx);
1facf9fc 10065+ if (err >= 0)
10066+ err = arg->err;
10067+ } while (!err && au_ftest_testempty(arg->flags, CALLED));
10068+
4f0767ce 10069+out_put:
1facf9fc 10070+ fput(h_file);
10071+ au_sbr_put(dentry->d_sb, arg->bindex);
4f0767ce 10072+out:
1facf9fc 10073+ return err;
10074+}
10075+
10076+struct do_test_empty_args {
10077+ int *errp;
10078+ struct dentry *dentry;
10079+ struct test_empty_arg *arg;
10080+};
10081+
10082+static void call_do_test_empty(void *args)
10083+{
10084+ struct do_test_empty_args *a = args;
10085+ *a->errp = do_test_empty(a->dentry, a->arg);
10086+}
10087+
10088+static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
10089+{
10090+ int err, wkq_err;
10091+ struct dentry *h_dentry;
10092+ struct inode *h_inode;
10093+
10094+ h_dentry = au_h_dptr(dentry, arg->bindex);
10095+ h_inode = h_dentry->d_inode;
53392da6 10096+ /* todo: i_mode changes anytime? */
1facf9fc 10097+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
10098+ err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ);
10099+ mutex_unlock(&h_inode->i_mutex);
10100+ if (!err)
10101+ err = do_test_empty(dentry, arg);
10102+ else {
10103+ struct do_test_empty_args args = {
10104+ .errp = &err,
10105+ .dentry = dentry,
10106+ .arg = arg
10107+ };
10108+ unsigned int flags = arg->flags;
10109+
10110+ wkq_err = au_wkq_wait(call_do_test_empty, &args);
10111+ if (unlikely(wkq_err))
10112+ err = wkq_err;
10113+ arg->flags = flags;
10114+ }
10115+
10116+ return err;
10117+}
10118+
10119+int au_test_empty_lower(struct dentry *dentry)
10120+{
10121+ int err;
1308ab2a 10122+ unsigned int rdhash;
1facf9fc 10123+ aufs_bindex_t bindex, bstart, btail;
1308ab2a 10124+ struct au_nhash whlist;
392086de
AM
10125+ struct test_empty_arg arg = {
10126+ .ctx = {
10127+ .actor = au_diractor(test_empty_cb)
10128+ }
10129+ };
076b876e 10130+ int (*test_empty)(struct dentry *dentry, struct test_empty_arg *arg);
1facf9fc 10131+
dece6358
AM
10132+ SiMustAnyLock(dentry->d_sb);
10133+
1308ab2a 10134+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
10135+ if (!rdhash)
10136+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry));
10137+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
dece6358 10138+ if (unlikely(err))
1facf9fc 10139+ goto out;
10140+
1facf9fc 10141+ arg.flags = 0;
1308ab2a 10142+ arg.whlist = &whlist;
10143+ bstart = au_dbstart(dentry);
dece6358
AM
10144+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
10145+ au_fset_testempty(arg.flags, SHWH);
076b876e
AM
10146+ test_empty = do_test_empty;
10147+ if (au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1))
10148+ test_empty = sio_test_empty;
1facf9fc 10149+ arg.bindex = bstart;
076b876e 10150+ err = test_empty(dentry, &arg);
1facf9fc 10151+ if (unlikely(err))
10152+ goto out_whlist;
10153+
10154+ au_fset_testempty(arg.flags, WHONLY);
10155+ btail = au_dbtaildir(dentry);
10156+ for (bindex = bstart + 1; !err && bindex <= btail; bindex++) {
10157+ struct dentry *h_dentry;
10158+
10159+ h_dentry = au_h_dptr(dentry, bindex);
10160+ if (h_dentry && h_dentry->d_inode) {
10161+ arg.bindex = bindex;
076b876e 10162+ err = test_empty(dentry, &arg);
1facf9fc 10163+ }
10164+ }
10165+
4f0767ce 10166+out_whlist:
1308ab2a 10167+ au_nhash_wh_free(&whlist);
4f0767ce 10168+out:
1facf9fc 10169+ return err;
10170+}
10171+
10172+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist)
10173+{
10174+ int err;
392086de
AM
10175+ struct test_empty_arg arg = {
10176+ .ctx = {
10177+ .actor = au_diractor(test_empty_cb)
10178+ }
10179+ };
1facf9fc 10180+ aufs_bindex_t bindex, btail;
10181+
10182+ err = 0;
1308ab2a 10183+ arg.whlist = whlist;
1facf9fc 10184+ arg.flags = AuTestEmpty_WHONLY;
dece6358
AM
10185+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
10186+ au_fset_testempty(arg.flags, SHWH);
1facf9fc 10187+ btail = au_dbtaildir(dentry);
10188+ for (bindex = au_dbstart(dentry); !err && bindex <= btail; bindex++) {
10189+ struct dentry *h_dentry;
10190+
10191+ h_dentry = au_h_dptr(dentry, bindex);
10192+ if (h_dentry && h_dentry->d_inode) {
10193+ arg.bindex = bindex;
10194+ err = sio_test_empty(dentry, &arg);
10195+ }
10196+ }
10197+
10198+ return err;
10199+}
10200+
10201+/* ---------------------------------------------------------------------- */
10202+
10203+const struct file_operations aufs_dir_fop = {
4a4d8108 10204+ .owner = THIS_MODULE,
027c5e7a 10205+ .llseek = default_llseek,
1facf9fc 10206+ .read = generic_read_dir,
392086de 10207+ .iterate = aufs_iterate,
1facf9fc 10208+ .unlocked_ioctl = aufs_ioctl_dir,
b752ccd1
AM
10209+#ifdef CONFIG_COMPAT
10210+ .compat_ioctl = aufs_compat_ioctl_dir,
10211+#endif
1facf9fc 10212+ .open = aufs_open_dir,
10213+ .release = aufs_release_dir,
4a4d8108 10214+ .flush = aufs_flush_dir,
1facf9fc 10215+ .fsync = aufs_fsync_dir
10216+};
7f207e10
AM
10217diff -urN /usr/share/empty/fs/aufs/dir.h linux/fs/aufs/dir.h
10218--- /usr/share/empty/fs/aufs/dir.h 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
10219+++ linux/fs/aufs/dir.h 2016-02-28 11:27:01.277245613 +0100
10220@@ -0,0 +1,131 @@
1facf9fc 10221+/*
7f2ca4b1 10222+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 10223+ *
10224+ * This program, aufs is free software; you can redistribute it and/or modify
10225+ * it under the terms of the GNU General Public License as published by
10226+ * the Free Software Foundation; either version 2 of the License, or
10227+ * (at your option) any later version.
dece6358
AM
10228+ *
10229+ * This program is distributed in the hope that it will be useful,
10230+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10231+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10232+ * GNU General Public License for more details.
10233+ *
10234+ * You should have received a copy of the GNU General Public License
523b37e3 10235+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 10236+ */
10237+
10238+/*
10239+ * directory operations
10240+ */
10241+
10242+#ifndef __AUFS_DIR_H__
10243+#define __AUFS_DIR_H__
10244+
10245+#ifdef __KERNEL__
10246+
10247+#include <linux/fs.h>
1facf9fc 10248+
10249+/* ---------------------------------------------------------------------- */
10250+
10251+/* need to be faster and smaller */
10252+
10253+struct au_nhash {
dece6358
AM
10254+ unsigned int nh_num;
10255+ struct hlist_head *nh_head;
1facf9fc 10256+};
10257+
10258+struct au_vdir_destr {
10259+ unsigned char len;
10260+ unsigned char name[0];
10261+} __packed;
10262+
10263+struct au_vdir_dehstr {
10264+ struct hlist_node hash;
10265+ struct au_vdir_destr *str;
4a4d8108 10266+} ____cacheline_aligned_in_smp;
1facf9fc 10267+
10268+struct au_vdir_de {
10269+ ino_t de_ino;
10270+ unsigned char de_type;
10271+ /* caution: packed */
10272+ struct au_vdir_destr de_str;
10273+} __packed;
10274+
10275+struct au_vdir_wh {
10276+ struct hlist_node wh_hash;
dece6358
AM
10277+#ifdef CONFIG_AUFS_SHWH
10278+ ino_t wh_ino;
1facf9fc 10279+ aufs_bindex_t wh_bindex;
dece6358
AM
10280+ unsigned char wh_type;
10281+#else
10282+ aufs_bindex_t wh_bindex;
10283+#endif
10284+ /* caution: packed */
1facf9fc 10285+ struct au_vdir_destr wh_str;
10286+} __packed;
10287+
10288+union au_vdir_deblk_p {
10289+ unsigned char *deblk;
10290+ struct au_vdir_de *de;
10291+};
10292+
10293+struct au_vdir {
10294+ unsigned char **vd_deblk;
10295+ unsigned long vd_nblk;
1facf9fc 10296+ struct {
10297+ unsigned long ul;
10298+ union au_vdir_deblk_p p;
10299+ } vd_last;
10300+
10301+ unsigned long vd_version;
dece6358 10302+ unsigned int vd_deblk_sz;
1facf9fc 10303+ unsigned long vd_jiffy;
4a4d8108 10304+} ____cacheline_aligned_in_smp;
1facf9fc 10305+
10306+/* ---------------------------------------------------------------------- */
10307+
10308+/* dir.c */
10309+extern const struct file_operations aufs_dir_fop;
10310+void au_add_nlink(struct inode *dir, struct inode *h_dir);
10311+void au_sub_nlink(struct inode *dir, struct inode *h_dir);
1308ab2a 10312+loff_t au_dir_size(struct file *file, struct dentry *dentry);
7f2ca4b1 10313+void au_dir_ts(struct inode *dir, aufs_bindex_t bsrc);
1facf9fc 10314+int au_test_empty_lower(struct dentry *dentry);
10315+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist);
10316+
10317+/* vdir.c */
1308ab2a 10318+unsigned int au_rdhash_est(loff_t sz);
dece6358
AM
10319+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp);
10320+void au_nhash_wh_free(struct au_nhash *whlist);
1facf9fc 10321+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
10322+ int limit);
dece6358
AM
10323+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen);
10324+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
10325+ unsigned int d_type, aufs_bindex_t bindex,
10326+ unsigned char shwh);
1facf9fc 10327+void au_vdir_free(struct au_vdir *vdir);
10328+int au_vdir_init(struct file *file);
392086de 10329+int au_vdir_fill_de(struct file *file, struct dir_context *ctx);
1facf9fc 10330+
10331+/* ioctl.c */
10332+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg);
10333+
1308ab2a 10334+#ifdef CONFIG_AUFS_RDU
10335+/* rdu.c */
10336+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
b752ccd1
AM
10337+#ifdef CONFIG_COMPAT
10338+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd,
10339+ unsigned long arg);
10340+#endif
1308ab2a 10341+#else
c1595e42
JR
10342+AuStub(long, au_rdu_ioctl, return -EINVAL, struct file *file,
10343+ unsigned int cmd, unsigned long arg)
b752ccd1 10344+#ifdef CONFIG_COMPAT
c1595e42
JR
10345+AuStub(long, au_rdu_compat_ioctl, return -EINVAL, struct file *file,
10346+ unsigned int cmd, unsigned long arg)
b752ccd1 10347+#endif
1308ab2a 10348+#endif
10349+
1facf9fc 10350+#endif /* __KERNEL__ */
10351+#endif /* __AUFS_DIR_H__ */
7f207e10
AM
10352diff -urN /usr/share/empty/fs/aufs/dynop.c linux/fs/aufs/dynop.c
10353--- /usr/share/empty/fs/aufs/dynop.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 10354+++ linux/fs/aufs/dynop.c 2016-02-28 11:27:01.277245613 +0100
523b37e3 10355@@ -0,0 +1,379 @@
1facf9fc 10356+/*
7f2ca4b1 10357+ * Copyright (C) 2010-2016 Junjiro R. Okajima
1facf9fc 10358+ *
10359+ * This program, aufs is free software; you can redistribute it and/or modify
10360+ * it under the terms of the GNU General Public License as published by
10361+ * the Free Software Foundation; either version 2 of the License, or
10362+ * (at your option) any later version.
dece6358
AM
10363+ *
10364+ * This program is distributed in the hope that it will be useful,
10365+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10366+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10367+ * GNU General Public License for more details.
10368+ *
10369+ * You should have received a copy of the GNU General Public License
523b37e3 10370+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 10371+ */
10372+
10373+/*
4a4d8108 10374+ * dynamically customizable operations for regular files
1facf9fc 10375+ */
10376+
1facf9fc 10377+#include "aufs.h"
10378+
4a4d8108 10379+#define DyPrSym(key) AuDbgSym(key->dk_op.dy_hop)
1facf9fc 10380+
4a4d8108
AM
10381+/*
10382+ * How large will these lists be?
10383+ * Usually just a few elements, 20-30 at most for each, I guess.
10384+ */
10385+static struct au_splhead dynop[AuDyLast];
10386+
10387+static struct au_dykey *dy_gfind_get(struct au_splhead *spl, const void *h_op)
1facf9fc 10388+{
4a4d8108
AM
10389+ struct au_dykey *key, *tmp;
10390+ struct list_head *head;
1facf9fc 10391+
4a4d8108
AM
10392+ key = NULL;
10393+ head = &spl->head;
10394+ rcu_read_lock();
10395+ list_for_each_entry_rcu(tmp, head, dk_list)
10396+ if (tmp->dk_op.dy_hop == h_op) {
10397+ key = tmp;
10398+ kref_get(&key->dk_kref);
10399+ break;
10400+ }
10401+ rcu_read_unlock();
10402+
10403+ return key;
1facf9fc 10404+}
10405+
4a4d8108 10406+static struct au_dykey *dy_bradd(struct au_branch *br, struct au_dykey *key)
1facf9fc 10407+{
4a4d8108
AM
10408+ struct au_dykey **k, *found;
10409+ const void *h_op = key->dk_op.dy_hop;
10410+ int i;
1facf9fc 10411+
4a4d8108
AM
10412+ found = NULL;
10413+ k = br->br_dykey;
10414+ for (i = 0; i < AuBrDynOp; i++)
10415+ if (k[i]) {
10416+ if (k[i]->dk_op.dy_hop == h_op) {
10417+ found = k[i];
10418+ break;
10419+ }
10420+ } else
10421+ break;
10422+ if (!found) {
10423+ spin_lock(&br->br_dykey_lock);
10424+ for (; i < AuBrDynOp; i++)
10425+ if (k[i]) {
10426+ if (k[i]->dk_op.dy_hop == h_op) {
10427+ found = k[i];
10428+ break;
10429+ }
10430+ } else {
10431+ k[i] = key;
10432+ break;
10433+ }
10434+ spin_unlock(&br->br_dykey_lock);
10435+ BUG_ON(i == AuBrDynOp); /* expand the array */
10436+ }
10437+
10438+ return found;
1facf9fc 10439+}
10440+
4a4d8108
AM
10441+/* kref_get() if @key is already added */
10442+static struct au_dykey *dy_gadd(struct au_splhead *spl, struct au_dykey *key)
10443+{
10444+ struct au_dykey *tmp, *found;
10445+ struct list_head *head;
10446+ const void *h_op = key->dk_op.dy_hop;
1facf9fc 10447+
4a4d8108
AM
10448+ found = NULL;
10449+ head = &spl->head;
10450+ spin_lock(&spl->spin);
10451+ list_for_each_entry(tmp, head, dk_list)
10452+ if (tmp->dk_op.dy_hop == h_op) {
10453+ kref_get(&tmp->dk_kref);
10454+ found = tmp;
10455+ break;
10456+ }
10457+ if (!found)
10458+ list_add_rcu(&key->dk_list, head);
10459+ spin_unlock(&spl->spin);
1facf9fc 10460+
4a4d8108
AM
10461+ if (!found)
10462+ DyPrSym(key);
10463+ return found;
10464+}
10465+
10466+static void dy_free_rcu(struct rcu_head *rcu)
1facf9fc 10467+{
4a4d8108
AM
10468+ struct au_dykey *key;
10469+
10470+ key = container_of(rcu, struct au_dykey, dk_rcu);
10471+ DyPrSym(key);
10472+ kfree(key);
1facf9fc 10473+}
10474+
4a4d8108
AM
10475+static void dy_free(struct kref *kref)
10476+{
10477+ struct au_dykey *key;
10478+ struct au_splhead *spl;
1facf9fc 10479+
4a4d8108
AM
10480+ key = container_of(kref, struct au_dykey, dk_kref);
10481+ spl = dynop + key->dk_op.dy_type;
10482+ au_spl_del_rcu(&key->dk_list, spl);
10483+ call_rcu(&key->dk_rcu, dy_free_rcu);
10484+}
10485+
10486+void au_dy_put(struct au_dykey *key)
1facf9fc 10487+{
4a4d8108
AM
10488+ kref_put(&key->dk_kref, dy_free);
10489+}
1facf9fc 10490+
4a4d8108
AM
10491+/* ---------------------------------------------------------------------- */
10492+
10493+#define DyDbgSize(cnt, op) AuDebugOn(cnt != sizeof(op)/sizeof(void *))
10494+
10495+#ifdef CONFIG_AUFS_DEBUG
10496+#define DyDbgDeclare(cnt) unsigned int cnt = 0
4f0767ce 10497+#define DyDbgInc(cnt) do { cnt++; } while (0)
4a4d8108
AM
10498+#else
10499+#define DyDbgDeclare(cnt) do {} while (0)
10500+#define DyDbgInc(cnt) do {} while (0)
10501+#endif
10502+
10503+#define DySet(func, dst, src, h_op, h_sb) do { \
10504+ DyDbgInc(cnt); \
10505+ if (h_op->func) { \
10506+ if (src.func) \
10507+ dst.func = src.func; \
10508+ else \
10509+ AuDbg("%s %s\n", au_sbtype(h_sb), #func); \
10510+ } \
10511+} while (0)
10512+
10513+#define DySetForce(func, dst, src) do { \
10514+ AuDebugOn(!src.func); \
10515+ DyDbgInc(cnt); \
10516+ dst.func = src.func; \
10517+} while (0)
10518+
10519+#define DySetAop(func) \
10520+ DySet(func, dyaop->da_op, aufs_aop, h_aop, h_sb)
10521+#define DySetAopForce(func) \
10522+ DySetForce(func, dyaop->da_op, aufs_aop)
10523+
10524+static void dy_aop(struct au_dykey *key, const void *h_op,
10525+ struct super_block *h_sb __maybe_unused)
10526+{
10527+ struct au_dyaop *dyaop = (void *)key;
10528+ const struct address_space_operations *h_aop = h_op;
10529+ DyDbgDeclare(cnt);
10530+
10531+ AuDbg("%s\n", au_sbtype(h_sb));
10532+
10533+ DySetAop(writepage);
10534+ DySetAopForce(readpage); /* force */
4a4d8108
AM
10535+ DySetAop(writepages);
10536+ DySetAop(set_page_dirty);
10537+ DySetAop(readpages);
10538+ DySetAop(write_begin);
10539+ DySetAop(write_end);
10540+ DySetAop(bmap);
10541+ DySetAop(invalidatepage);
10542+ DySetAop(releasepage);
027c5e7a 10543+ DySetAop(freepage);
4a4d8108
AM
10544+ /* these two will be changed according to an aufs mount option */
10545+ DySetAop(direct_IO);
10546+ DySetAop(get_xip_mem);
10547+ DySetAop(migratepage);
10548+ DySetAop(launder_page);
10549+ DySetAop(is_partially_uptodate);
392086de 10550+ DySetAop(is_dirty_writeback);
4a4d8108 10551+ DySetAop(error_remove_page);
b4510431
AM
10552+ DySetAop(swap_activate);
10553+ DySetAop(swap_deactivate);
4a4d8108
AM
10554+
10555+ DyDbgSize(cnt, *h_aop);
10556+ dyaop->da_get_xip_mem = h_aop->get_xip_mem;
10557+}
10558+
4a4d8108
AM
10559+/* ---------------------------------------------------------------------- */
10560+
10561+static void dy_bug(struct kref *kref)
10562+{
10563+ BUG();
10564+}
10565+
10566+static struct au_dykey *dy_get(struct au_dynop *op, struct au_branch *br)
10567+{
10568+ struct au_dykey *key, *old;
10569+ struct au_splhead *spl;
b752ccd1 10570+ struct op {
4a4d8108 10571+ unsigned int sz;
b752ccd1
AM
10572+ void (*set)(struct au_dykey *key, const void *h_op,
10573+ struct super_block *h_sb __maybe_unused);
10574+ };
10575+ static const struct op a[] = {
4a4d8108
AM
10576+ [AuDy_AOP] = {
10577+ .sz = sizeof(struct au_dyaop),
b752ccd1 10578+ .set = dy_aop
4a4d8108 10579+ }
b752ccd1
AM
10580+ };
10581+ const struct op *p;
4a4d8108
AM
10582+
10583+ spl = dynop + op->dy_type;
10584+ key = dy_gfind_get(spl, op->dy_hop);
10585+ if (key)
10586+ goto out_add; /* success */
10587+
10588+ p = a + op->dy_type;
10589+ key = kzalloc(p->sz, GFP_NOFS);
10590+ if (unlikely(!key)) {
10591+ key = ERR_PTR(-ENOMEM);
10592+ goto out;
10593+ }
10594+
10595+ key->dk_op.dy_hop = op->dy_hop;
10596+ kref_init(&key->dk_kref);
86dc4139 10597+ p->set(key, op->dy_hop, au_br_sb(br));
4a4d8108
AM
10598+ old = dy_gadd(spl, key);
10599+ if (old) {
10600+ kfree(key);
10601+ key = old;
10602+ }
10603+
10604+out_add:
10605+ old = dy_bradd(br, key);
10606+ if (old)
10607+ /* its ref-count should never be zero here */
10608+ kref_put(&key->dk_kref, dy_bug);
10609+out:
10610+ return key;
10611+}
10612+
10613+/* ---------------------------------------------------------------------- */
10614+/*
10615+ * Aufs prohibits O_DIRECT by default even if the branch supports it.
c1595e42 10616+ * This behaviour is necessary to return an error from open(O_DIRECT) instead
4a4d8108
AM
10617+ * of the succeeding I/O. The dio mount option enables O_DIRECT and makes
10618+ * open(O_DIRECT) always succeed, but the succeeding I/O may return an error.
10619+ * See the aufs manual for details.
10620+ *
10621+ * To keep this behaviour, aufs has to set NULL to ->get_xip_mem too, and the
10622+ * performance of fadvise() and madvise() may be affected.
10623+ */
10624+static void dy_adx(struct au_dyaop *dyaop, int do_dx)
10625+{
10626+ if (!do_dx) {
10627+ dyaop->da_op.direct_IO = NULL;
10628+ dyaop->da_op.get_xip_mem = NULL;
10629+ } else {
10630+ dyaop->da_op.direct_IO = aufs_aop.direct_IO;
10631+ dyaop->da_op.get_xip_mem = aufs_aop.get_xip_mem;
10632+ if (!dyaop->da_get_xip_mem)
10633+ dyaop->da_op.get_xip_mem = NULL;
10634+ }
10635+}
10636+
10637+static struct au_dyaop *dy_aget(struct au_branch *br,
10638+ const struct address_space_operations *h_aop,
10639+ int do_dx)
10640+{
10641+ struct au_dyaop *dyaop;
10642+ struct au_dynop op;
10643+
10644+ op.dy_type = AuDy_AOP;
10645+ op.dy_haop = h_aop;
10646+ dyaop = (void *)dy_get(&op, br);
10647+ if (IS_ERR(dyaop))
10648+ goto out;
10649+ dy_adx(dyaop, do_dx);
10650+
10651+out:
10652+ return dyaop;
10653+}
10654+
10655+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
10656+ struct inode *h_inode)
10657+{
10658+ int err, do_dx;
10659+ struct super_block *sb;
10660+ struct au_branch *br;
10661+ struct au_dyaop *dyaop;
10662+
10663+ AuDebugOn(!S_ISREG(h_inode->i_mode));
10664+ IiMustWriteLock(inode);
10665+
10666+ sb = inode->i_sb;
10667+ br = au_sbr(sb, bindex);
10668+ do_dx = !!au_opt_test(au_mntflags(sb), DIO);
10669+ dyaop = dy_aget(br, h_inode->i_mapping->a_ops, do_dx);
10670+ err = PTR_ERR(dyaop);
10671+ if (IS_ERR(dyaop))
10672+ /* unnecessary to call dy_fput() */
10673+ goto out;
10674+
10675+ err = 0;
10676+ inode->i_mapping->a_ops = &dyaop->da_op;
10677+
10678+out:
10679+ return err;
10680+}
10681+
b752ccd1
AM
10682+/*
10683+ * Is it safe to replace a_ops while the inode/file is in operation?
10684+ * Yes, I hope so.
10685+ */
10686+int au_dy_irefresh(struct inode *inode)
10687+{
10688+ int err;
10689+ aufs_bindex_t bstart;
10690+ struct inode *h_inode;
10691+
10692+ err = 0;
10693+ if (S_ISREG(inode->i_mode)) {
10694+ bstart = au_ibstart(inode);
10695+ h_inode = au_h_iptr(inode, bstart);
10696+ err = au_dy_iaop(inode, bstart, h_inode);
10697+ }
10698+ return err;
10699+}
10700+
4a4d8108
AM
10701+void au_dy_arefresh(int do_dx)
10702+{
10703+ struct au_splhead *spl;
10704+ struct list_head *head;
10705+ struct au_dykey *key;
10706+
10707+ spl = dynop + AuDy_AOP;
10708+ head = &spl->head;
10709+ spin_lock(&spl->spin);
10710+ list_for_each_entry(key, head, dk_list)
10711+ dy_adx((void *)key, do_dx);
10712+ spin_unlock(&spl->spin);
10713+}
10714+
4a4d8108
AM
10715+/* ---------------------------------------------------------------------- */
10716+
10717+void __init au_dy_init(void)
10718+{
10719+ int i;
10720+
10721+ /* make sure that 'struct au_dykey *' can be any type */
10722+ BUILD_BUG_ON(offsetof(struct au_dyaop, da_key));
4a4d8108
AM
10723+
10724+ for (i = 0; i < AuDyLast; i++)
10725+ au_spl_init(dynop + i);
10726+}
10727+
10728+void au_dy_fin(void)
10729+{
10730+ int i;
10731+
10732+ for (i = 0; i < AuDyLast; i++)
10733+ WARN_ON(!list_empty(&dynop[i].head));
10734+}
7f207e10
AM
10735diff -urN /usr/share/empty/fs/aufs/dynop.h linux/fs/aufs/dynop.h
10736--- /usr/share/empty/fs/aufs/dynop.h 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
10737+++ linux/fs/aufs/dynop.h 2016-02-28 11:27:01.277245613 +0100
10738@@ -0,0 +1,76 @@
4a4d8108 10739+/*
7f2ca4b1 10740+ * Copyright (C) 2010-2016 Junjiro R. Okajima
4a4d8108
AM
10741+ *
10742+ * This program, aufs is free software; you can redistribute it and/or modify
10743+ * it under the terms of the GNU General Public License as published by
10744+ * the Free Software Foundation; either version 2 of the License, or
10745+ * (at your option) any later version.
10746+ *
10747+ * This program is distributed in the hope that it will be useful,
10748+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10749+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10750+ * GNU General Public License for more details.
10751+ *
10752+ * You should have received a copy of the GNU General Public License
523b37e3 10753+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
10754+ */
10755+
10756+/*
10757+ * dynamically customizable operations (for regular files only)
10758+ */
10759+
10760+#ifndef __AUFS_DYNOP_H__
10761+#define __AUFS_DYNOP_H__
10762+
10763+#ifdef __KERNEL__
10764+
7f2ca4b1
JR
10765+#include <linux/fs.h>
10766+#include <linux/kref.h>
4a4d8108 10767+
2cbb1c4b 10768+enum {AuDy_AOP, AuDyLast};
4a4d8108
AM
10769+
10770+struct au_dynop {
10771+ int dy_type;
10772+ union {
10773+ const void *dy_hop;
10774+ const struct address_space_operations *dy_haop;
4a4d8108
AM
10775+ };
10776+};
10777+
10778+struct au_dykey {
10779+ union {
10780+ struct list_head dk_list;
10781+ struct rcu_head dk_rcu;
10782+ };
10783+ struct au_dynop dk_op;
10784+
10785+ /*
10786+ * while the key is in the branch local array, a kref is held. when the
10787+ * branch is removed, the kref is put.
10788+ */
10789+ struct kref dk_kref;
10790+};
10791+
10792+/* stop unioning since their sizes are very different from each other */
10793+struct au_dyaop {
10794+ struct au_dykey da_key;
10795+ struct address_space_operations da_op; /* not const */
10796+ int (*da_get_xip_mem)(struct address_space *, pgoff_t, int,
10797+ void **, unsigned long *);
10798+};
10799+
4a4d8108
AM
10800+/* ---------------------------------------------------------------------- */
10801+
10802+/* dynop.c */
10803+struct au_branch;
10804+void au_dy_put(struct au_dykey *key);
10805+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
10806+ struct inode *h_inode);
b752ccd1 10807+int au_dy_irefresh(struct inode *inode);
4a4d8108 10808+void au_dy_arefresh(int do_dio);
4a4d8108
AM
10809+
10810+void __init au_dy_init(void);
10811+void au_dy_fin(void);
10812+
4a4d8108
AM
10813+#endif /* __KERNEL__ */
10814+#endif /* __AUFS_DYNOP_H__ */
7f207e10
AM
10815diff -urN /usr/share/empty/fs/aufs/export.c linux/fs/aufs/export.c
10816--- /usr/share/empty/fs/aufs/export.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 10817+++ linux/fs/aufs/export.c 2016-02-28 12:40:45.724388517 +0100
523b37e3 10818@@ -0,0 +1,831 @@
4a4d8108 10819+/*
7f2ca4b1 10820+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
10821+ *
10822+ * This program, aufs is free software; you can redistribute it and/or modify
10823+ * it under the terms of the GNU General Public License as published by
10824+ * the Free Software Foundation; either version 2 of the License, or
10825+ * (at your option) any later version.
10826+ *
10827+ * This program is distributed in the hope that it will be useful,
10828+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10829+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10830+ * GNU General Public License for more details.
10831+ *
10832+ * You should have received a copy of the GNU General Public License
523b37e3 10833+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
10834+ */
10835+
10836+/*
10837+ * export via nfs
10838+ */
10839+
10840+#include <linux/exportfs.h>
7eafdf33 10841+#include <linux/fs_struct.h>
4a4d8108
AM
10842+#include <linux/namei.h>
10843+#include <linux/nsproxy.h>
10844+#include <linux/random.h>
10845+#include <linux/writeback.h>
7eafdf33 10846+#include "../fs/mount.h"
4a4d8108
AM
10847+#include "aufs.h"
10848+
10849+union conv {
10850+#ifdef CONFIG_AUFS_INO_T_64
10851+ __u32 a[2];
10852+#else
10853+ __u32 a[1];
10854+#endif
10855+ ino_t ino;
10856+};
10857+
10858+static ino_t decode_ino(__u32 *a)
10859+{
10860+ union conv u;
10861+
10862+ BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a));
10863+ u.a[0] = a[0];
10864+#ifdef CONFIG_AUFS_INO_T_64
10865+ u.a[1] = a[1];
10866+#endif
10867+ return u.ino;
10868+}
10869+
10870+static void encode_ino(__u32 *a, ino_t ino)
10871+{
10872+ union conv u;
10873+
10874+ u.ino = ino;
10875+ a[0] = u.a[0];
10876+#ifdef CONFIG_AUFS_INO_T_64
10877+ a[1] = u.a[1];
10878+#endif
10879+}
10880+
/*
 * NFS file handle: index of each 32bit word in the aufs part of the handle.
 * Words from Fh_tail on are handed to exportfs for the lower filesystem.
 */
enum {
	Fh_br_id,
	Fh_sigen,

	/* inode number, two words when 64bit inode numbers are supported */
	Fh_ino1,
#ifdef CONFIG_AUFS_INO_T_64
	Fh_ino2,
#endif

	/* inode number of the parent dir, same layout */
	Fh_dir_ino1,
#ifdef CONFIG_AUFS_INO_T_64
	Fh_dir_ino2,
#endif

	Fh_igen,
	Fh_h_type,
	Fh_tail,

	/* aliases for the first word of each inode number */
	Fh_ino = Fh_ino1,
	Fh_dir_ino = Fh_dir_ino1
};
10902+
10903+static int au_test_anon(struct dentry *dentry)
10904+{
027c5e7a 10905+ /* note: read d_flags without d_lock */
4a4d8108
AM
10906+ return !!(dentry->d_flags & DCACHE_DISCONNECTED);
10907+}
10908+
a2a7ad62
AM
10909+int au_test_nfsd(void)
10910+{
10911+ int ret;
10912+ struct task_struct *tsk = current;
10913+ char comm[sizeof(tsk->comm)];
10914+
10915+ ret = 0;
10916+ if (tsk->flags & PF_KTHREAD) {
10917+ get_task_comm(comm, tsk);
10918+ ret = !strcmp(comm, "nfsd");
10919+ }
10920+
10921+ return ret;
10922+}
10923+
4a4d8108
AM
10924+/* ---------------------------------------------------------------------- */
10925+/* inode generation external table */
10926+
b752ccd1 10927+void au_xigen_inc(struct inode *inode)
4a4d8108 10928+{
4a4d8108
AM
10929+ loff_t pos;
10930+ ssize_t sz;
10931+ __u32 igen;
10932+ struct super_block *sb;
10933+ struct au_sbinfo *sbinfo;
10934+
4a4d8108 10935+ sb = inode->i_sb;
b752ccd1 10936+ AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
1facf9fc 10937+
b752ccd1 10938+ sbinfo = au_sbi(sb);
1facf9fc 10939+ pos = inode->i_ino;
10940+ pos *= sizeof(igen);
10941+ igen = inode->i_generation + 1;
1facf9fc 10942+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen,
10943+ sizeof(igen), &pos);
10944+ if (sz == sizeof(igen))
b752ccd1 10945+ return; /* success */
1facf9fc 10946+
b752ccd1 10947+ if (unlikely(sz >= 0))
1facf9fc 10948+ AuIOErr("xigen error (%zd)\n", sz);
1facf9fc 10949+}
10950+
10951+int au_xigen_new(struct inode *inode)
10952+{
10953+ int err;
10954+ loff_t pos;
10955+ ssize_t sz;
10956+ struct super_block *sb;
10957+ struct au_sbinfo *sbinfo;
10958+ struct file *file;
10959+
10960+ err = 0;
10961+ /* todo: dirty, at mount time */
10962+ if (inode->i_ino == AUFS_ROOT_INO)
10963+ goto out;
10964+ sb = inode->i_sb;
dece6358 10965+ SiMustAnyLock(sb);
1facf9fc 10966+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
10967+ goto out;
10968+
10969+ err = -EFBIG;
10970+ pos = inode->i_ino;
10971+ if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) {
10972+ AuIOErr1("too large i%lld\n", pos);
10973+ goto out;
10974+ }
10975+ pos *= sizeof(inode->i_generation);
10976+
10977+ err = 0;
10978+ sbinfo = au_sbi(sb);
10979+ file = sbinfo->si_xigen;
10980+ BUG_ON(!file);
10981+
c06a8ce3 10982+ if (vfsub_f_size_read(file)
1facf9fc 10983+ < pos + sizeof(inode->i_generation)) {
10984+ inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next);
10985+ sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation,
10986+ sizeof(inode->i_generation), &pos);
10987+ } else
10988+ sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation,
10989+ sizeof(inode->i_generation), &pos);
10990+ if (sz == sizeof(inode->i_generation))
10991+ goto out; /* success */
10992+
10993+ err = sz;
10994+ if (unlikely(sz >= 0)) {
10995+ err = -EIO;
10996+ AuIOErr("xigen error (%zd)\n", sz);
10997+ }
10998+
4f0767ce 10999+out:
1facf9fc 11000+ return err;
11001+}
11002+
11003+int au_xigen_set(struct super_block *sb, struct file *base)
11004+{
11005+ int err;
11006+ struct au_sbinfo *sbinfo;
11007+ struct file *file;
11008+
dece6358
AM
11009+ SiMustWriteLock(sb);
11010+
1facf9fc 11011+ sbinfo = au_sbi(sb);
11012+ file = au_xino_create2(base, sbinfo->si_xigen);
11013+ err = PTR_ERR(file);
11014+ if (IS_ERR(file))
11015+ goto out;
11016+ err = 0;
11017+ if (sbinfo->si_xigen)
11018+ fput(sbinfo->si_xigen);
11019+ sbinfo->si_xigen = file;
11020+
4f0767ce 11021+out:
1facf9fc 11022+ return err;
11023+}
11024+
11025+void au_xigen_clr(struct super_block *sb)
11026+{
11027+ struct au_sbinfo *sbinfo;
11028+
dece6358
AM
11029+ SiMustWriteLock(sb);
11030+
1facf9fc 11031+ sbinfo = au_sbi(sb);
11032+ if (sbinfo->si_xigen) {
11033+ fput(sbinfo->si_xigen);
11034+ sbinfo->si_xigen = NULL;
11035+ }
11036+}
11037+
11038+/* ---------------------------------------------------------------------- */
11039+
11040+static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino,
11041+ ino_t dir_ino)
11042+{
11043+ struct dentry *dentry, *d;
11044+ struct inode *inode;
11045+ unsigned int sigen;
11046+
11047+ dentry = NULL;
11048+ inode = ilookup(sb, ino);
11049+ if (!inode)
11050+ goto out;
11051+
11052+ dentry = ERR_PTR(-ESTALE);
11053+ sigen = au_sigen(sb);
11054+ if (unlikely(is_bad_inode(inode)
11055+ || IS_DEADDIR(inode)
537831f9 11056+ || sigen != au_iigen(inode, NULL)))
1facf9fc 11057+ goto out_iput;
11058+
11059+ dentry = NULL;
11060+ if (!dir_ino || S_ISDIR(inode->i_mode))
11061+ dentry = d_find_alias(inode);
11062+ else {
027c5e7a 11063+ spin_lock(&inode->i_lock);
c1595e42 11064+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
027c5e7a 11065+ spin_lock(&d->d_lock);
1facf9fc 11066+ if (!au_test_anon(d)
11067+ && d->d_parent->d_inode->i_ino == dir_ino) {
027c5e7a
AM
11068+ dentry = dget_dlock(d);
11069+ spin_unlock(&d->d_lock);
1facf9fc 11070+ break;
11071+ }
027c5e7a
AM
11072+ spin_unlock(&d->d_lock);
11073+ }
11074+ spin_unlock(&inode->i_lock);
1facf9fc 11075+ }
027c5e7a 11076+ if (unlikely(dentry && au_digen_test(dentry, sigen))) {
2cbb1c4b 11077+ /* need to refresh */
1facf9fc 11078+ dput(dentry);
2cbb1c4b 11079+ dentry = NULL;
1facf9fc 11080+ }
11081+
4f0767ce 11082+out_iput:
1facf9fc 11083+ iput(inode);
4f0767ce 11084+out:
2cbb1c4b 11085+ AuTraceErrPtr(dentry);
1facf9fc 11086+ return dentry;
11087+}
11088+
11089+/* ---------------------------------------------------------------------- */
11090+
11091+/* todo: dirty? */
11092+/* if exportfs_decode_fh() passed vfsmount*, we could be happy */
4a4d8108
AM
11093+
/* argument block for au_compare_mnt() */
struct au_compare_mnt_args {
	/* input: the superblock to look for */
	struct super_block *sb;

	/* output: the matching mount, set by au_compare_mnt() */
	struct vfsmount *mnt;
};
11101+
11102+static int au_compare_mnt(struct vfsmount *mnt, void *arg)
11103+{
11104+ struct au_compare_mnt_args *a = arg;
11105+
11106+ if (mnt->mnt_sb != a->sb)
11107+ return 0;
11108+ a->mnt = mntget(mnt);
11109+ return 1;
11110+}
11111+
1facf9fc 11112+static struct vfsmount *au_mnt_get(struct super_block *sb)
11113+{
4a4d8108 11114+ int err;
7eafdf33 11115+ struct path root;
4a4d8108
AM
11116+ struct au_compare_mnt_args args = {
11117+ .sb = sb
11118+ };
1facf9fc 11119+
7eafdf33 11120+ get_fs_root(current->fs, &root);
523b37e3 11121+ rcu_read_lock();
7eafdf33 11122+ err = iterate_mounts(au_compare_mnt, &args, root.mnt);
523b37e3 11123+ rcu_read_unlock();
7eafdf33 11124+ path_put(&root);
4a4d8108
AM
11125+ AuDebugOn(!err);
11126+ AuDebugOn(!args.mnt);
11127+ return args.mnt;
1facf9fc 11128+}
11129+
11130+struct au_nfsd_si_lock {
4a4d8108 11131+ unsigned int sigen;
027c5e7a 11132+ aufs_bindex_t bindex, br_id;
1facf9fc 11133+ unsigned char force_lock;
11134+};
11135+
027c5e7a
AM
11136+static int si_nfsd_read_lock(struct super_block *sb,
11137+ struct au_nfsd_si_lock *nsi_lock)
1facf9fc 11138+{
027c5e7a 11139+ int err;
1facf9fc 11140+ aufs_bindex_t bindex;
11141+
11142+ si_read_lock(sb, AuLock_FLUSH);
11143+
11144+ /* branch id may be wrapped around */
027c5e7a 11145+ err = 0;
1facf9fc 11146+ bindex = au_br_index(sb, nsi_lock->br_id);
11147+ if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb))
11148+ goto out; /* success */
11149+
027c5e7a
AM
11150+ err = -ESTALE;
11151+ bindex = -1;
1facf9fc 11152+ if (!nsi_lock->force_lock)
11153+ si_read_unlock(sb);
1facf9fc 11154+
4f0767ce 11155+out:
027c5e7a
AM
11156+ nsi_lock->bindex = bindex;
11157+ return err;
1facf9fc 11158+}
11159+
11160+struct find_name_by_ino {
392086de 11161+ struct dir_context ctx;
1facf9fc 11162+ int called, found;
11163+ ino_t ino;
11164+ char *name;
11165+ int namelen;
11166+};
11167+
11168+static int
392086de
AM
11169+find_name_by_ino(struct dir_context *ctx, const char *name, int namelen,
11170+ loff_t offset, u64 ino, unsigned int d_type)
1facf9fc 11171+{
392086de
AM
11172+ struct find_name_by_ino *a = container_of(ctx, struct find_name_by_ino,
11173+ ctx);
1facf9fc 11174+
11175+ a->called++;
11176+ if (a->ino != ino)
11177+ return 0;
11178+
11179+ memcpy(a->name, name, namelen);
11180+ a->namelen = namelen;
11181+ a->found = 1;
11182+ return 1;
11183+}
11184+
11185+static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino,
11186+ struct au_nfsd_si_lock *nsi_lock)
11187+{
11188+ struct dentry *dentry, *parent;
11189+ struct file *file;
11190+ struct inode *dir;
392086de
AM
11191+ struct find_name_by_ino arg = {
11192+ .ctx = {
11193+ .actor = au_diractor(find_name_by_ino)
11194+ }
11195+ };
1facf9fc 11196+ int err;
11197+
11198+ parent = path->dentry;
11199+ if (nsi_lock)
11200+ si_read_unlock(parent->d_sb);
4a4d8108 11201+ file = vfsub_dentry_open(path, au_dir_roflags);
1facf9fc 11202+ dentry = (void *)file;
11203+ if (IS_ERR(file))
11204+ goto out;
11205+
11206+ dentry = ERR_PTR(-ENOMEM);
537831f9 11207+ arg.name = (void *)__get_free_page(GFP_NOFS);
1facf9fc 11208+ if (unlikely(!arg.name))
11209+ goto out_file;
11210+ arg.ino = ino;
11211+ arg.found = 0;
11212+ do {
11213+ arg.called = 0;
11214+ /* smp_mb(); */
392086de 11215+ err = vfsub_iterate_dir(file, &arg.ctx);
1facf9fc 11216+ } while (!err && !arg.found && arg.called);
11217+ dentry = ERR_PTR(err);
11218+ if (unlikely(err))
11219+ goto out_name;
1716fcea
AM
11220+ /* instead of ENOENT */
11221+ dentry = ERR_PTR(-ESTALE);
1facf9fc 11222+ if (!arg.found)
11223+ goto out_name;
11224+
b4510431 11225+ /* do not call vfsub_lkup_one() */
1facf9fc 11226+ dir = parent->d_inode;
11227+ mutex_lock(&dir->i_mutex);
11228+ dentry = vfsub_lookup_one_len(arg.name, parent, arg.namelen);
11229+ mutex_unlock(&dir->i_mutex);
11230+ AuTraceErrPtr(dentry);
11231+ if (IS_ERR(dentry))
11232+ goto out_name;
11233+ AuDebugOn(au_test_anon(dentry));
11234+ if (unlikely(!dentry->d_inode)) {
11235+ dput(dentry);
11236+ dentry = ERR_PTR(-ENOENT);
11237+ }
11238+
4f0767ce 11239+out_name:
537831f9 11240+ free_page((unsigned long)arg.name);
4f0767ce 11241+out_file:
1facf9fc 11242+ fput(file);
4f0767ce 11243+out:
1facf9fc 11244+ if (unlikely(nsi_lock
11245+ && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0))
11246+ if (!IS_ERR(dentry)) {
11247+ dput(dentry);
11248+ dentry = ERR_PTR(-ESTALE);
11249+ }
11250+ AuTraceErrPtr(dentry);
11251+ return dentry;
11252+}
11253+
11254+static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino,
11255+ ino_t dir_ino,
11256+ struct au_nfsd_si_lock *nsi_lock)
11257+{
11258+ struct dentry *dentry;
11259+ struct path path;
11260+
11261+ if (dir_ino != AUFS_ROOT_INO) {
11262+ path.dentry = decode_by_ino(sb, dir_ino, 0);
11263+ dentry = path.dentry;
11264+ if (!path.dentry || IS_ERR(path.dentry))
11265+ goto out;
11266+ AuDebugOn(au_test_anon(path.dentry));
11267+ } else
11268+ path.dentry = dget(sb->s_root);
11269+
11270+ path.mnt = au_mnt_get(sb);
11271+ dentry = au_lkup_by_ino(&path, ino, nsi_lock);
11272+ path_put(&path);
11273+
4f0767ce 11274+out:
1facf9fc 11275+ AuTraceErrPtr(dentry);
11276+ return dentry;
11277+}
11278+
11279+/* ---------------------------------------------------------------------- */
11280+
/* acceptance callback for exportfs_decode_fh(): every dentry is accepted */
static int h_acceptable(void *expv, struct dentry *dentry)
{
	(void)expv;
	(void)dentry;
	return 1;
}
11285+
11286+static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath,
11287+ char *buf, int len, struct super_block *sb)
11288+{
11289+ char *p;
11290+ int n;
11291+ struct path path;
11292+
11293+ p = d_path(h_rootpath, buf, len);
11294+ if (IS_ERR(p))
11295+ goto out;
11296+ n = strlen(p);
11297+
11298+ path.mnt = h_rootpath->mnt;
11299+ path.dentry = h_parent;
11300+ p = d_path(&path, buf, len);
11301+ if (IS_ERR(p))
11302+ goto out;
11303+ if (n != 1)
11304+ p += n;
11305+
11306+ path.mnt = au_mnt_get(sb);
11307+ path.dentry = sb->s_root;
11308+ p = d_path(&path, buf, len - strlen(p));
11309+ mntput(path.mnt);
11310+ if (IS_ERR(p))
11311+ goto out;
11312+ if (n != 1)
11313+ p[strlen(p)] = '/';
11314+
4f0767ce 11315+out:
1facf9fc 11316+ AuTraceErrPtr(p);
11317+ return p;
11318+}
11319+
11320+static
027c5e7a
AM
11321+struct dentry *decode_by_path(struct super_block *sb, ino_t ino, __u32 *fh,
11322+ int fh_len, struct au_nfsd_si_lock *nsi_lock)
1facf9fc 11323+{
11324+ struct dentry *dentry, *h_parent, *root;
11325+ struct super_block *h_sb;
11326+ char *pathname, *p;
11327+ struct vfsmount *h_mnt;
11328+ struct au_branch *br;
11329+ int err;
11330+ struct path path;
11331+
027c5e7a 11332+ br = au_sbr(sb, nsi_lock->bindex);
86dc4139 11333+ h_mnt = au_br_mnt(br);
1facf9fc 11334+ h_sb = h_mnt->mnt_sb;
11335+ /* todo: call lower fh_to_dentry()? fh_to_parent()? */
11336+ h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail),
11337+ fh_len - Fh_tail, fh[Fh_h_type],
11338+ h_acceptable, /*context*/NULL);
11339+ dentry = h_parent;
11340+ if (unlikely(!h_parent || IS_ERR(h_parent))) {
11341+ AuWarn1("%s decode_fh failed, %ld\n",
11342+ au_sbtype(h_sb), PTR_ERR(h_parent));
11343+ goto out;
11344+ }
11345+ dentry = NULL;
11346+ if (unlikely(au_test_anon(h_parent))) {
11347+ AuWarn1("%s decode_fh returned a disconnected dentry\n",
11348+ au_sbtype(h_sb));
11349+ goto out_h_parent;
11350+ }
11351+
11352+ dentry = ERR_PTR(-ENOMEM);
11353+ pathname = (void *)__get_free_page(GFP_NOFS);
11354+ if (unlikely(!pathname))
11355+ goto out_h_parent;
11356+
11357+ root = sb->s_root;
11358+ path.mnt = h_mnt;
11359+ di_read_lock_parent(root, !AuLock_IR);
027c5e7a 11360+ path.dentry = au_h_dptr(root, nsi_lock->bindex);
1facf9fc 11361+ di_read_unlock(root, !AuLock_IR);
11362+ p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb);
11363+ dentry = (void *)p;
11364+ if (IS_ERR(p))
11365+ goto out_pathname;
11366+
11367+ si_read_unlock(sb);
11368+ err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
11369+ dentry = ERR_PTR(err);
11370+ if (unlikely(err))
11371+ goto out_relock;
11372+
11373+ dentry = ERR_PTR(-ENOENT);
11374+ AuDebugOn(au_test_anon(path.dentry));
11375+ if (unlikely(!path.dentry->d_inode))
11376+ goto out_path;
11377+
11378+ if (ino != path.dentry->d_inode->i_ino)
11379+ dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL);
11380+ else
11381+ dentry = dget(path.dentry);
11382+
4f0767ce 11383+out_path:
1facf9fc 11384+ path_put(&path);
4f0767ce 11385+out_relock:
1facf9fc 11386+ if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0))
11387+ if (!IS_ERR(dentry)) {
11388+ dput(dentry);
11389+ dentry = ERR_PTR(-ESTALE);
11390+ }
4f0767ce 11391+out_pathname:
1facf9fc 11392+ free_page((unsigned long)pathname);
4f0767ce 11393+out_h_parent:
1facf9fc 11394+ dput(h_parent);
4f0767ce 11395+out:
1facf9fc 11396+ AuTraceErrPtr(dentry);
11397+ return dentry;
11398+}
11399+
11400+/* ---------------------------------------------------------------------- */
11401+
11402+static struct dentry *
11403+aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
11404+ int fh_type)
11405+{
11406+ struct dentry *dentry;
11407+ __u32 *fh = fid->raw;
027c5e7a 11408+ struct au_branch *br;
1facf9fc 11409+ ino_t ino, dir_ino;
1facf9fc 11410+ struct au_nfsd_si_lock nsi_lock = {
1facf9fc 11411+ .force_lock = 0
11412+ };
11413+
1facf9fc 11414+ dentry = ERR_PTR(-ESTALE);
4a4d8108
AM
11415+ /* it should never happen, but the file handle is unreliable */
11416+ if (unlikely(fh_len < Fh_tail))
11417+ goto out;
11418+ nsi_lock.sigen = fh[Fh_sigen];
11419+ nsi_lock.br_id = fh[Fh_br_id];
11420+
1facf9fc 11421+ /* branch id may be wrapped around */
027c5e7a
AM
11422+ br = NULL;
11423+ if (unlikely(si_nfsd_read_lock(sb, &nsi_lock)))
1facf9fc 11424+ goto out;
11425+ nsi_lock.force_lock = 1;
11426+
11427+ /* is this inode still cached? */
11428+ ino = decode_ino(fh + Fh_ino);
4a4d8108
AM
11429+ /* it should never happen */
11430+ if (unlikely(ino == AUFS_ROOT_INO))
7f2ca4b1 11431+ goto out_unlock;
4a4d8108 11432+
1facf9fc 11433+ dir_ino = decode_ino(fh + Fh_dir_ino);
11434+ dentry = decode_by_ino(sb, ino, dir_ino);
11435+ if (IS_ERR(dentry))
11436+ goto out_unlock;
11437+ if (dentry)
11438+ goto accept;
11439+
11440+ /* is the parent dir cached? */
027c5e7a
AM
11441+ br = au_sbr(sb, nsi_lock.bindex);
11442+ atomic_inc(&br->br_count);
1facf9fc 11443+ dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock);
11444+ if (IS_ERR(dentry))
11445+ goto out_unlock;
11446+ if (dentry)
11447+ goto accept;
11448+
11449+ /* lookup path */
027c5e7a 11450+ dentry = decode_by_path(sb, ino, fh, fh_len, &nsi_lock);
1facf9fc 11451+ if (IS_ERR(dentry))
11452+ goto out_unlock;
11453+ if (unlikely(!dentry))
11454+ /* todo?: make it ESTALE */
11455+ goto out_unlock;
11456+
4f0767ce 11457+accept:
027c5e7a
AM
11458+ if (!au_digen_test(dentry, au_sigen(sb))
11459+ && dentry->d_inode->i_generation == fh[Fh_igen])
1facf9fc 11460+ goto out_unlock; /* success */
11461+
11462+ dput(dentry);
11463+ dentry = ERR_PTR(-ESTALE);
4f0767ce 11464+out_unlock:
027c5e7a
AM
11465+ if (br)
11466+ atomic_dec(&br->br_count);
1facf9fc 11467+ si_read_unlock(sb);
4f0767ce 11468+out:
1facf9fc 11469+ AuTraceErrPtr(dentry);
11470+ return dentry;
11471+}
11472+
#if 0 /* reserved for future use */
/*
 * support subtreecheck option
 *
 * NOTE(review): this disabled code calls decode_by_path() with an argument
 * list that differs from the definition in this file (which takes
 * sb, ino, fh, fh_len, nsi_lock); it has to be adapted before enabling.
 */
static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid,
					int fh_len, int fh_type)
{
	struct dentry *parent;
	__u32 *fh = fid->raw;
	ino_t dir_ino;

	dir_ino = decode_ino(fh + Fh_dir_ino);
	parent = decode_by_ino(sb, dir_ino, 0);
	if (IS_ERR(parent))
		goto out;
	if (!parent)
		parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]),
					dir_ino, fh, fh_len);

out:
	AuTraceErrPtr(parent);
	return parent;
}
#endif
11495+
11496+/* ---------------------------------------------------------------------- */
11497+
0c3ec466
AM
11498+static int aufs_encode_fh(struct inode *inode, __u32 *fh, int *max_len,
11499+ struct inode *dir)
1facf9fc 11500+{
11501+ int err;
0c3ec466 11502+ aufs_bindex_t bindex;
1facf9fc 11503+ struct super_block *sb, *h_sb;
0c3ec466
AM
11504+ struct dentry *dentry, *parent, *h_parent;
11505+ struct inode *h_dir;
1facf9fc 11506+ struct au_branch *br;
11507+
1facf9fc 11508+ err = -ENOSPC;
11509+ if (unlikely(*max_len <= Fh_tail)) {
11510+ AuWarn1("NFSv2 client (max_len %d)?\n", *max_len);
11511+ goto out;
11512+ }
11513+
11514+ err = FILEID_ROOT;
0c3ec466
AM
11515+ if (inode->i_ino == AUFS_ROOT_INO) {
11516+ AuDebugOn(inode->i_ino != AUFS_ROOT_INO);
1facf9fc 11517+ goto out;
11518+ }
11519+
1facf9fc 11520+ h_parent = NULL;
0c3ec466
AM
11521+ sb = inode->i_sb;
11522+ err = si_read_lock(sb, AuLock_FLUSH);
027c5e7a
AM
11523+ if (unlikely(err))
11524+ goto out;
11525+
1facf9fc 11526+#ifdef CONFIG_AUFS_DEBUG
11527+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
11528+ AuWarn1("NFS-exporting requires xino\n");
11529+#endif
027c5e7a 11530+ err = -EIO;
0c3ec466
AM
11531+ parent = NULL;
11532+ ii_read_lock_child(inode);
11533+ bindex = au_ibstart(inode);
11534+ if (!dir) {
c1595e42 11535+ dentry = d_find_any_alias(inode);
0c3ec466
AM
11536+ if (unlikely(!dentry))
11537+ goto out_unlock;
11538+ AuDebugOn(au_test_anon(dentry));
11539+ parent = dget_parent(dentry);
11540+ dput(dentry);
11541+ if (unlikely(!parent))
11542+ goto out_unlock;
11543+ dir = parent->d_inode;
1facf9fc 11544+ }
0c3ec466
AM
11545+
11546+ ii_read_lock_parent(dir);
11547+ h_dir = au_h_iptr(dir, bindex);
11548+ ii_read_unlock(dir);
11549+ if (unlikely(!h_dir))
11550+ goto out_parent;
c1595e42 11551+ h_parent = d_find_any_alias(h_dir);
1facf9fc 11552+ if (unlikely(!h_parent))
0c3ec466 11553+ goto out_hparent;
1facf9fc 11554+
11555+ err = -EPERM;
11556+ br = au_sbr(sb, bindex);
86dc4139 11557+ h_sb = au_br_sb(br);
1facf9fc 11558+ if (unlikely(!h_sb->s_export_op)) {
11559+ AuErr1("%s branch is not exportable\n", au_sbtype(h_sb));
0c3ec466 11560+ goto out_hparent;
1facf9fc 11561+ }
11562+
11563+ fh[Fh_br_id] = br->br_id;
11564+ fh[Fh_sigen] = au_sigen(sb);
11565+ encode_ino(fh + Fh_ino, inode->i_ino);
0c3ec466 11566+ encode_ino(fh + Fh_dir_ino, dir->i_ino);
1facf9fc 11567+ fh[Fh_igen] = inode->i_generation;
11568+
11569+ *max_len -= Fh_tail;
11570+ fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail),
11571+ max_len,
11572+ /*connectable or subtreecheck*/0);
11573+ err = fh[Fh_h_type];
11574+ *max_len += Fh_tail;
11575+ /* todo: macros? */
1716fcea 11576+ if (err != FILEID_INVALID)
1facf9fc 11577+ err = 99;
11578+ else
11579+ AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb));
11580+
0c3ec466 11581+out_hparent:
1facf9fc 11582+ dput(h_parent);
0c3ec466 11583+out_parent:
1facf9fc 11584+ dput(parent);
0c3ec466
AM
11585+out_unlock:
11586+ ii_read_unlock(inode);
11587+ si_read_unlock(sb);
4f0767ce 11588+out:
1facf9fc 11589+ if (unlikely(err < 0))
1716fcea 11590+ err = FILEID_INVALID;
1facf9fc 11591+ return err;
11592+}
11593+
11594+/* ---------------------------------------------------------------------- */
11595+
4a4d8108
AM
11596+static int aufs_commit_metadata(struct inode *inode)
11597+{
11598+ int err;
11599+ aufs_bindex_t bindex;
11600+ struct super_block *sb;
11601+ struct inode *h_inode;
11602+ int (*f)(struct inode *inode);
11603+
11604+ sb = inode->i_sb;
e49829fe 11605+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108
AM
11606+ ii_write_lock_child(inode);
11607+ bindex = au_ibstart(inode);
11608+ AuDebugOn(bindex < 0);
11609+ h_inode = au_h_iptr(inode, bindex);
11610+
11611+ f = h_inode->i_sb->s_export_op->commit_metadata;
11612+ if (f)
11613+ err = f(h_inode);
11614+ else {
11615+ struct writeback_control wbc = {
11616+ .sync_mode = WB_SYNC_ALL,
11617+ .nr_to_write = 0 /* metadata only */
11618+ };
11619+
11620+ err = sync_inode(h_inode, &wbc);
11621+ }
11622+
11623+ au_cpup_attr_timesizes(inode);
11624+ ii_write_unlock(inode);
11625+ si_read_unlock(sb);
11626+ return err;
11627+}
11628+
11629+/* ---------------------------------------------------------------------- */
11630+
1facf9fc 11631+static struct export_operations aufs_export_op = {
4a4d8108 11632+ .fh_to_dentry = aufs_fh_to_dentry,
1facf9fc 11633+ /* .fh_to_parent = aufs_fh_to_parent, */
4a4d8108
AM
11634+ .encode_fh = aufs_encode_fh,
11635+ .commit_metadata = aufs_commit_metadata
1facf9fc 11636+};
11637+
11638+void au_export_init(struct super_block *sb)
11639+{
11640+ struct au_sbinfo *sbinfo;
11641+ __u32 u;
11642+
11643+ sb->s_export_op = &aufs_export_op;
11644+ sbinfo = au_sbi(sb);
11645+ sbinfo->si_xigen = NULL;
11646+ get_random_bytes(&u, sizeof(u));
11647+ BUILD_BUG_ON(sizeof(u) != sizeof(int));
11648+ atomic_set(&sbinfo->si_xigen_next, u);
11649+}
076b876e
AM
11650diff -urN /usr/share/empty/fs/aufs/fhsm.c linux/fs/aufs/fhsm.c
11651--- /usr/share/empty/fs/aufs/fhsm.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 11652+++ linux/fs/aufs/fhsm.c 2016-02-28 11:27:01.280579017 +0100
c1595e42 11653@@ -0,0 +1,426 @@
076b876e 11654+/*
7f2ca4b1 11655+ * Copyright (C) 2011-2016 Junjiro R. Okajima
076b876e
AM
11656+ *
11657+ * This program, aufs is free software; you can redistribute it and/or modify
11658+ * it under the terms of the GNU General Public License as published by
11659+ * the Free Software Foundation; either version 2 of the License, or
11660+ * (at your option) any later version.
11661+ *
11662+ * This program is distributed in the hope that it will be useful,
11663+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
11664+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11665+ * GNU General Public License for more details.
11666+ *
11667+ * You should have received a copy of the GNU General Public License
11668+ * along with this program; if not, write to the Free Software
11669+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
11670+ */
11671+
11672+/*
11673+ * File-based Hierarchy Storage Management
11674+ */
11675+
11676+#include <linux/anon_inodes.h>
11677+#include <linux/poll.h>
11678+#include <linux/seq_file.h>
11679+#include <linux/statfs.h>
11680+#include "aufs.h"
11681+
c1595e42
JR
11682+static aufs_bindex_t au_fhsm_bottom(struct super_block *sb)
11683+{
11684+ struct au_sbinfo *sbinfo;
11685+ struct au_fhsm *fhsm;
11686+
11687+ SiMustAnyLock(sb);
11688+
11689+ sbinfo = au_sbi(sb);
11690+ fhsm = &sbinfo->si_fhsm;
11691+ AuDebugOn(!fhsm);
11692+ return fhsm->fhsm_bottom;
11693+}
11694+
11695+void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex)
11696+{
11697+ struct au_sbinfo *sbinfo;
11698+ struct au_fhsm *fhsm;
11699+
11700+ SiMustWriteLock(sb);
11701+
11702+ sbinfo = au_sbi(sb);
11703+ fhsm = &sbinfo->si_fhsm;
11704+ AuDebugOn(!fhsm);
11705+ fhsm->fhsm_bottom = bindex;
11706+}
11707+
11708+/* ---------------------------------------------------------------------- */
11709+
076b876e
AM
11710+static int au_fhsm_test_jiffy(struct au_sbinfo *sbinfo, struct au_branch *br)
11711+{
11712+ struct au_br_fhsm *bf;
11713+
11714+ bf = br->br_fhsm;
11715+ MtxMustLock(&bf->bf_lock);
11716+
11717+ return !bf->bf_readable
11718+ || time_after(jiffies,
11719+ bf->bf_jiffy + sbinfo->si_fhsm.fhsm_expire);
11720+}
11721+
11722+/* ---------------------------------------------------------------------- */
11723+
11724+static void au_fhsm_notify(struct super_block *sb, int val)
11725+{
11726+ struct au_sbinfo *sbinfo;
11727+ struct au_fhsm *fhsm;
11728+
11729+ SiMustAnyLock(sb);
11730+
11731+ sbinfo = au_sbi(sb);
11732+ fhsm = &sbinfo->si_fhsm;
11733+ if (au_fhsm_pid(fhsm)
11734+ && atomic_read(&fhsm->fhsm_readable) != -1) {
11735+ atomic_set(&fhsm->fhsm_readable, val);
11736+ if (val)
11737+ wake_up(&fhsm->fhsm_wqh);
11738+ }
11739+}
11740+
11741+static int au_fhsm_stfs(struct super_block *sb, aufs_bindex_t bindex,
11742+ struct aufs_stfs *rstfs, int do_lock, int do_notify)
11743+{
11744+ int err;
11745+ struct au_branch *br;
11746+ struct au_br_fhsm *bf;
11747+
11748+ br = au_sbr(sb, bindex);
11749+ AuDebugOn(au_br_rdonly(br));
11750+ bf = br->br_fhsm;
11751+ AuDebugOn(!bf);
11752+
11753+ if (do_lock)
11754+ mutex_lock(&bf->bf_lock);
11755+ else
11756+ MtxMustLock(&bf->bf_lock);
11757+
11758+ /* sb->s_root for NFS is unreliable */
11759+ err = au_br_stfs(br, &bf->bf_stfs);
11760+ if (unlikely(err)) {
11761+ AuErr1("FHSM failed (%d), b%d, ignored.\n", bindex, err);
11762+ goto out;
11763+ }
11764+
11765+ bf->bf_jiffy = jiffies;
11766+ bf->bf_readable = 1;
11767+ if (do_notify)
11768+ au_fhsm_notify(sb, /*val*/1);
11769+ if (rstfs)
11770+ *rstfs = bf->bf_stfs;
11771+
11772+out:
11773+ if (do_lock)
11774+ mutex_unlock(&bf->bf_lock);
11775+ au_fhsm_notify(sb, /*val*/1);
11776+
11777+ return err;
11778+}
11779+
11780+void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force)
11781+{
11782+ int err;
076b876e
AM
11783+ struct au_sbinfo *sbinfo;
11784+ struct au_fhsm *fhsm;
11785+ struct au_branch *br;
11786+ struct au_br_fhsm *bf;
11787+
11788+ AuDbg("b%d, force %d\n", bindex, force);
11789+ SiMustAnyLock(sb);
11790+
11791+ sbinfo = au_sbi(sb);
11792+ fhsm = &sbinfo->si_fhsm;
c1595e42
JR
11793+ if (!au_ftest_si(sbinfo, FHSM)
11794+ || fhsm->fhsm_bottom == bindex)
076b876e
AM
11795+ return;
11796+
11797+ br = au_sbr(sb, bindex);
11798+ bf = br->br_fhsm;
11799+ AuDebugOn(!bf);
11800+ mutex_lock(&bf->bf_lock);
11801+ if (force
11802+ || au_fhsm_pid(fhsm)
11803+ || au_fhsm_test_jiffy(sbinfo, br))
11804+ err = au_fhsm_stfs(sb, bindex, /*rstfs*/NULL, /*do_lock*/0,
11805+ /*do_notify*/1);
11806+ mutex_unlock(&bf->bf_lock);
11807+}
11808+
11809+void au_fhsm_wrote_all(struct super_block *sb, int force)
11810+{
11811+ aufs_bindex_t bindex, bend;
11812+ struct au_branch *br;
11813+
11814+ /* exclude the bottom */
c1595e42 11815+ bend = au_fhsm_bottom(sb);
076b876e
AM
11816+ for (bindex = 0; bindex < bend; bindex++) {
11817+ br = au_sbr(sb, bindex);
11818+ if (au_br_fhsm(br->br_perm))
11819+ au_fhsm_wrote(sb, bindex, force);
11820+ }
11821+}
11822+
11823+/* ---------------------------------------------------------------------- */
11824+
11825+static unsigned int au_fhsm_poll(struct file *file,
11826+ struct poll_table_struct *wait)
11827+{
11828+ unsigned int mask;
11829+ struct au_sbinfo *sbinfo;
11830+ struct au_fhsm *fhsm;
11831+
11832+ mask = 0;
11833+ sbinfo = file->private_data;
11834+ fhsm = &sbinfo->si_fhsm;
11835+ poll_wait(file, &fhsm->fhsm_wqh, wait);
11836+ if (atomic_read(&fhsm->fhsm_readable))
11837+ mask = POLLIN /* | POLLRDNORM */;
11838+
11839+ AuTraceErr((int)mask);
11840+ return mask;
11841+}
11842+
11843+static int au_fhsm_do_read_one(struct aufs_stbr __user *stbr,
11844+ struct aufs_stfs *stfs, __s16 brid)
11845+{
11846+ int err;
11847+
11848+ err = copy_to_user(&stbr->stfs, stfs, sizeof(*stfs));
11849+ if (!err)
11850+ err = __put_user(brid, &stbr->brid);
11851+ if (unlikely(err))
11852+ err = -EFAULT;
11853+
11854+ return err;
11855+}
11856+
11857+static ssize_t au_fhsm_do_read(struct super_block *sb,
11858+ struct aufs_stbr __user *stbr, size_t count)
11859+{
11860+ ssize_t err;
11861+ int nstbr;
11862+ aufs_bindex_t bindex, bend;
11863+ struct au_branch *br;
11864+ struct au_br_fhsm *bf;
11865+
11866+ /* except the bottom branch */
11867+ err = 0;
11868+ nstbr = 0;
c1595e42 11869+ bend = au_fhsm_bottom(sb);
076b876e
AM
11870+ for (bindex = 0; !err && bindex < bend; bindex++) {
11871+ br = au_sbr(sb, bindex);
11872+ if (!au_br_fhsm(br->br_perm))
11873+ continue;
11874+
11875+ bf = br->br_fhsm;
11876+ mutex_lock(&bf->bf_lock);
11877+ if (bf->bf_readable) {
11878+ err = -EFAULT;
11879+ if (count >= sizeof(*stbr))
11880+ err = au_fhsm_do_read_one(stbr++, &bf->bf_stfs,
11881+ br->br_id);
11882+ if (!err) {
11883+ bf->bf_readable = 0;
11884+ count -= sizeof(*stbr);
11885+ nstbr++;
11886+ }
11887+ }
11888+ mutex_unlock(&bf->bf_lock);
11889+ }
11890+ if (!err)
11891+ err = sizeof(*stbr) * nstbr;
11892+
11893+ return err;
11894+}
11895+
11896+static ssize_t au_fhsm_read(struct file *file, char __user *buf, size_t count,
11897+ loff_t *pos)
11898+{
11899+ ssize_t err;
11900+ int readable;
11901+ aufs_bindex_t nfhsm, bindex, bend;
11902+ struct au_sbinfo *sbinfo;
11903+ struct au_fhsm *fhsm;
11904+ struct au_branch *br;
11905+ struct super_block *sb;
11906+
11907+ err = 0;
11908+ sbinfo = file->private_data;
11909+ fhsm = &sbinfo->si_fhsm;
11910+need_data:
11911+ spin_lock_irq(&fhsm->fhsm_wqh.lock);
11912+ if (!atomic_read(&fhsm->fhsm_readable)) {
11913+ if (vfsub_file_flags(file) & O_NONBLOCK)
11914+ err = -EAGAIN;
11915+ else
11916+ err = wait_event_interruptible_locked_irq
11917+ (fhsm->fhsm_wqh,
11918+ atomic_read(&fhsm->fhsm_readable));
11919+ }
11920+ spin_unlock_irq(&fhsm->fhsm_wqh.lock);
11921+ if (unlikely(err))
11922+ goto out;
11923+
11924+ /* sb may already be dead */
11925+ au_rw_read_lock(&sbinfo->si_rwsem);
11926+ readable = atomic_read(&fhsm->fhsm_readable);
11927+ if (readable > 0) {
11928+ sb = sbinfo->si_sb;
11929+ AuDebugOn(!sb);
11930+ /* exclude the bottom branch */
11931+ nfhsm = 0;
c1595e42 11932+ bend = au_fhsm_bottom(sb);
076b876e
AM
11933+ for (bindex = 0; bindex < bend; bindex++) {
11934+ br = au_sbr(sb, bindex);
11935+ if (au_br_fhsm(br->br_perm))
11936+ nfhsm++;
11937+ }
11938+ err = -EMSGSIZE;
11939+ if (nfhsm * sizeof(struct aufs_stbr) <= count) {
11940+ atomic_set(&fhsm->fhsm_readable, 0);
11941+ err = au_fhsm_do_read(sbinfo->si_sb, (void __user *)buf,
11942+ count);
11943+ }
11944+ }
11945+ au_rw_read_unlock(&sbinfo->si_rwsem);
11946+ if (!readable)
11947+ goto need_data;
11948+
11949+out:
11950+ return err;
11951+}
11952+
11953+static int au_fhsm_release(struct inode *inode, struct file *file)
11954+{
11955+ struct au_sbinfo *sbinfo;
11956+ struct au_fhsm *fhsm;
11957+
11958+ /* sb may already be dead */
11959+ sbinfo = file->private_data;
11960+ fhsm = &sbinfo->si_fhsm;
11961+ spin_lock(&fhsm->fhsm_spin);
11962+ fhsm->fhsm_pid = 0;
11963+ spin_unlock(&fhsm->fhsm_spin);
11964+ kobject_put(&sbinfo->si_kobj);
11965+
11966+ return 0;
11967+}
11968+
11969+static const struct file_operations au_fhsm_fops = {
11970+ .owner = THIS_MODULE,
11971+ .llseek = noop_llseek,
11972+ .read = au_fhsm_read,
11973+ .poll = au_fhsm_poll,
11974+ .release = au_fhsm_release
11975+};
11976+
11977+int au_fhsm_fd(struct super_block *sb, int oflags)
11978+{
11979+ int err, fd;
11980+ struct au_sbinfo *sbinfo;
11981+ struct au_fhsm *fhsm;
11982+
11983+ err = -EPERM;
11984+ if (unlikely(!capable(CAP_SYS_ADMIN)))
11985+ goto out;
11986+
11987+ err = -EINVAL;
11988+ if (unlikely(oflags & ~(O_CLOEXEC | O_NONBLOCK)))
11989+ goto out;
11990+
11991+ err = 0;
11992+ sbinfo = au_sbi(sb);
11993+ fhsm = &sbinfo->si_fhsm;
11994+ spin_lock(&fhsm->fhsm_spin);
11995+ if (!fhsm->fhsm_pid)
11996+ fhsm->fhsm_pid = current->pid;
11997+ else
11998+ err = -EBUSY;
11999+ spin_unlock(&fhsm->fhsm_spin);
12000+ if (unlikely(err))
12001+ goto out;
12002+
12003+ oflags |= O_RDONLY;
12004+ /* oflags |= FMODE_NONOTIFY; */
12005+ fd = anon_inode_getfd("[aufs_fhsm]", &au_fhsm_fops, sbinfo, oflags);
12006+ err = fd;
12007+ if (unlikely(fd < 0))
12008+ goto out_pid;
12009+
12010+ /* succeed reglardless 'fhsm' status */
12011+ kobject_get(&sbinfo->si_kobj);
12012+ si_noflush_read_lock(sb);
12013+ if (au_ftest_si(sbinfo, FHSM))
12014+ au_fhsm_wrote_all(sb, /*force*/0);
12015+ si_read_unlock(sb);
12016+ goto out; /* success */
12017+
12018+out_pid:
12019+ spin_lock(&fhsm->fhsm_spin);
12020+ fhsm->fhsm_pid = 0;
12021+ spin_unlock(&fhsm->fhsm_spin);
12022+out:
12023+ AuTraceErr(err);
12024+ return err;
12025+}
12026+
12027+/* ---------------------------------------------------------------------- */
12028+
12029+int au_fhsm_br_alloc(struct au_branch *br)
12030+{
12031+ int err;
12032+
12033+ err = 0;
12034+ br->br_fhsm = kmalloc(sizeof(*br->br_fhsm), GFP_NOFS);
12035+ if (br->br_fhsm)
12036+ au_br_fhsm_init(br->br_fhsm);
12037+ else
12038+ err = -ENOMEM;
12039+
12040+ return err;
12041+}
12042+
12043+/* ---------------------------------------------------------------------- */
12044+
/*
 * finish fhsm for @sb by notifying with -1.
 * NOTE(review): au_fhsm_read() returns 0 once fhsm_readable is negative,
 * so this presumably signals the end of data to the reader; confirm in
 * au_fhsm_notify().
 */
void au_fhsm_fin(struct super_block *sb)
{
	const int val = -1;

	au_fhsm_notify(sb, val);
}
12049+
12050+void au_fhsm_init(struct au_sbinfo *sbinfo)
12051+{
12052+ struct au_fhsm *fhsm;
12053+
12054+ fhsm = &sbinfo->si_fhsm;
12055+ spin_lock_init(&fhsm->fhsm_spin);
12056+ init_waitqueue_head(&fhsm->fhsm_wqh);
12057+ atomic_set(&fhsm->fhsm_readable, 0);
12058+ fhsm->fhsm_expire
12059+ = msecs_to_jiffies(AUFS_FHSM_CACHE_DEF_SEC * MSEC_PER_SEC);
c1595e42 12060+ fhsm->fhsm_bottom = -1;
076b876e
AM
12061+}
12062+
12063+void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec)
12064+{
12065+ sbinfo->si_fhsm.fhsm_expire
12066+ = msecs_to_jiffies(sec * MSEC_PER_SEC);
12067+}
12068+
12069+void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo)
12070+{
12071+ unsigned int u;
12072+
12073+ if (!au_ftest_si(sbinfo, FHSM))
12074+ return;
12075+
12076+ u = jiffies_to_msecs(sbinfo->si_fhsm.fhsm_expire) / MSEC_PER_SEC;
12077+ if (u != AUFS_FHSM_CACHE_DEF_SEC)
12078+ seq_printf(seq, ",fhsm_sec=%u", u);
12079+}
7f207e10
AM
12080diff -urN /usr/share/empty/fs/aufs/file.c linux/fs/aufs/file.c
12081--- /usr/share/empty/fs/aufs/file.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
12082+++ linux/fs/aufs/file.c 2016-02-28 11:27:01.280579017 +0100
12083@@ -0,0 +1,857 @@
1facf9fc 12084+/*
7f2ca4b1 12085+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 12086+ *
12087+ * This program, aufs is free software; you can redistribute it and/or modify
12088+ * it under the terms of the GNU General Public License as published by
12089+ * the Free Software Foundation; either version 2 of the License, or
12090+ * (at your option) any later version.
dece6358
AM
12091+ *
12092+ * This program is distributed in the hope that it will be useful,
12093+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12094+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12095+ * GNU General Public License for more details.
12096+ *
12097+ * You should have received a copy of the GNU General Public License
523b37e3 12098+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 12099+ */
12100+
12101+/*
4a4d8108 12102+ * handling file/dir, and address_space operation
1facf9fc 12103+ */
12104+
7eafdf33
AM
12105+#ifdef CONFIG_AUFS_DEBUG
12106+#include <linux/migrate.h>
12107+#endif
4a4d8108 12108+#include <linux/pagemap.h>
1facf9fc 12109+#include "aufs.h"
12110+
4a4d8108
AM
/*
 * convert open flags to their read-only equivalent: drop every flag related
 * to writing, and add O_RDONLY and O_NOATIME.
 */
unsigned int au_file_roflags(unsigned int flags)
{
	const unsigned int wr_mask
		= O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC;

	return (flags & ~wr_mask) | O_RDONLY | O_NOATIME;
}
12118+
12119+/* common functions to regular file and dir */
12120+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
392086de 12121+ struct file *file, int force_wr)
1facf9fc 12122+{
1308ab2a 12123+ struct file *h_file;
4a4d8108
AM
12124+ struct dentry *h_dentry;
12125+ struct inode *h_inode;
12126+ struct super_block *sb;
12127+ struct au_branch *br;
12128+ struct path h_path;
7f2ca4b1 12129+ int err;
1facf9fc 12130+
4a4d8108
AM
12131+ /* a race condition can happen between open and unlink/rmdir */
12132+ h_file = ERR_PTR(-ENOENT);
12133+ h_dentry = au_h_dptr(dentry, bindex);
b752ccd1 12134+ if (au_test_nfsd() && !h_dentry)
4a4d8108
AM
12135+ goto out;
12136+ h_inode = h_dentry->d_inode;
b752ccd1 12137+ if (au_test_nfsd() && !h_inode)
4a4d8108 12138+ goto out;
027c5e7a
AM
12139+ spin_lock(&h_dentry->d_lock);
12140+ err = (!d_unhashed(dentry) && d_unlinked(h_dentry))
12141+ || !h_inode
12142+ /* || !dentry->d_inode->i_nlink */
12143+ ;
12144+ spin_unlock(&h_dentry->d_lock);
12145+ if (unlikely(err))
4a4d8108 12146+ goto out;
1facf9fc 12147+
4a4d8108
AM
12148+ sb = dentry->d_sb;
12149+ br = au_sbr(sb, bindex);
7f2ca4b1
JR
12150+ err = au_br_test_oflag(flags, br);
12151+ h_file = ERR_PTR(err);
12152+ if (unlikely(err))
027c5e7a 12153+ goto out;
1facf9fc 12154+
4a4d8108 12155+ /* drop flags for writing */
392086de
AM
12156+ if (au_test_ro(sb, bindex, dentry->d_inode)) {
12157+ if (force_wr && !(flags & O_WRONLY))
12158+ force_wr = 0;
4a4d8108 12159+ flags = au_file_roflags(flags);
392086de
AM
12160+ if (force_wr) {
12161+ h_file = ERR_PTR(-EROFS);
12162+ flags = au_file_roflags(flags);
12163+ if (unlikely(vfsub_native_ro(h_inode)
12164+ || IS_APPEND(h_inode)))
12165+ goto out;
12166+ flags &= ~O_ACCMODE;
12167+ flags |= O_WRONLY;
12168+ }
12169+ }
4a4d8108
AM
12170+ flags &= ~O_CREAT;
12171+ atomic_inc(&br->br_count);
12172+ h_path.dentry = h_dentry;
86dc4139 12173+ h_path.mnt = au_br_mnt(br);
38d290e6 12174+ h_file = vfsub_dentry_open(&h_path, flags);
4a4d8108
AM
12175+ if (IS_ERR(h_file))
12176+ goto out_br;
dece6358 12177+
7f2ca4b1 12178+ if (flags & __FMODE_EXEC) {
4a4d8108
AM
12179+ err = deny_write_access(h_file);
12180+ if (unlikely(err)) {
12181+ fput(h_file);
12182+ h_file = ERR_PTR(err);
12183+ goto out_br;
12184+ }
12185+ }
953406b4 12186+ fsnotify_open(h_file);
4a4d8108 12187+ goto out; /* success */
1facf9fc 12188+
4f0767ce 12189+out_br:
4a4d8108 12190+ atomic_dec(&br->br_count);
4f0767ce 12191+out:
4a4d8108
AM
12192+ return h_file;
12193+}
1308ab2a 12194+
076b876e
AM
12195+static int au_cmoo(struct dentry *dentry)
12196+{
12197+ int err, cmoo;
12198+ unsigned int udba;
12199+ struct path h_path;
12200+ struct au_pin pin;
12201+ struct au_cp_generic cpg = {
12202+ .dentry = dentry,
12203+ .bdst = -1,
12204+ .bsrc = -1,
12205+ .len = -1,
12206+ .pin = &pin,
12207+ .flags = AuCpup_DTIME | AuCpup_HOPEN
12208+ };
12209+ struct inode *inode, *delegated;
12210+ struct super_block *sb;
12211+ struct au_sbinfo *sbinfo;
12212+ struct au_fhsm *fhsm;
12213+ pid_t pid;
12214+ struct au_branch *br;
12215+ struct dentry *parent;
12216+ struct au_hinode *hdir;
12217+
12218+ DiMustWriteLock(dentry);
12219+ inode = dentry->d_inode;
12220+ IiMustWriteLock(inode);
12221+
12222+ err = 0;
12223+ if (IS_ROOT(dentry))
12224+ goto out;
12225+ cpg.bsrc = au_dbstart(dentry);
12226+ if (!cpg.bsrc)
12227+ goto out;
12228+
12229+ sb = dentry->d_sb;
12230+ sbinfo = au_sbi(sb);
12231+ fhsm = &sbinfo->si_fhsm;
12232+ pid = au_fhsm_pid(fhsm);
12233+ if (pid
12234+ && (current->pid == pid
12235+ || current->real_parent->pid == pid))
12236+ goto out;
12237+
12238+ br = au_sbr(sb, cpg.bsrc);
12239+ cmoo = au_br_cmoo(br->br_perm);
12240+ if (!cmoo)
12241+ goto out;
12242+ if (!S_ISREG(inode->i_mode))
12243+ cmoo &= AuBrAttr_COO_ALL;
12244+ if (!cmoo)
12245+ goto out;
12246+
12247+ parent = dget_parent(dentry);
12248+ di_write_lock_parent(parent);
12249+ err = au_wbr_do_copyup_bu(dentry, cpg.bsrc - 1);
12250+ cpg.bdst = err;
12251+ if (unlikely(err < 0)) {
12252+ err = 0; /* there is no upper writable branch */
12253+ goto out_dgrade;
12254+ }
12255+ AuDbg("bsrc %d, bdst %d\n", cpg.bsrc, cpg.bdst);
12256+
12257+ /* do not respect the coo attrib for the target branch */
12258+ err = au_cpup_dirs(dentry, cpg.bdst);
12259+ if (unlikely(err))
12260+ goto out_dgrade;
12261+
12262+ di_downgrade_lock(parent, AuLock_IR);
12263+ udba = au_opt_udba(sb);
12264+ err = au_pin(&pin, dentry, cpg.bdst, udba,
12265+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12266+ if (unlikely(err))
12267+ goto out_parent;
12268+
12269+ err = au_sio_cpup_simple(&cpg);
12270+ au_unpin(&pin);
12271+ if (unlikely(err))
12272+ goto out_parent;
12273+ if (!(cmoo & AuBrWAttr_MOO))
12274+ goto out_parent; /* success */
12275+
12276+ err = au_pin(&pin, dentry, cpg.bsrc, udba,
12277+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12278+ if (unlikely(err))
12279+ goto out_parent;
12280+
12281+ h_path.mnt = au_br_mnt(br);
12282+ h_path.dentry = au_h_dptr(dentry, cpg.bsrc);
12283+ hdir = au_hi(parent->d_inode, cpg.bsrc);
12284+ delegated = NULL;
12285+ err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated, /*force*/1);
12286+ au_unpin(&pin);
12287+ /* todo: keep h_dentry or not? */
12288+ if (unlikely(err == -EWOULDBLOCK)) {
12289+ pr_warn("cannot retry for NFSv4 delegation"
12290+ " for an internal unlink\n");
12291+ iput(delegated);
12292+ }
12293+ if (unlikely(err)) {
12294+ pr_err("unlink %pd after coo failed (%d), ignored\n",
12295+ dentry, err);
12296+ err = 0;
12297+ }
12298+ goto out_parent; /* success */
12299+
12300+out_dgrade:
12301+ di_downgrade_lock(parent, AuLock_IR);
12302+out_parent:
12303+ di_read_unlock(parent, AuLock_IR);
12304+ dput(parent);
12305+out:
12306+ AuTraceErr(err);
12307+ return err;
12308+}
12309+
7f2ca4b1 12310+int au_do_open(struct file *file, struct au_do_open_args *args)
1facf9fc 12311+{
7f2ca4b1 12312+ int err, no_lock = args->no_lock;
1facf9fc 12313+ struct dentry *dentry;
076b876e 12314+ struct au_finfo *finfo;
1308ab2a 12315+
7f2ca4b1
JR
12316+ if (!no_lock)
12317+ err = au_finfo_init(file, args->fidir);
12318+ else {
12319+ lockdep_off();
12320+ err = au_finfo_init(file, args->fidir);
12321+ lockdep_on();
12322+ }
4a4d8108
AM
12323+ if (unlikely(err))
12324+ goto out;
1facf9fc 12325+
12326+ dentry = file->f_dentry;
7f2ca4b1
JR
12327+ AuDebugOn(IS_ERR_OR_NULL(dentry));
12328+ if (!no_lock) {
12329+ di_write_lock_child(dentry);
12330+ err = au_cmoo(dentry);
12331+ di_downgrade_lock(dentry, AuLock_IR);
12332+ if (!err)
12333+ err = args->open(file, vfsub_file_flags(file), NULL);
12334+ di_read_unlock(dentry, AuLock_IR);
12335+ } else {
12336+ err = au_cmoo(dentry);
12337+ if (!err)
12338+ err = args->open(file, vfsub_file_flags(file),
12339+ args->h_file);
12340+ if (!err && au_fbstart(file) != au_dbstart(dentry))
12341+ /*
12342+ * cmoo happens after h_file was opened.
12343+ * need to refresh file later.
12344+ */
12345+ atomic_dec(&au_fi(file)->fi_generation);
12346+ }
1facf9fc 12347+
076b876e
AM
12348+ finfo = au_fi(file);
12349+ if (!err) {
12350+ finfo->fi_file = file;
12351+ au_sphl_add(&finfo->fi_hlist,
12352+ &au_sbi(file->f_dentry->d_sb)->si_files);
12353+ }
7f2ca4b1
JR
12354+ if (!no_lock)
12355+ fi_write_unlock(file);
12356+ else {
12357+ lockdep_off();
12358+ fi_write_unlock(file);
12359+ lockdep_on();
12360+ }
4a4d8108 12361+ if (unlikely(err)) {
076b876e 12362+ finfo->fi_hdir = NULL;
4a4d8108 12363+ au_finfo_fin(file);
1308ab2a 12364+ }
4a4d8108 12365+
4f0767ce 12366+out:
1308ab2a 12367+ return err;
12368+}
dece6358 12369+
4a4d8108 12370+int au_reopen_nondir(struct file *file)
1308ab2a 12371+{
4a4d8108
AM
12372+ int err;
12373+ aufs_bindex_t bstart;
12374+ struct dentry *dentry;
12375+ struct file *h_file, *h_file_tmp;
1308ab2a 12376+
4a4d8108 12377+ dentry = file->f_dentry;
4a4d8108
AM
12378+ bstart = au_dbstart(dentry);
12379+ h_file_tmp = NULL;
12380+ if (au_fbstart(file) == bstart) {
12381+ h_file = au_hf_top(file);
12382+ if (file->f_mode == h_file->f_mode)
12383+ return 0; /* success */
12384+ h_file_tmp = h_file;
12385+ get_file(h_file_tmp);
12386+ au_set_h_fptr(file, bstart, NULL);
12387+ }
12388+ AuDebugOn(au_fi(file)->fi_hdir);
86dc4139
AM
12389+ /*
12390+ * it can happen
12391+ * file exists on both of rw and ro
12392+ * open --> dbstart and fbstart are both 0
12393+ * prepend a branch as rw, "rw" become ro
12394+ * remove rw/file
12395+ * delete the top branch, "rw" becomes rw again
12396+ * --> dbstart is 1, fbstart is still 0
12397+ * write --> fbstart is 0 but dbstart is 1
12398+ */
12399+ /* AuDebugOn(au_fbstart(file) < bstart); */
1308ab2a 12400+
4a4d8108 12401+ h_file = au_h_open(dentry, bstart, vfsub_file_flags(file) & ~O_TRUNC,
392086de 12402+ file, /*force_wr*/0);
4a4d8108 12403+ err = PTR_ERR(h_file);
86dc4139
AM
12404+ if (IS_ERR(h_file)) {
12405+ if (h_file_tmp) {
12406+ atomic_inc(&au_sbr(dentry->d_sb, bstart)->br_count);
12407+ au_set_h_fptr(file, bstart, h_file_tmp);
12408+ h_file_tmp = NULL;
12409+ }
4a4d8108 12410+ goto out; /* todo: close all? */
86dc4139 12411+ }
4a4d8108
AM
12412+
12413+ err = 0;
12414+ au_set_fbstart(file, bstart);
12415+ au_set_h_fptr(file, bstart, h_file);
12416+ au_update_figen(file);
12417+ /* todo: necessary? */
12418+ /* file->f_ra = h_file->f_ra; */
12419+
4f0767ce 12420+out:
4a4d8108
AM
12421+ if (h_file_tmp)
12422+ fput(h_file_tmp);
12423+ return err;
1facf9fc 12424+}
12425+
1308ab2a 12426+/* ---------------------------------------------------------------------- */
12427+
4a4d8108
AM
12428+static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
12429+ struct dentry *hi_wh)
1facf9fc 12430+{
4a4d8108
AM
12431+ int err;
12432+ aufs_bindex_t bstart;
12433+ struct au_dinfo *dinfo;
12434+ struct dentry *h_dentry;
12435+ struct au_hdentry *hdp;
1facf9fc 12436+
4a4d8108
AM
12437+ dinfo = au_di(file->f_dentry);
12438+ AuRwMustWriteLock(&dinfo->di_rwsem);
dece6358 12439+
4a4d8108
AM
12440+ bstart = dinfo->di_bstart;
12441+ dinfo->di_bstart = btgt;
12442+ hdp = dinfo->di_hdentry;
12443+ h_dentry = hdp[0 + btgt].hd_dentry;
12444+ hdp[0 + btgt].hd_dentry = hi_wh;
12445+ err = au_reopen_nondir(file);
12446+ hdp[0 + btgt].hd_dentry = h_dentry;
12447+ dinfo->di_bstart = bstart;
1facf9fc 12448+
1facf9fc 12449+ return err;
12450+}
12451+
4a4d8108 12452+static int au_ready_to_write_wh(struct file *file, loff_t len,
86dc4139 12453+ aufs_bindex_t bcpup, struct au_pin *pin)
1facf9fc 12454+{
4a4d8108 12455+ int err;
027c5e7a 12456+ struct inode *inode, *h_inode;
c2b27bf2
AM
12457+ struct dentry *h_dentry, *hi_wh;
12458+ struct au_cp_generic cpg = {
12459+ .dentry = file->f_dentry,
12460+ .bdst = bcpup,
12461+ .bsrc = -1,
12462+ .len = len,
12463+ .pin = pin
12464+ };
1facf9fc 12465+
c2b27bf2
AM
12466+ au_update_dbstart(cpg.dentry);
12467+ inode = cpg.dentry->d_inode;
027c5e7a 12468+ h_inode = NULL;
c2b27bf2
AM
12469+ if (au_dbstart(cpg.dentry) <= bcpup
12470+ && au_dbend(cpg.dentry) >= bcpup) {
12471+ h_dentry = au_h_dptr(cpg.dentry, bcpup);
027c5e7a
AM
12472+ if (h_dentry)
12473+ h_inode = h_dentry->d_inode;
12474+ }
4a4d8108 12475+ hi_wh = au_hi_wh(inode, bcpup);
027c5e7a 12476+ if (!hi_wh && !h_inode)
c2b27bf2 12477+ err = au_sio_cpup_wh(&cpg, file);
4a4d8108
AM
12478+ else
12479+ /* already copied-up after unlink */
12480+ err = au_reopen_wh(file, bcpup, hi_wh);
1facf9fc 12481+
4a4d8108 12482+ if (!err
38d290e6
JR
12483+ && (inode->i_nlink > 1
12484+ || (inode->i_state & I_LINKABLE))
c2b27bf2
AM
12485+ && au_opt_test(au_mntflags(cpg.dentry->d_sb), PLINK))
12486+ au_plink_append(inode, bcpup, au_h_dptr(cpg.dentry, bcpup));
1308ab2a 12487+
dece6358 12488+ return err;
1facf9fc 12489+}
12490+
4a4d8108
AM
12491+/*
12492+ * prepare the @file for writing.
12493+ */
12494+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
1facf9fc 12495+{
4a4d8108 12496+ int err;
c2b27bf2 12497+ aufs_bindex_t dbstart;
c1595e42 12498+ struct dentry *parent;
86dc4139 12499+ struct inode *inode;
1facf9fc 12500+ struct super_block *sb;
4a4d8108 12501+ struct file *h_file;
c2b27bf2
AM
12502+ struct au_cp_generic cpg = {
12503+ .dentry = file->f_dentry,
12504+ .bdst = -1,
12505+ .bsrc = -1,
12506+ .len = len,
12507+ .pin = pin,
12508+ .flags = AuCpup_DTIME
12509+ };
1facf9fc 12510+
c2b27bf2
AM
12511+ sb = cpg.dentry->d_sb;
12512+ inode = cpg.dentry->d_inode;
c2b27bf2
AM
12513+ cpg.bsrc = au_fbstart(file);
12514+ err = au_test_ro(sb, cpg.bsrc, inode);
4a4d8108 12515+ if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) {
c2b27bf2
AM
12516+ err = au_pin(pin, cpg.dentry, cpg.bsrc, AuOpt_UDBA_NONE,
12517+ /*flags*/0);
1facf9fc 12518+ goto out;
4a4d8108 12519+ }
1facf9fc 12520+
027c5e7a 12521+ /* need to cpup or reopen */
c2b27bf2 12522+ parent = dget_parent(cpg.dentry);
4a4d8108 12523+ di_write_lock_parent(parent);
c2b27bf2
AM
12524+ err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
12525+ cpg.bdst = err;
4a4d8108
AM
12526+ if (unlikely(err < 0))
12527+ goto out_dgrade;
12528+ err = 0;
12529+
c2b27bf2
AM
12530+ if (!d_unhashed(cpg.dentry) && !au_h_dptr(parent, cpg.bdst)) {
12531+ err = au_cpup_dirs(cpg.dentry, cpg.bdst);
1facf9fc 12532+ if (unlikely(err))
4a4d8108
AM
12533+ goto out_dgrade;
12534+ }
12535+
c2b27bf2 12536+ err = au_pin(pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
4a4d8108
AM
12537+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
12538+ if (unlikely(err))
12539+ goto out_dgrade;
12540+
c2b27bf2 12541+ dbstart = au_dbstart(cpg.dentry);
c1595e42 12542+ if (dbstart <= cpg.bdst)
c2b27bf2 12543+ cpg.bsrc = cpg.bdst;
027c5e7a 12544+
c2b27bf2
AM
12545+ if (dbstart <= cpg.bdst /* just reopen */
12546+ || !d_unhashed(cpg.dentry) /* copyup and reopen */
027c5e7a 12547+ ) {
392086de 12548+ h_file = au_h_open_pre(cpg.dentry, cpg.bsrc, /*force_wr*/0);
86dc4139 12549+ if (IS_ERR(h_file))
027c5e7a 12550+ err = PTR_ERR(h_file);
86dc4139 12551+ else {
027c5e7a 12552+ di_downgrade_lock(parent, AuLock_IR);
c2b27bf2
AM
12553+ if (dbstart > cpg.bdst)
12554+ err = au_sio_cpup_simple(&cpg);
027c5e7a
AM
12555+ if (!err)
12556+ err = au_reopen_nondir(file);
c2b27bf2 12557+ au_h_open_post(cpg.dentry, cpg.bsrc, h_file);
027c5e7a 12558+ }
027c5e7a
AM
12559+ } else { /* copyup as wh and reopen */
12560+ /*
12561+ * since writable hfsplus branch is not supported,
12562+ * h_open_pre/post() are unnecessary.
12563+ */
c2b27bf2 12564+ err = au_ready_to_write_wh(file, len, cpg.bdst, pin);
4a4d8108 12565+ di_downgrade_lock(parent, AuLock_IR);
4a4d8108 12566+ }
4a4d8108
AM
12567+
12568+ if (!err) {
12569+ au_pin_set_parent_lflag(pin, /*lflag*/0);
12570+ goto out_dput; /* success */
12571+ }
12572+ au_unpin(pin);
12573+ goto out_unlock;
1facf9fc 12574+
4f0767ce 12575+out_dgrade:
4a4d8108 12576+ di_downgrade_lock(parent, AuLock_IR);
4f0767ce 12577+out_unlock:
4a4d8108 12578+ di_read_unlock(parent, AuLock_IR);
4f0767ce 12579+out_dput:
4a4d8108 12580+ dput(parent);
4f0767ce 12581+out:
1facf9fc 12582+ return err;
12583+}
12584+
4a4d8108
AM
12585+/* ---------------------------------------------------------------------- */
12586+
12587+int au_do_flush(struct file *file, fl_owner_t id,
12588+ int (*flush)(struct file *file, fl_owner_t id))
1facf9fc 12589+{
4a4d8108 12590+ int err;
1facf9fc 12591+ struct super_block *sb;
4a4d8108 12592+ struct inode *inode;
1facf9fc 12593+
c06a8ce3
AM
12594+ inode = file_inode(file);
12595+ sb = inode->i_sb;
4a4d8108
AM
12596+ si_noflush_read_lock(sb);
12597+ fi_read_lock(file);
b752ccd1 12598+ ii_read_lock_child(inode);
1facf9fc 12599+
4a4d8108
AM
12600+ err = flush(file, id);
12601+ au_cpup_attr_timesizes(inode);
1facf9fc 12602+
b752ccd1 12603+ ii_read_unlock(inode);
4a4d8108 12604+ fi_read_unlock(file);
1308ab2a 12605+ si_read_unlock(sb);
dece6358 12606+ return err;
1facf9fc 12607+}
12608+
4a4d8108
AM
12609+/* ---------------------------------------------------------------------- */
12610+
12611+static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
1facf9fc 12612+{
4a4d8108 12613+ int err;
4a4d8108
AM
12614+ struct au_pin pin;
12615+ struct au_finfo *finfo;
c2b27bf2 12616+ struct dentry *parent, *hi_wh;
4a4d8108 12617+ struct inode *inode;
1facf9fc 12618+ struct super_block *sb;
c2b27bf2
AM
12619+ struct au_cp_generic cpg = {
12620+ .dentry = file->f_dentry,
12621+ .bdst = -1,
12622+ .bsrc = -1,
12623+ .len = -1,
12624+ .pin = &pin,
12625+ .flags = AuCpup_DTIME
12626+ };
1facf9fc 12627+
4a4d8108
AM
12628+ FiMustWriteLock(file);
12629+
12630+ err = 0;
12631+ finfo = au_fi(file);
c2b27bf2
AM
12632+ sb = cpg.dentry->d_sb;
12633+ inode = cpg.dentry->d_inode;
12634+ cpg.bdst = au_ibstart(inode);
12635+ if (cpg.bdst == finfo->fi_btop || IS_ROOT(cpg.dentry))
1308ab2a 12636+ goto out;
dece6358 12637+
c2b27bf2
AM
12638+ parent = dget_parent(cpg.dentry);
12639+ if (au_test_ro(sb, cpg.bdst, inode)) {
4a4d8108 12640+ di_read_lock_parent(parent, !AuLock_IR);
c2b27bf2
AM
12641+ err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
12642+ cpg.bdst = err;
4a4d8108
AM
12643+ di_read_unlock(parent, !AuLock_IR);
12644+ if (unlikely(err < 0))
12645+ goto out_parent;
12646+ err = 0;
1facf9fc 12647+ }
1facf9fc 12648+
4a4d8108 12649+ di_read_lock_parent(parent, AuLock_IR);
c2b27bf2 12650+ hi_wh = au_hi_wh(inode, cpg.bdst);
7f207e10
AM
12651+ if (!S_ISDIR(inode->i_mode)
12652+ && au_opt_test(au_mntflags(sb), PLINK)
4a4d8108 12653+ && au_plink_test(inode)
c2b27bf2
AM
12654+ && !d_unhashed(cpg.dentry)
12655+ && cpg.bdst < au_dbstart(cpg.dentry)) {
12656+ err = au_test_and_cpup_dirs(cpg.dentry, cpg.bdst);
4a4d8108
AM
12657+ if (unlikely(err))
12658+ goto out_unlock;
12659+
12660+ /* always superio. */
c2b27bf2 12661+ err = au_pin(&pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
4a4d8108 12662+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
367653fa 12663+ if (!err) {
c2b27bf2 12664+ err = au_sio_cpup_simple(&cpg);
367653fa
AM
12665+ au_unpin(&pin);
12666+ }
4a4d8108
AM
12667+ } else if (hi_wh) {
12668+ /* already copied-up after unlink */
c2b27bf2 12669+ err = au_reopen_wh(file, cpg.bdst, hi_wh);
4a4d8108
AM
12670+ *need_reopen = 0;
12671+ }
1facf9fc 12672+
4f0767ce 12673+out_unlock:
4a4d8108 12674+ di_read_unlock(parent, AuLock_IR);
4f0767ce 12675+out_parent:
4a4d8108 12676+ dput(parent);
4f0767ce 12677+out:
1308ab2a 12678+ return err;
dece6358 12679+}
1facf9fc 12680+
4a4d8108 12681+static void au_do_refresh_dir(struct file *file)
dece6358 12682+{
4a4d8108
AM
12683+ aufs_bindex_t bindex, bend, new_bindex, brid;
12684+ struct au_hfile *p, tmp, *q;
12685+ struct au_finfo *finfo;
1308ab2a 12686+ struct super_block *sb;
4a4d8108 12687+ struct au_fidir *fidir;
1facf9fc 12688+
4a4d8108 12689+ FiMustWriteLock(file);
1facf9fc 12690+
4a4d8108
AM
12691+ sb = file->f_dentry->d_sb;
12692+ finfo = au_fi(file);
12693+ fidir = finfo->fi_hdir;
12694+ AuDebugOn(!fidir);
12695+ p = fidir->fd_hfile + finfo->fi_btop;
12696+ brid = p->hf_br->br_id;
12697+ bend = fidir->fd_bbot;
12698+ for (bindex = finfo->fi_btop; bindex <= bend; bindex++, p++) {
12699+ if (!p->hf_file)
12700+ continue;
1308ab2a 12701+
4a4d8108
AM
12702+ new_bindex = au_br_index(sb, p->hf_br->br_id);
12703+ if (new_bindex == bindex)
12704+ continue;
12705+ if (new_bindex < 0) {
12706+ au_set_h_fptr(file, bindex, NULL);
12707+ continue;
12708+ }
1308ab2a 12709+
4a4d8108
AM
12710+ /* swap two lower inode, and loop again */
12711+ q = fidir->fd_hfile + new_bindex;
12712+ tmp = *q;
12713+ *q = *p;
12714+ *p = tmp;
12715+ if (tmp.hf_file) {
12716+ bindex--;
12717+ p--;
12718+ }
12719+ }
1308ab2a 12720+
4a4d8108 12721+ p = fidir->fd_hfile;
027c5e7a 12722+ if (!au_test_mmapped(file) && !d_unlinked(file->f_dentry)) {
4a4d8108
AM
12723+ bend = au_sbend(sb);
12724+ for (finfo->fi_btop = 0; finfo->fi_btop <= bend;
12725+ finfo->fi_btop++, p++)
12726+ if (p->hf_file) {
c06a8ce3 12727+ if (file_inode(p->hf_file))
4a4d8108 12728+ break;
c1595e42 12729+ au_hfput(p, file);
4a4d8108
AM
12730+ }
12731+ } else {
12732+ bend = au_br_index(sb, brid);
12733+ for (finfo->fi_btop = 0; finfo->fi_btop < bend;
12734+ finfo->fi_btop++, p++)
12735+ if (p->hf_file)
12736+ au_hfput(p, file);
12737+ bend = au_sbend(sb);
12738+ }
1308ab2a 12739+
4a4d8108
AM
12740+ p = fidir->fd_hfile + bend;
12741+ for (fidir->fd_bbot = bend; fidir->fd_bbot >= finfo->fi_btop;
12742+ fidir->fd_bbot--, p--)
12743+ if (p->hf_file) {
c06a8ce3 12744+ if (file_inode(p->hf_file))
4a4d8108 12745+ break;
c1595e42 12746+ au_hfput(p, file);
4a4d8108
AM
12747+ }
12748+ AuDebugOn(fidir->fd_bbot < finfo->fi_btop);
1308ab2a 12749+}
12750+
4a4d8108
AM
12751+/*
12752+ * after branch manipulating, refresh the file.
12753+ */
12754+static int refresh_file(struct file *file, int (*reopen)(struct file *file))
1facf9fc 12755+{
4a4d8108
AM
12756+ int err, need_reopen;
12757+ aufs_bindex_t bend, bindex;
12758+ struct dentry *dentry;
1308ab2a 12759+ struct au_finfo *finfo;
4a4d8108 12760+ struct au_hfile *hfile;
1facf9fc 12761+
4a4d8108 12762+ dentry = file->f_dentry;
1308ab2a 12763+ finfo = au_fi(file);
4a4d8108
AM
12764+ if (!finfo->fi_hdir) {
12765+ hfile = &finfo->fi_htop;
12766+ AuDebugOn(!hfile->hf_file);
12767+ bindex = au_br_index(dentry->d_sb, hfile->hf_br->br_id);
12768+ AuDebugOn(bindex < 0);
12769+ if (bindex != finfo->fi_btop)
12770+ au_set_fbstart(file, bindex);
12771+ } else {
12772+ err = au_fidir_realloc(finfo, au_sbend(dentry->d_sb) + 1);
12773+ if (unlikely(err))
12774+ goto out;
12775+ au_do_refresh_dir(file);
12776+ }
1facf9fc 12777+
4a4d8108
AM
12778+ err = 0;
12779+ need_reopen = 1;
12780+ if (!au_test_mmapped(file))
12781+ err = au_file_refresh_by_inode(file, &need_reopen);
027c5e7a 12782+ if (!err && need_reopen && !d_unlinked(dentry))
4a4d8108
AM
12783+ err = reopen(file);
12784+ if (!err) {
12785+ au_update_figen(file);
12786+ goto out; /* success */
12787+ }
12788+
12789+ /* error, close all lower files */
12790+ if (finfo->fi_hdir) {
12791+ bend = au_fbend_dir(file);
12792+ for (bindex = au_fbstart(file); bindex <= bend; bindex++)
12793+ au_set_h_fptr(file, bindex, NULL);
12794+ }
1facf9fc 12795+
4f0767ce 12796+out:
1facf9fc 12797+ return err;
12798+}
12799+
4a4d8108
AM
12800+/* common function to regular file and dir */
12801+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
12802+ int wlock)
dece6358 12803+{
1308ab2a 12804+ int err;
4a4d8108
AM
12805+ unsigned int sigen, figen;
12806+ aufs_bindex_t bstart;
12807+ unsigned char pseudo_link;
12808+ struct dentry *dentry;
12809+ struct inode *inode;
1facf9fc 12810+
4a4d8108
AM
12811+ err = 0;
12812+ dentry = file->f_dentry;
12813+ inode = dentry->d_inode;
4a4d8108
AM
12814+ sigen = au_sigen(dentry->d_sb);
12815+ fi_write_lock(file);
12816+ figen = au_figen(file);
12817+ di_write_lock_child(dentry);
12818+ bstart = au_dbstart(dentry);
12819+ pseudo_link = (bstart != au_ibstart(inode));
12820+ if (sigen == figen && !pseudo_link && au_fbstart(file) == bstart) {
12821+ if (!wlock) {
12822+ di_downgrade_lock(dentry, AuLock_IR);
12823+ fi_downgrade_lock(file);
12824+ }
12825+ goto out; /* success */
12826+ }
dece6358 12827+
4a4d8108 12828+ AuDbg("sigen %d, figen %d\n", sigen, figen);
027c5e7a 12829+ if (au_digen_test(dentry, sigen)) {
4a4d8108 12830+ err = au_reval_dpath(dentry, sigen);
027c5e7a 12831+ AuDebugOn(!err && au_digen_test(dentry, sigen));
4a4d8108 12832+ }
dece6358 12833+
027c5e7a
AM
12834+ if (!err)
12835+ err = refresh_file(file, reopen);
4a4d8108
AM
12836+ if (!err) {
12837+ if (!wlock) {
12838+ di_downgrade_lock(dentry, AuLock_IR);
12839+ fi_downgrade_lock(file);
12840+ }
12841+ } else {
12842+ di_write_unlock(dentry);
12843+ fi_write_unlock(file);
12844+ }
1facf9fc 12845+
4f0767ce 12846+out:
1308ab2a 12847+ return err;
12848+}
1facf9fc 12849+
4a4d8108
AM
12850+/* ---------------------------------------------------------------------- */
12851+
12852+/* cf. aufs_nopage() */
12853+/* for madvise(2) */
12854+static int aufs_readpage(struct file *file __maybe_unused, struct page *page)
1308ab2a 12855+{
4a4d8108
AM
12856+ unlock_page(page);
12857+ return 0;
12858+}
1facf9fc 12859+
4a4d8108
AM
12860+/* it will never be called, but necessary to support O_DIRECT */
12861+static ssize_t aufs_direct_IO(int rw, struct kiocb *iocb,
076b876e 12862+ struct iov_iter *iter, loff_t offset)
4a4d8108 12863+{ BUG(); return 0; }
1facf9fc 12864+
4a4d8108
AM
12865+/*
12866+ * it will never be called, but madvise and fadvise behaves differently
12867+ * when get_xip_mem is defined
12868+ */
12869+static int aufs_get_xip_mem(struct address_space *mapping, pgoff_t pgoff,
12870+ int create, void **kmem, unsigned long *pfn)
12871+{ BUG(); return 0; }
1facf9fc 12872+
4a4d8108
AM
/*
 * they will never be called.
 * defined only under CONFIG_AUFS_DEBUG, and every one of them does nothing
 * but AuUnsupport().
 */
#ifdef CONFIG_AUFS_DEBUG
static int aufs_write_begin(struct file *file, struct address_space *mapping,
			    loff_t pos, unsigned len, unsigned flags,
			    struct page **pagep, void **fsdata)
{
	AuUnsupport();
	return 0;
}

static int aufs_write_end(struct file *file, struct address_space *mapping,
			  loff_t pos, unsigned len, unsigned copied,
			  struct page *page, void *fsdata)
{
	AuUnsupport();
	return 0;
}

static int aufs_writepage(struct page *page, struct writeback_control *wbc)
{
	AuUnsupport();
	return 0;
}

static int aufs_set_page_dirty(struct page *page)
{
	AuUnsupport();
	return 0;
}

static void aufs_invalidatepage(struct page *page, unsigned int offset,
				unsigned int length)
{
	AuUnsupport();
}

static int aufs_releasepage(struct page *page, gfp_t gfp)
{
	AuUnsupport();
	return 0;
}

#if 0 /* called by memory compaction regardless file */
static int aufs_migratepage(struct address_space *mapping, struct page *newpage,
			    struct page *page, enum migrate_mode mode)
{
	AuUnsupport();
	return 0;
}
#endif

static int aufs_launder_page(struct page *page)
{
	AuUnsupport();
	return 0;
}

static int aufs_is_partially_uptodate(struct page *page,
				      unsigned long from,
				      unsigned long count)
{
	AuUnsupport();
	return 0;
}

static void aufs_is_dirty_writeback(struct page *page, bool *dirty,
				    bool *writeback)
{
	AuUnsupport();
}

static int aufs_error_remove_page(struct address_space *mapping,
				  struct page *page)
{
	AuUnsupport();
	return 0;
}

static int aufs_swap_activate(struct swap_info_struct *sis, struct file *file,
			      sector_t *span)
{
	AuUnsupport();
	return 0;
}

static void aufs_swap_deactivate(struct file *file)
{
	AuUnsupport();
}
#endif /* CONFIG_AUFS_DEBUG */
12916+
12917+const struct address_space_operations aufs_aop = {
12918+ .readpage = aufs_readpage,
12919+ .direct_IO = aufs_direct_IO,
12920+ .get_xip_mem = aufs_get_xip_mem,
12921+#ifdef CONFIG_AUFS_DEBUG
12922+ .writepage = aufs_writepage,
4a4d8108
AM
12923+ /* no writepages, because of writepage */
12924+ .set_page_dirty = aufs_set_page_dirty,
12925+ /* no readpages, because of readpage */
12926+ .write_begin = aufs_write_begin,
12927+ .write_end = aufs_write_end,
12928+ /* no bmap, no block device */
12929+ .invalidatepage = aufs_invalidatepage,
12930+ .releasepage = aufs_releasepage,
7f2ca4b1
JR
12931+ /* is fallback_migrate_page ok? */
12932+ /* .migratepage = aufs_migratepage, */
4a4d8108
AM
12933+ .launder_page = aufs_launder_page,
12934+ .is_partially_uptodate = aufs_is_partially_uptodate,
392086de 12935+ .is_dirty_writeback = aufs_is_dirty_writeback,
b4510431
AM
12936+ .error_remove_page = aufs_error_remove_page,
12937+ .swap_activate = aufs_swap_activate,
12938+ .swap_deactivate = aufs_swap_deactivate
4a4d8108 12939+#endif /* CONFIG_AUFS_DEBUG */
dece6358 12940+};
7f207e10
AM
12941diff -urN /usr/share/empty/fs/aufs/file.h linux/fs/aufs/file.h
12942--- /usr/share/empty/fs/aufs/file.h 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
12943+++ linux/fs/aufs/file.h 2016-02-28 11:27:01.280579017 +0100
12944@@ -0,0 +1,291 @@
4a4d8108 12945+/*
7f2ca4b1 12946+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
12947+ *
12948+ * This program, aufs is free software; you can redistribute it and/or modify
12949+ * it under the terms of the GNU General Public License as published by
12950+ * the Free Software Foundation; either version 2 of the License, or
12951+ * (at your option) any later version.
12952+ *
12953+ * This program is distributed in the hope that it will be useful,
12954+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12955+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12956+ * GNU General Public License for more details.
12957+ *
12958+ * You should have received a copy of the GNU General Public License
523b37e3 12959+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 12960+ */
1facf9fc 12961+
4a4d8108
AM
12962+/*
12963+ * file operations
12964+ */
1facf9fc 12965+
4a4d8108
AM
12966+#ifndef __AUFS_FILE_H__
12967+#define __AUFS_FILE_H__
1facf9fc 12968+
4a4d8108 12969+#ifdef __KERNEL__
1facf9fc 12970+
2cbb1c4b 12971+#include <linux/file.h>
4a4d8108
AM
12972+#include <linux/fs.h>
12973+#include <linux/poll.h>
4a4d8108 12974+#include "rwsem.h"
1facf9fc 12975+
4a4d8108
AM
12976+struct au_branch;
12977+struct au_hfile {
12978+ struct file *hf_file;
12979+ struct au_branch *hf_br;
12980+};
1facf9fc 12981+
4a4d8108
AM
12982+struct au_vdir;
12983+struct au_fidir {
12984+ aufs_bindex_t fd_bbot;
12985+ aufs_bindex_t fd_nent;
12986+ struct au_vdir *fd_vdir_cache;
12987+ struct au_hfile fd_hfile[];
12988+};
1facf9fc 12989+
4a4d8108 12990+static inline int au_fidir_sz(int nent)
dece6358 12991+{
4f0767ce
JR
12992+ AuDebugOn(nent < 0);
12993+ return sizeof(struct au_fidir) + sizeof(struct au_hfile) * nent;
4a4d8108 12994+}
1facf9fc 12995+
4a4d8108
AM
12996+struct au_finfo {
12997+ atomic_t fi_generation;
dece6358 12998+
4a4d8108
AM
12999+ struct au_rwsem fi_rwsem;
13000+ aufs_bindex_t fi_btop;
13001+
13002+ /* do not union them */
13003+ struct { /* for non-dir */
13004+ struct au_hfile fi_htop;
2cbb1c4b 13005+ atomic_t fi_mmapped;
4a4d8108
AM
13006+ };
13007+ struct au_fidir *fi_hdir; /* for dir only */
523b37e3
AM
13008+
13009+ struct hlist_node fi_hlist;
13010+ struct file *fi_file; /* very ugly */
4a4d8108 13011+} ____cacheline_aligned_in_smp;
1facf9fc 13012+
4a4d8108 13013+/* ---------------------------------------------------------------------- */
1facf9fc 13014+
4a4d8108
AM
13015+/* file.c */
13016+extern const struct address_space_operations aufs_aop;
13017+unsigned int au_file_roflags(unsigned int flags);
13018+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
392086de 13019+ struct file *file, int force_wr);
7f2ca4b1
JR
13020+struct au_do_open_args {
13021+ int no_lock;
13022+ int (*open)(struct file *file, int flags,
13023+ struct file *h_file);
13024+ struct au_fidir *fidir;
13025+ struct file *h_file;
13026+};
13027+int au_do_open(struct file *file, struct au_do_open_args *args);
4a4d8108
AM
13028+int au_reopen_nondir(struct file *file);
13029+struct au_pin;
13030+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin);
13031+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
13032+ int wlock);
13033+int au_do_flush(struct file *file, fl_owner_t id,
13034+ int (*flush)(struct file *file, fl_owner_t id));
1facf9fc 13035+
4a4d8108
AM
13036+/* poll.c */
13037+#ifdef CONFIG_AUFS_POLL
13038+unsigned int aufs_poll(struct file *file, poll_table *wait);
13039+#endif
1facf9fc 13040+
4a4d8108
AM
13041+#ifdef CONFIG_AUFS_BR_HFSPLUS
13042+/* hfsplus.c */
392086de
AM
13043+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
13044+ int force_wr);
4a4d8108
AM
13045+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
13046+ struct file *h_file);
13047+#else
c1595e42
JR
13048+AuStub(struct file *, au_h_open_pre, return NULL, struct dentry *dentry,
13049+ aufs_bindex_t bindex, int force_wr)
4a4d8108
AM
13050+AuStubVoid(au_h_open_post, struct dentry *dentry, aufs_bindex_t bindex,
13051+ struct file *h_file);
13052+#endif
1facf9fc 13053+
4a4d8108
AM
13054+/* f_op.c */
13055+extern const struct file_operations aufs_file_fop;
7f2ca4b1 13056+int au_do_open_nondir(struct file *file, int flags, struct file *h_file);
4a4d8108 13057+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file);
7f2ca4b1 13058+struct file *au_read_pre(struct file *file, int keep_fi);
4a4d8108 13059+
4a4d8108
AM
13060+/* finfo.c */
13061+void au_hfput(struct au_hfile *hf, struct file *file);
13062+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex,
13063+ struct file *h_file);
1facf9fc 13064+
4a4d8108 13065+void au_update_figen(struct file *file);
4a4d8108
AM
13066+struct au_fidir *au_fidir_alloc(struct super_block *sb);
13067+int au_fidir_realloc(struct au_finfo *finfo, int nbr);
1facf9fc 13068+
4a4d8108
AM
13069+void au_fi_init_once(void *_fi);
13070+void au_finfo_fin(struct file *file);
13071+int au_finfo_init(struct file *file, struct au_fidir *fidir);
1facf9fc 13072+
4a4d8108
AM
13073+/* ioctl.c */
13074+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg);
b752ccd1
AM
13075+#ifdef CONFIG_COMPAT
13076+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
13077+ unsigned long arg);
c2b27bf2
AM
13078+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
13079+ unsigned long arg);
b752ccd1 13080+#endif
1facf9fc 13081+
4a4d8108 13082+/* ---------------------------------------------------------------------- */
1facf9fc 13083+
4a4d8108
AM
13084+static inline struct au_finfo *au_fi(struct file *file)
13085+{
38d290e6 13086+ return file->private_data;
4a4d8108 13087+}
1facf9fc 13088+
4a4d8108 13089+/* ---------------------------------------------------------------------- */
1facf9fc 13090+
4a4d8108
AM
13091+/*
13092+ * fi_read_lock, fi_write_lock,
13093+ * fi_read_unlock, fi_write_unlock, fi_downgrade_lock
13094+ */
13095+AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem);
1308ab2a 13096+
4a4d8108
AM
13097+#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem)
13098+#define FiMustAnyLock(f) AuRwMustAnyLock(&au_fi(f)->fi_rwsem)
13099+#define FiMustWriteLock(f) AuRwMustWriteLock(&au_fi(f)->fi_rwsem)
1facf9fc 13100+
1308ab2a 13101+/* ---------------------------------------------------------------------- */
13102+
4a4d8108
AM
13103+/* todo: hard/soft set? */
13104+static inline aufs_bindex_t au_fbstart(struct file *file)
dece6358 13105+{
4a4d8108
AM
13106+ FiMustAnyLock(file);
13107+ return au_fi(file)->fi_btop;
13108+}
dece6358 13109+
4a4d8108
AM
13110+static inline aufs_bindex_t au_fbend_dir(struct file *file)
13111+{
13112+ FiMustAnyLock(file);
13113+ AuDebugOn(!au_fi(file)->fi_hdir);
13114+ return au_fi(file)->fi_hdir->fd_bbot;
13115+}
1facf9fc 13116+
4a4d8108
AM
13117+static inline struct au_vdir *au_fvdir_cache(struct file *file)
13118+{
13119+ FiMustAnyLock(file);
13120+ AuDebugOn(!au_fi(file)->fi_hdir);
13121+ return au_fi(file)->fi_hdir->fd_vdir_cache;
13122+}
1facf9fc 13123+
4a4d8108
AM
13124+static inline void au_set_fbstart(struct file *file, aufs_bindex_t bindex)
13125+{
13126+ FiMustWriteLock(file);
13127+ au_fi(file)->fi_btop = bindex;
13128+}
1facf9fc 13129+
4a4d8108
AM
13130+static inline void au_set_fbend_dir(struct file *file, aufs_bindex_t bindex)
13131+{
13132+ FiMustWriteLock(file);
13133+ AuDebugOn(!au_fi(file)->fi_hdir);
13134+ au_fi(file)->fi_hdir->fd_bbot = bindex;
13135+}
1308ab2a 13136+
4a4d8108
AM
13137+static inline void au_set_fvdir_cache(struct file *file,
13138+ struct au_vdir *vdir_cache)
13139+{
13140+ FiMustWriteLock(file);
13141+ AuDebugOn(!au_fi(file)->fi_hdir);
13142+ au_fi(file)->fi_hdir->fd_vdir_cache = vdir_cache;
13143+}
dece6358 13144+
4a4d8108
AM
13145+static inline struct file *au_hf_top(struct file *file)
13146+{
13147+ FiMustAnyLock(file);
13148+ AuDebugOn(au_fi(file)->fi_hdir);
13149+ return au_fi(file)->fi_htop.hf_file;
13150+}
1facf9fc 13151+
4a4d8108
AM
13152+static inline struct file *au_hf_dir(struct file *file, aufs_bindex_t bindex)
13153+{
13154+ FiMustAnyLock(file);
13155+ AuDebugOn(!au_fi(file)->fi_hdir);
13156+ return au_fi(file)->fi_hdir->fd_hfile[0 + bindex].hf_file;
dece6358
AM
13157+}
13158+
4a4d8108
AM
13159+/* todo: memory barrier? */
13160+static inline unsigned int au_figen(struct file *f)
dece6358 13161+{
4a4d8108
AM
13162+ return atomic_read(&au_fi(f)->fi_generation);
13163+}
dece6358 13164+
2cbb1c4b
JR
13165+static inline void au_set_mmapped(struct file *f)
13166+{
13167+ if (atomic_inc_return(&au_fi(f)->fi_mmapped))
13168+ return;
0c3ec466 13169+ pr_warn("fi_mmapped wrapped around\n");
2cbb1c4b
JR
13170+ while (!atomic_inc_return(&au_fi(f)->fi_mmapped))
13171+ ;
13172+}
13173+
13174+static inline void au_unset_mmapped(struct file *f)
13175+{
13176+ atomic_dec(&au_fi(f)->fi_mmapped);
13177+}
13178+
4a4d8108
AM
13179+static inline int au_test_mmapped(struct file *f)
13180+{
2cbb1c4b
JR
13181+ return atomic_read(&au_fi(f)->fi_mmapped);
13182+}
13183+
13184+/* customize vma->vm_file */
13185+
13186+static inline void au_do_vm_file_reset(struct vm_area_struct *vma,
13187+ struct file *file)
13188+{
53392da6
AM
13189+ struct file *f;
13190+
13191+ f = vma->vm_file;
2cbb1c4b
JR
13192+ get_file(file);
13193+ vma->vm_file = file;
53392da6 13194+ fput(f);
2cbb1c4b
JR
13195+}
13196+
13197+#ifdef CONFIG_MMU
13198+#define AuDbgVmRegion(file, vma) do {} while (0)
13199+
13200+static inline void au_vm_file_reset(struct vm_area_struct *vma,
13201+ struct file *file)
13202+{
13203+ au_do_vm_file_reset(vma, file);
13204+}
13205+#else
13206+#define AuDbgVmRegion(file, vma) \
13207+ AuDebugOn((vma)->vm_region && (vma)->vm_region->vm_file != (file))
13208+
13209+static inline void au_vm_file_reset(struct vm_area_struct *vma,
13210+ struct file *file)
13211+{
53392da6
AM
13212+ struct file *f;
13213+
2cbb1c4b 13214+ au_do_vm_file_reset(vma, file);
53392da6 13215+ f = vma->vm_region->vm_file;
2cbb1c4b
JR
13216+ get_file(file);
13217+ vma->vm_region->vm_file = file;
53392da6 13218+ fput(f);
2cbb1c4b
JR
13219+}
13220+#endif /* CONFIG_MMU */
13221+
13222+/* handle vma->vm_prfile */
fb47a38f 13223+static inline void au_vm_prfile_set(struct vm_area_struct *vma,
2cbb1c4b
JR
13224+ struct file *file)
13225+{
2cbb1c4b
JR
13226+ get_file(file);
13227+ vma->vm_prfile = file;
13228+#ifndef CONFIG_MMU
13229+ get_file(file);
13230+ vma->vm_region->vm_prfile = file;
13231+#endif
fb47a38f 13232+}
1308ab2a 13233+
4a4d8108
AM
13234+#endif /* __KERNEL__ */
13235+#endif /* __AUFS_FILE_H__ */
7f207e10
AM
13236diff -urN /usr/share/empty/fs/aufs/finfo.c linux/fs/aufs/finfo.c
13237--- /usr/share/empty/fs/aufs/finfo.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 13238+++ linux/fs/aufs/finfo.c 2016-02-28 11:27:01.280579017 +0100
523b37e3 13239@@ -0,0 +1,156 @@
4a4d8108 13240+/*
7f2ca4b1 13241+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
13242+ *
13243+ * This program, aufs is free software; you can redistribute it and/or modify
13244+ * it under the terms of the GNU General Public License as published by
13245+ * the Free Software Foundation; either version 2 of the License, or
13246+ * (at your option) any later version.
13247+ *
13248+ * This program is distributed in the hope that it will be useful,
13249+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13250+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13251+ * GNU General Public License for more details.
13252+ *
13253+ * You should have received a copy of the GNU General Public License
523b37e3 13254+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 13255+ */
1308ab2a 13256+
4a4d8108
AM
13257+/*
13258+ * file private data
13259+ */
1facf9fc 13260+
4a4d8108 13261+#include "aufs.h"
1facf9fc 13262+
4a4d8108
AM
13263+void au_hfput(struct au_hfile *hf, struct file *file)
13264+{
13265+ /* todo: direct access f_flags */
2cbb1c4b 13266+ if (vfsub_file_flags(file) & __FMODE_EXEC)
4a4d8108
AM
13267+ allow_write_access(hf->hf_file);
13268+ fput(hf->hf_file);
13269+ hf->hf_file = NULL;
e49829fe 13270+ atomic_dec(&hf->hf_br->br_count);
4a4d8108
AM
13271+ hf->hf_br = NULL;
13272+}
1facf9fc 13273+
4a4d8108
AM
13274+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val)
13275+{
13276+ struct au_finfo *finfo = au_fi(file);
13277+ struct au_hfile *hf;
13278+ struct au_fidir *fidir;
13279+
13280+ fidir = finfo->fi_hdir;
13281+ if (!fidir) {
13282+ AuDebugOn(finfo->fi_btop != bindex);
13283+ hf = &finfo->fi_htop;
13284+ } else
13285+ hf = fidir->fd_hfile + bindex;
13286+
13287+ if (hf && hf->hf_file)
13288+ au_hfput(hf, file);
13289+ if (val) {
13290+ FiMustWriteLock(file);
7f2ca4b1 13291+ AuDebugOn(IS_ERR_OR_NULL(file->f_dentry));
4a4d8108
AM
13292+ hf->hf_file = val;
13293+ hf->hf_br = au_sbr(file->f_dentry->d_sb, bindex);
1308ab2a 13294+ }
4a4d8108 13295+}
1facf9fc 13296+
4a4d8108
AM
13297+void au_update_figen(struct file *file)
13298+{
13299+ atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_dentry));
13300+ /* smp_mb(); */ /* atomic_set */
1facf9fc 13301+}
13302+
4a4d8108
AM
13303+/* ---------------------------------------------------------------------- */
13304+
4a4d8108
AM
13305+struct au_fidir *au_fidir_alloc(struct super_block *sb)
13306+{
13307+ struct au_fidir *fidir;
13308+ int nbr;
13309+
13310+ nbr = au_sbend(sb) + 1;
13311+ if (nbr < 2)
13312+ nbr = 2; /* initial allocate for 2 branches */
13313+ fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS);
13314+ if (fidir) {
13315+ fidir->fd_bbot = -1;
13316+ fidir->fd_nent = nbr;
4a4d8108
AM
13317+ }
13318+
13319+ return fidir;
13320+}
13321+
13322+int au_fidir_realloc(struct au_finfo *finfo, int nbr)
13323+{
13324+ int err;
13325+ struct au_fidir *fidir, *p;
13326+
13327+ AuRwMustWriteLock(&finfo->fi_rwsem);
13328+ fidir = finfo->fi_hdir;
13329+ AuDebugOn(!fidir);
13330+
13331+ err = -ENOMEM;
13332+ p = au_kzrealloc(fidir, au_fidir_sz(fidir->fd_nent), au_fidir_sz(nbr),
13333+ GFP_NOFS);
13334+ if (p) {
13335+ p->fd_nent = nbr;
13336+ finfo->fi_hdir = p;
13337+ err = 0;
13338+ }
1facf9fc 13339+
dece6358 13340+ return err;
1facf9fc 13341+}
1308ab2a 13342+
13343+/* ---------------------------------------------------------------------- */
13344+
4a4d8108 13345+void au_finfo_fin(struct file *file)
1308ab2a 13346+{
4a4d8108
AM
13347+ struct au_finfo *finfo;
13348+
7f207e10
AM
13349+ au_nfiles_dec(file->f_dentry->d_sb);
13350+
4a4d8108
AM
13351+ finfo = au_fi(file);
13352+ AuDebugOn(finfo->fi_hdir);
13353+ AuRwDestroy(&finfo->fi_rwsem);
13354+ au_cache_free_finfo(finfo);
1308ab2a 13355+}
1308ab2a 13356+
e49829fe 13357+void au_fi_init_once(void *_finfo)
4a4d8108 13358+{
e49829fe 13359+ struct au_finfo *finfo = _finfo;
2cbb1c4b 13360+ static struct lock_class_key aufs_fi;
1308ab2a 13361+
e49829fe
JR
13362+ au_rw_init(&finfo->fi_rwsem);
13363+ au_rw_class(&finfo->fi_rwsem, &aufs_fi);
4a4d8108 13364+}
1308ab2a 13365+
4a4d8108
AM
13366+int au_finfo_init(struct file *file, struct au_fidir *fidir)
13367+{
1716fcea 13368+ int err;
4a4d8108
AM
13369+ struct au_finfo *finfo;
13370+ struct dentry *dentry;
13371+
13372+ err = -ENOMEM;
13373+ dentry = file->f_dentry;
13374+ finfo = au_cache_alloc_finfo();
13375+ if (unlikely(!finfo))
13376+ goto out;
13377+
13378+ err = 0;
7f207e10 13379+ au_nfiles_inc(dentry->d_sb);
1716fcea
AM
13380+ /* verbose coding for lock class name */
13381+ if (!fidir)
13382+ au_rw_class(&finfo->fi_rwsem, au_lc_key + AuLcNonDir_FIINFO);
13383+ else
13384+ au_rw_class(&finfo->fi_rwsem, au_lc_key + AuLcDir_FIINFO);
4a4d8108
AM
13385+ au_rw_write_lock(&finfo->fi_rwsem);
13386+ finfo->fi_btop = -1;
13387+ finfo->fi_hdir = fidir;
13388+ atomic_set(&finfo->fi_generation, au_digen(dentry));
13389+ /* smp_mb(); */ /* atomic_set */
13390+
13391+ file->private_data = finfo;
13392+
13393+out:
13394+ return err;
13395+}
7f207e10
AM
13396diff -urN /usr/share/empty/fs/aufs/f_op.c linux/fs/aufs/f_op.c
13397--- /usr/share/empty/fs/aufs/f_op.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
13398+++ linux/fs/aufs/f_op.c 2016-02-28 11:27:01.280579017 +0100
13399@@ -0,0 +1,757 @@
dece6358 13400+/*
7f2ca4b1 13401+ * Copyright (C) 2005-2016 Junjiro R. Okajima
dece6358
AM
13402+ *
13403+ * This program, aufs is free software; you can redistribute it and/or modify
13404+ * it under the terms of the GNU General Public License as published by
13405+ * the Free Software Foundation; either version 2 of the License, or
13406+ * (at your option) any later version.
13407+ *
13408+ * This program is distributed in the hope that it will be useful,
13409+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13410+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13411+ * GNU General Public License for more details.
13412+ *
13413+ * You should have received a copy of the GNU General Public License
523b37e3 13414+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
dece6358 13415+ */
1facf9fc 13416+
13417+/*
4a4d8108 13418+ * file and vm operations
1facf9fc 13419+ */
dece6358 13420+
86dc4139 13421+#include <linux/aio.h>
4a4d8108
AM
13422+#include <linux/fs_stack.h>
13423+#include <linux/mman.h>
4a4d8108 13424+#include <linux/security.h>
dece6358
AM
13425+#include "aufs.h"
13426+
7f2ca4b1 13427+int au_do_open_nondir(struct file *file, int flags, struct file *h_file)
1facf9fc 13428+{
4a4d8108
AM
13429+ int err;
13430+ aufs_bindex_t bindex;
7f2ca4b1 13431+ struct dentry *dentry, *h_dentry;
4a4d8108 13432+ struct au_finfo *finfo;
38d290e6 13433+ struct inode *h_inode;
4a4d8108
AM
13434+
13435+ FiMustWriteLock(file);
13436+
523b37e3 13437+ err = 0;
4a4d8108 13438+ dentry = file->f_dentry;
7f2ca4b1 13439+ AuDebugOn(IS_ERR_OR_NULL(dentry));
4a4d8108
AM
13440+ finfo = au_fi(file);
13441+ memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop));
2cbb1c4b 13442+ atomic_set(&finfo->fi_mmapped, 0);
4a4d8108 13443+ bindex = au_dbstart(dentry);
7f2ca4b1
JR
13444+ if (!h_file) {
13445+ h_dentry = au_h_dptr(dentry, bindex);
13446+ err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb);
13447+ if (unlikely(err))
13448+ goto out;
13449+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
13450+ } else {
13451+ h_dentry = h_file->f_dentry;
13452+ err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb);
13453+ if (unlikely(err))
13454+ goto out;
13455+ get_file(h_file);
13456+ }
4a4d8108
AM
13457+ if (IS_ERR(h_file))
13458+ err = PTR_ERR(h_file);
13459+ else {
38d290e6
JR
13460+ if ((flags & __O_TMPFILE)
13461+ && !(flags & O_EXCL)) {
13462+ h_inode = file_inode(h_file);
13463+ spin_lock(&h_inode->i_lock);
13464+ h_inode->i_state |= I_LINKABLE;
13465+ spin_unlock(&h_inode->i_lock);
13466+ }
4a4d8108
AM
13467+ au_set_fbstart(file, bindex);
13468+ au_set_h_fptr(file, bindex, h_file);
13469+ au_update_figen(file);
13470+ /* todo: necessary? */
13471+ /* file->f_ra = h_file->f_ra; */
13472+ }
027c5e7a 13473+
7f2ca4b1 13474+out:
4a4d8108 13475+ return err;
1facf9fc 13476+}
13477+
4a4d8108
AM
13478+static int aufs_open_nondir(struct inode *inode __maybe_unused,
13479+ struct file *file)
1facf9fc 13480+{
4a4d8108 13481+ int err;
1308ab2a 13482+ struct super_block *sb;
7f2ca4b1
JR
13483+ struct au_do_open_args args = {
13484+ .open = au_do_open_nondir
13485+ };
1facf9fc 13486+
523b37e3
AM
13487+ AuDbg("%pD, f_flags 0x%x, f_mode 0x%x\n",
13488+ file, vfsub_file_flags(file), file->f_mode);
1facf9fc 13489+
4a4d8108
AM
13490+ sb = file->f_dentry->d_sb;
13491+ si_read_lock(sb, AuLock_FLUSH);
7f2ca4b1 13492+ err = au_do_open(file, &args);
4a4d8108
AM
13493+ si_read_unlock(sb);
13494+ return err;
13495+}
1facf9fc 13496+
4a4d8108
AM
13497+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file)
13498+{
13499+ struct au_finfo *finfo;
13500+ aufs_bindex_t bindex;
1facf9fc 13501+
4a4d8108 13502+ finfo = au_fi(file);
523b37e3 13503+ au_sphl_del(&finfo->fi_hlist, &au_sbi(file->f_dentry->d_sb)->si_files);
4a4d8108 13504+ bindex = finfo->fi_btop;
b4510431 13505+ if (bindex >= 0)
4a4d8108 13506+ au_set_h_fptr(file, bindex, NULL);
7f207e10 13507+
4a4d8108
AM
13508+ au_finfo_fin(file);
13509+ return 0;
1facf9fc 13510+}
13511+
4a4d8108
AM
13512+/* ---------------------------------------------------------------------- */
13513+
13514+static int au_do_flush_nondir(struct file *file, fl_owner_t id)
dece6358 13515+{
1308ab2a 13516+ int err;
4a4d8108
AM
13517+ struct file *h_file;
13518+
13519+ err = 0;
13520+ h_file = au_hf_top(file);
13521+ if (h_file)
13522+ err = vfsub_flush(h_file, id);
13523+ return err;
13524+}
13525+
13526+static int aufs_flush_nondir(struct file *file, fl_owner_t id)
13527+{
13528+ return au_do_flush(file, id, au_do_flush_nondir);
13529+}
13530+
13531+/* ---------------------------------------------------------------------- */
9dbd164d
AM
13532+/*
13533+ * read and write functions acquire [fdi]_rwsem once, but release before
13534+ * mmap_sem. This is to stop a race condition with mmap(2).
13535+ * Releasing these aufs-rwsem should be safe, no branch-management (by keeping
13536+ * si_rwsem), no harmful copy-up should happen. Actually copy-up may happen in
13537+ * read functions after [fdi]_rwsem are released, but it should be harmless.
13538+ */
4a4d8108 13539+
7f2ca4b1
JR
13540+/* Callers should call au_read_post() or fput() in the end */
13541+struct file *au_read_pre(struct file *file, int keep_fi)
4a4d8108 13542+{
4a4d8108 13543+ struct file *h_file;
7f2ca4b1 13544+ int err;
1facf9fc 13545+
4a4d8108 13546+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0);
7f2ca4b1
JR
13547+ if (!err) {
13548+ di_read_unlock(file->f_dentry, AuLock_IR);
13549+ h_file = au_hf_top(file);
13550+ get_file(h_file);
13551+ if (!keep_fi)
13552+ fi_read_unlock(file);
13553+ } else
13554+ h_file = ERR_PTR(err);
13555+
13556+ return h_file;
13557+}
13558+
13559+static void au_read_post(struct inode *inode, struct file *h_file)
13560+{
13561+ /* update without lock, I don't think it a problem */
13562+ fsstack_copy_attr_atime(inode, file_inode(h_file));
13563+ fput(h_file);
13564+}
13565+
13566+struct au_write_pre {
13567+ blkcnt_t blks;
13568+ aufs_bindex_t bstart;
13569+};
13570+
13571+/*
13572+ * return with iinfo write-locked
13573+ * callers should call au_write_post() or iinfo_write_unlock() + fput() in the
13574+ * end
13575+ */
13576+static struct file *au_write_pre(struct file *file, int do_ready,
13577+ struct au_write_pre *wpre)
13578+{
13579+ struct file *h_file;
13580+ struct dentry *dentry;
13581+ int err;
13582+ struct au_pin pin;
13583+
13584+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
13585+ h_file = ERR_PTR(err);
dece6358
AM
13586+ if (unlikely(err))
13587+ goto out;
1facf9fc 13588+
7f2ca4b1
JR
13589+ dentry = file->f_dentry;
13590+ if (do_ready) {
13591+ err = au_ready_to_write(file, -1, &pin);
13592+ if (unlikely(err)) {
13593+ h_file = ERR_PTR(err);
13594+ di_write_unlock(dentry);
13595+ goto out_fi;
13596+ }
13597+ }
13598+
13599+ di_downgrade_lock(dentry, /*flags*/0);
13600+ if (wpre)
13601+ wpre->bstart = au_fbstart(file);
4a4d8108 13602+ h_file = au_hf_top(file);
9dbd164d 13603+ get_file(h_file);
7f2ca4b1
JR
13604+ if (wpre)
13605+ wpre->blks = file_inode(h_file)->i_blocks;
13606+ if (do_ready)
13607+ au_unpin(&pin);
13608+ di_read_unlock(dentry, /*flags*/0);
13609+
13610+out_fi:
13611+ fi_write_unlock(file);
13612+out:
13613+ return h_file;
13614+}
13615+
13616+static void au_write_post(struct inode *inode, struct file *h_file,
13617+ struct au_write_pre *wpre, ssize_t written)
13618+{
13619+ struct inode *h_inode;
13620+
13621+ au_cpup_attr_timesizes(inode);
13622+ AuDebugOn(au_ibstart(inode) != wpre->bstart);
13623+ h_inode = file_inode(h_file);
13624+ inode->i_mode = h_inode->i_mode;
13625+ ii_write_unlock(inode);
13626+ fput(h_file);
13627+
13628+ /* AuDbg("blks %llu, %llu\n", (u64)blks, (u64)h_inode->i_blocks); */
13629+ if (written > 0)
13630+ au_fhsm_wrote(inode->i_sb, wpre->bstart,
13631+ /*force*/h_inode->i_blocks > wpre->blks);
13632+}
13633+
13634+static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
13635+ loff_t *ppos)
13636+{
13637+ ssize_t err;
13638+ struct inode *inode;
13639+ struct file *h_file;
13640+ struct super_block *sb;
13641+
13642+ inode = file_inode(file);
13643+ sb = inode->i_sb;
13644+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
13645+
13646+ h_file = au_read_pre(file, /*keep_fi*/0);
13647+ err = PTR_ERR(h_file);
13648+ if (IS_ERR(h_file))
13649+ goto out;
9dbd164d
AM
13650+
13651+ /* filedata may be obsoleted by concurrent copyup, but no problem */
4a4d8108
AM
13652+ err = vfsub_read_u(h_file, buf, count, ppos);
13653+ /* todo: necessary? */
13654+ /* file->f_ra = h_file->f_ra; */
7f2ca4b1 13655+ au_read_post(inode, h_file);
1308ab2a 13656+
4f0767ce 13657+out:
dece6358
AM
13658+ si_read_unlock(sb);
13659+ return err;
13660+}
1facf9fc 13661+
e49829fe
JR
13662+/*
13663+ * todo: very ugly
13664+ * it safely locks both i_mutex and si_rwsem (the latter for read).
13665+ * if the plink maintenance mode continues forever (that is the problem),
13666+ * may loop forever.
13667+ */
13668+static void au_mtx_and_read_lock(struct inode *inode)
13669+{
13670+ int err;
13671+ struct super_block *sb = inode->i_sb;
13672+
13673+ while (1) {
13674+ mutex_lock(&inode->i_mutex);
13675+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
13676+ if (!err)
13677+ break;
13678+ mutex_unlock(&inode->i_mutex);
13679+ si_read_lock(sb, AuLock_NOPLMW);
13680+ si_read_unlock(sb);
13681+ }
13682+}
13683+
4a4d8108
AM
13684+static ssize_t aufs_write(struct file *file, const char __user *ubuf,
13685+ size_t count, loff_t *ppos)
dece6358 13686+{
4a4d8108 13687+ ssize_t err;
7f2ca4b1
JR
13688+ struct au_write_pre wpre;
13689+ struct inode *inode;
4a4d8108
AM
13690+ struct file *h_file;
13691+ char __user *buf = (char __user *)ubuf;
1facf9fc 13692+
7f2ca4b1 13693+ inode = file_inode(file);
e49829fe 13694+ au_mtx_and_read_lock(inode);
1facf9fc 13695+
7f2ca4b1
JR
13696+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13697+ err = PTR_ERR(h_file);
13698+ if (IS_ERR(h_file))
9dbd164d 13699+ goto out;
9dbd164d 13700+
4a4d8108 13701+ err = vfsub_write_u(h_file, buf, count, ppos);
7f2ca4b1 13702+ au_write_post(inode, h_file, &wpre, err);
1facf9fc 13703+
4f0767ce 13704+out:
7f2ca4b1 13705+ si_read_unlock(inode->i_sb);
4a4d8108 13706+ mutex_unlock(&inode->i_mutex);
dece6358
AM
13707+ return err;
13708+}
1facf9fc 13709+
076b876e
AM
13710+static ssize_t au_do_iter(struct file *h_file, int rw, struct kiocb *kio,
13711+ struct iov_iter *iov_iter)
dece6358 13712+{
4a4d8108
AM
13713+ ssize_t err;
13714+ struct file *file;
076b876e
AM
13715+ ssize_t (*iter)(struct kiocb *, struct iov_iter *);
13716+ ssize_t (*aio)(struct kiocb *, const struct iovec *, unsigned long,
13717+ loff_t);
1facf9fc 13718+
4a4d8108
AM
13719+ err = security_file_permission(h_file, rw);
13720+ if (unlikely(err))
13721+ goto out;
1facf9fc 13722+
4a4d8108 13723+ err = -ENOSYS;
076b876e
AM
13724+ iter = NULL;
13725+ aio = NULL;
13726+ if (rw == MAY_READ) {
13727+ iter = h_file->f_op->read_iter;
13728+ aio = h_file->f_op->aio_read;
13729+ } else if (rw == MAY_WRITE) {
13730+ iter = h_file->f_op->write_iter;
13731+ aio = h_file->f_op->aio_write;
13732+ }
13733+
13734+ file = kio->ki_filp;
13735+ kio->ki_filp = h_file;
13736+ if (iter) {
2cbb1c4b 13737+ lockdep_off();
076b876e
AM
13738+ err = iter(kio, iov_iter);
13739+ lockdep_on();
13740+ } else if (aio) {
13741+ lockdep_off();
13742+ err = aio(kio, iov_iter->iov, iov_iter->nr_segs, kio->ki_pos);
2cbb1c4b 13743+ lockdep_on();
4a4d8108
AM
13744+ } else
13745+ /* currently there is no such fs */
13746+ WARN_ON_ONCE(1);
076b876e 13747+ kio->ki_filp = file;
1facf9fc 13748+
4f0767ce 13749+out:
dece6358
AM
13750+ return err;
13751+}
1facf9fc 13752+
076b876e 13753+static ssize_t aufs_read_iter(struct kiocb *kio, struct iov_iter *iov_iter)
1facf9fc 13754+{
4a4d8108
AM
13755+ ssize_t err;
13756+ struct file *file, *h_file;
7f2ca4b1 13757+ struct inode *inode;
dece6358 13758+ struct super_block *sb;
1facf9fc 13759+
4a4d8108 13760+ file = kio->ki_filp;
7f2ca4b1
JR
13761+ inode = file_inode(file);
13762+ sb = inode->i_sb;
e49829fe 13763+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
4a4d8108 13764+
7f2ca4b1
JR
13765+ h_file = au_read_pre(file, /*keep_fi*/0);
13766+ err = PTR_ERR(h_file);
13767+ if (IS_ERR(h_file))
13768+ goto out;
9dbd164d 13769+
076b876e 13770+ err = au_do_iter(h_file, MAY_READ, kio, iov_iter);
4a4d8108
AM
13771+ /* todo: necessary? */
13772+ /* file->f_ra = h_file->f_ra; */
7f2ca4b1 13773+ au_read_post(inode, h_file);
1facf9fc 13774+
4f0767ce 13775+out:
4a4d8108 13776+ si_read_unlock(sb);
1308ab2a 13777+ return err;
13778+}
1facf9fc 13779+
076b876e 13780+static ssize_t aufs_write_iter(struct kiocb *kio, struct iov_iter *iov_iter)
1308ab2a 13781+{
4a4d8108 13782+ ssize_t err;
7f2ca4b1
JR
13783+ struct au_write_pre wpre;
13784+ struct inode *inode;
4a4d8108 13785+ struct file *file, *h_file;
1308ab2a 13786+
4a4d8108 13787+ file = kio->ki_filp;
7f2ca4b1 13788+ inode = file_inode(file);
e49829fe
JR
13789+ au_mtx_and_read_lock(inode);
13790+
7f2ca4b1
JR
13791+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13792+ err = PTR_ERR(h_file);
13793+ if (IS_ERR(h_file))
9dbd164d 13794+ goto out;
9dbd164d 13795+
076b876e 13796+ err = au_do_iter(h_file, MAY_WRITE, kio, iov_iter);
7f2ca4b1 13797+ au_write_post(inode, h_file, &wpre, err);
1facf9fc 13798+
4f0767ce 13799+out:
7f2ca4b1 13800+ si_read_unlock(inode->i_sb);
4a4d8108 13801+ mutex_unlock(&inode->i_mutex);
dece6358 13802+ return err;
1facf9fc 13803+}
13804+
4a4d8108
AM
13805+static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
13806+ struct pipe_inode_info *pipe, size_t len,
13807+ unsigned int flags)
1facf9fc 13808+{
4a4d8108
AM
13809+ ssize_t err;
13810+ struct file *h_file;
7f2ca4b1 13811+ struct inode *inode;
dece6358 13812+ struct super_block *sb;
1facf9fc 13813+
7f2ca4b1
JR
13814+ inode = file_inode(file);
13815+ sb = inode->i_sb;
e49829fe 13816+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
7f2ca4b1
JR
13817+
13818+ h_file = au_read_pre(file, /*keep_fi*/1);
13819+ err = PTR_ERR(h_file);
13820+ if (IS_ERR(h_file))
dece6358 13821+ goto out;
1facf9fc 13822+
4a4d8108 13823+ if (au_test_loopback_kthread()) {
87a755f4
AM
13824+ au_warn_loopback(h_file->f_dentry->d_sb);
13825+ if (file->f_mapping != h_file->f_mapping) {
13826+ file->f_mapping = h_file->f_mapping;
13827+ smp_mb(); /* unnecessary? */
13828+ }
1308ab2a 13829+ }
9dbd164d
AM
13830+ fi_read_unlock(file);
13831+
4a4d8108
AM
13832+ err = vfsub_splice_to(h_file, ppos, pipe, len, flags);
13833+ /* todo: necessasry? */
13834+ /* file->f_ra = h_file->f_ra; */
7f2ca4b1 13835+ au_read_post(inode, h_file);
1facf9fc 13836+
4f0767ce 13837+out:
4a4d8108 13838+ si_read_unlock(sb);
dece6358 13839+ return err;
1facf9fc 13840+}
13841+
4a4d8108
AM
13842+static ssize_t
13843+aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
13844+ size_t len, unsigned int flags)
1facf9fc 13845+{
4a4d8108 13846+ ssize_t err;
7f2ca4b1
JR
13847+ struct au_write_pre wpre;
13848+ struct inode *inode;
076b876e 13849+ struct file *h_file;
1facf9fc 13850+
7f2ca4b1 13851+ inode = file_inode(file);
e49829fe 13852+ au_mtx_and_read_lock(inode);
9dbd164d 13853+
7f2ca4b1
JR
13854+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13855+ err = PTR_ERR(h_file);
13856+ if (IS_ERR(h_file))
9dbd164d 13857+ goto out;
9dbd164d 13858+
4a4d8108 13859+ err = vfsub_splice_from(pipe, h_file, ppos, len, flags);
7f2ca4b1 13860+ au_write_post(inode, h_file, &wpre, err);
1facf9fc 13861+
4f0767ce 13862+out:
7f2ca4b1 13863+ si_read_unlock(inode->i_sb);
4a4d8108
AM
13864+ mutex_unlock(&inode->i_mutex);
13865+ return err;
13866+}
1facf9fc 13867+
38d290e6
JR
13868+static long aufs_fallocate(struct file *file, int mode, loff_t offset,
13869+ loff_t len)
13870+{
13871+ long err;
7f2ca4b1 13872+ struct au_write_pre wpre;
38d290e6
JR
13873+ struct inode *inode;
13874+ struct file *h_file;
13875+
7f2ca4b1 13876+ inode = file_inode(file);
38d290e6
JR
13877+ au_mtx_and_read_lock(inode);
13878+
7f2ca4b1
JR
13879+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
13880+ err = PTR_ERR(h_file);
13881+ if (IS_ERR(h_file))
38d290e6 13882+ goto out;
38d290e6
JR
13883+
13884+ lockdep_off();
13885+ err = do_fallocate(h_file, mode, offset, len);
13886+ lockdep_on();
7f2ca4b1 13887+ au_write_post(inode, h_file, &wpre, /*written*/1);
38d290e6
JR
13888+
13889+out:
7f2ca4b1 13890+ si_read_unlock(inode->i_sb);
38d290e6
JR
13891+ mutex_unlock(&inode->i_mutex);
13892+ return err;
13893+}
13894+
4a4d8108
AM
13895+/* ---------------------------------------------------------------------- */
13896+
9dbd164d
AM
13897+/*
13898+ * The locking order around current->mmap_sem.
13899+ * - in most and regular cases
13900+ * file I/O syscall -- aufs_read() or something
13901+ * -- si_rwsem for read -- mmap_sem
13902+ * (Note that [fdi]i_rwsem are released before mmap_sem).
13903+ * - in mmap case
13904+ * mmap(2) -- mmap_sem -- aufs_mmap() -- si_rwsem for read -- [fdi]i_rwsem
13905+ * This AB-BA order is definitly bad, but is not a problem since "si_rwsem for
13906+ * read" allows muliple processes to acquire it and [fdi]i_rwsem are not held in
13907+ * file I/O. Aufs needs to stop lockdep in aufs_mmap() though.
13908+ * It means that when aufs acquires si_rwsem for write, the process should never
13909+ * acquire mmap_sem.
13910+ *
392086de 13911+ * Actually aufs_iterate() holds [fdi]i_rwsem before mmap_sem, but this is not a
9dbd164d
AM
13912+ * problem either since any directory is not able to be mmap-ed.
13913+ * The similar scenario is applied to aufs_readlink() too.
13914+ */
13915+
38d290e6 13916+#if 0 /* stop calling security_file_mmap() */
2dfbb274
AM
13917+/* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */
13918+#define AuConv_VM_PROT(f, b) _calc_vm_trans(f, VM_##b, PROT_##b)
13919+
13920+static unsigned long au_arch_prot_conv(unsigned long flags)
13921+{
13922+ /* currently ppc64 only */
13923+#ifdef CONFIG_PPC64
13924+ /* cf. linux/arch/powerpc/include/asm/mman.h */
13925+ AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO);
13926+ return AuConv_VM_PROT(flags, SAO);
13927+#else
13928+ AuDebugOn(arch_calc_vm_prot_bits(-1));
13929+ return 0;
13930+#endif
13931+}
13932+
13933+static unsigned long au_prot_conv(unsigned long flags)
13934+{
13935+ return AuConv_VM_PROT(flags, READ)
13936+ | AuConv_VM_PROT(flags, WRITE)
13937+ | AuConv_VM_PROT(flags, EXEC)
13938+ | au_arch_prot_conv(flags);
13939+}
13940+
13941+/* cf. linux/include/linux/mman.h: calc_vm_flag_bits() */
13942+#define AuConv_VM_MAP(f, b) _calc_vm_trans(f, VM_##b, MAP_##b)
13943+
13944+static unsigned long au_flag_conv(unsigned long flags)
13945+{
13946+ return AuConv_VM_MAP(flags, GROWSDOWN)
13947+ | AuConv_VM_MAP(flags, DENYWRITE)
2dfbb274
AM
13948+ | AuConv_VM_MAP(flags, LOCKED);
13949+}
38d290e6 13950+#endif
2dfbb274 13951+
9dbd164d 13952+static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
dece6358 13953+{
4a4d8108 13954+ int err;
4a4d8108 13955+ const unsigned char wlock
9dbd164d 13956+ = (file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
4a4d8108 13957+ struct super_block *sb;
9dbd164d 13958+ struct file *h_file;
7f2ca4b1 13959+ struct inode *inode;
9dbd164d
AM
13960+
13961+ AuDbgVmRegion(file, vma);
1308ab2a 13962+
7f2ca4b1
JR
13963+ inode = file_inode(file);
13964+ sb = inode->i_sb;
9dbd164d 13965+ lockdep_off();
e49829fe 13966+ si_read_lock(sb, AuLock_NOPLMW);
4a4d8108 13967+
7f2ca4b1 13968+ h_file = au_write_pre(file, wlock, /*wpre*/NULL);
9dbd164d 13969+ lockdep_on();
7f2ca4b1
JR
13970+ err = PTR_ERR(h_file);
13971+ if (IS_ERR(h_file))
13972+ goto out;
1308ab2a 13973+
7f2ca4b1
JR
13974+ err = 0;
13975+ au_set_mmapped(file);
9dbd164d 13976+ au_vm_file_reset(vma, h_file);
38d290e6
JR
13977+ /*
13978+ * we cannot call security_mmap_file() here since it may acquire
13979+ * mmap_sem or i_mutex.
13980+ *
13981+ * err = security_mmap_file(h_file, au_prot_conv(vma->vm_flags),
13982+ * au_flag_conv(vma->vm_flags));
13983+ */
9dbd164d
AM
13984+ if (!err)
13985+ err = h_file->f_op->mmap(h_file, vma);
7f2ca4b1
JR
13986+ if (!err) {
13987+ au_vm_prfile_set(vma, file);
13988+ fsstack_copy_attr_atime(inode, file_inode(h_file));
13989+ goto out_fput; /* success */
13990+ }
2cbb1c4b
JR
13991+ au_unset_mmapped(file);
13992+ au_vm_file_reset(vma, file);
7f2ca4b1 13993+
2cbb1c4b 13994+out_fput:
9dbd164d 13995+ lockdep_off();
7f2ca4b1
JR
13996+ ii_write_unlock(inode);
13997+ lockdep_on();
13998+ fput(h_file);
4f0767ce 13999+out:
7f2ca4b1 14000+ lockdep_off();
9dbd164d
AM
14001+ si_read_unlock(sb);
14002+ lockdep_on();
14003+ AuTraceErr(err);
4a4d8108
AM
14004+ return err;
14005+}
14006+
14007+/* ---------------------------------------------------------------------- */
14008+
1e00d052
AM
14009+static int aufs_fsync_nondir(struct file *file, loff_t start, loff_t end,
14010+ int datasync)
4a4d8108
AM
14011+{
14012+ int err;
7f2ca4b1 14013+ struct au_write_pre wpre;
4a4d8108
AM
14014+ struct inode *inode;
14015+ struct file *h_file;
4a4d8108
AM
14016+
14017+ err = 0; /* -EBADF; */ /* posix? */
14018+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
7f2ca4b1 14019+ goto out;
4a4d8108 14020+
7f2ca4b1
JR
14021+ inode = file_inode(file);
14022+ au_mtx_and_read_lock(inode);
14023+
14024+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
14025+ err = PTR_ERR(h_file);
14026+ if (IS_ERR(h_file))
4a4d8108 14027+ goto out_unlock;
4a4d8108 14028+
53392da6 14029+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
7f2ca4b1 14030+ au_write_post(inode, h_file, &wpre, /*written*/0);
4a4d8108 14031+
4f0767ce 14032+out_unlock:
7f2ca4b1 14033+ si_read_unlock(inode->i_sb);
1e00d052 14034+ mutex_unlock(&inode->i_mutex);
7f2ca4b1 14035+out:
4a4d8108 14036+ return err;
dece6358
AM
14037+}
14038+
4a4d8108
AM
14039+/* no one supports this operation, currently */
14040+#if 0
14041+static int aufs_aio_fsync_nondir(struct kiocb *kio, int datasync)
dece6358 14042+{
4a4d8108 14043+ int err;
7f2ca4b1 14044+ struct au_write_pre wpre;
4a4d8108
AM
14045+ struct inode *inode;
14046+ struct file *file, *h_file;
1308ab2a 14047+
4a4d8108
AM
14048+ err = 0; /* -EBADF; */ /* posix? */
14049+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
14050+ goto out;
1308ab2a 14051+
7f2ca4b1
JR
14052+ file = kio->ki_filp;
14053+ inode = file_inode(file);
14054+ au_mtx_and_read_lock(inode);
14055+
14056+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
14057+ err = PTR_ERR(h_file);
14058+ if (IS_ERR(h_file))
4a4d8108 14059+ goto out_unlock;
1308ab2a 14060+
4a4d8108
AM
14061+ err = -ENOSYS;
14062+ h_file = au_hf_top(file);
523b37e3 14063+ if (h_file->f_op->aio_fsync) {
4a4d8108 14064+ struct mutex *h_mtx;
1308ab2a 14065+
c06a8ce3 14066+ h_mtx = &file_inode(h_file)->i_mutex;
4a4d8108
AM
14067+ if (!is_sync_kiocb(kio)) {
14068+ get_file(h_file);
14069+ fput(file);
14070+ }
14071+ kio->ki_filp = h_file;
14072+ err = h_file->f_op->aio_fsync(kio, datasync);
14073+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
14074+ if (!err)
14075+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL);
14076+ /*ignore*/
4a4d8108
AM
14077+ mutex_unlock(h_mtx);
14078+ }
7f2ca4b1 14079+ au_write_post(inode, h_file, &wpre, /*written*/0);
1308ab2a 14080+
4f0767ce 14081+out_unlock:
e49829fe 14082+ si_read_unlock(inode->sb);
4a4d8108 14083+ mutex_unlock(&inode->i_mutex);
7f2ca4b1 14084+out:
4a4d8108 14085+ return err;
dece6358 14086+}
4a4d8108 14087+#endif
dece6358 14088+
4a4d8108 14089+static int aufs_fasync(int fd, struct file *file, int flag)
dece6358 14090+{
4a4d8108
AM
14091+ int err;
14092+ struct file *h_file;
4a4d8108 14093+ struct super_block *sb;
1308ab2a 14094+
7f2ca4b1 14095+ sb = file->f_dentry->d_sb;
e49829fe 14096+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
7f2ca4b1
JR
14097+
14098+ h_file = au_read_pre(file, /*keep_fi*/0);
14099+ err = PTR_ERR(h_file);
14100+ if (IS_ERR(h_file))
4a4d8108
AM
14101+ goto out;
14102+
523b37e3 14103+ if (h_file->f_op->fasync)
4a4d8108 14104+ err = h_file->f_op->fasync(fd, h_file, flag);
7f2ca4b1 14105+ fput(h_file); /* instead of au_read_post() */
1308ab2a 14106+
4f0767ce 14107+out:
4a4d8108 14108+ si_read_unlock(sb);
1308ab2a 14109+ return err;
dece6358 14110+}
4a4d8108
AM
14111+
14112+/* ---------------------------------------------------------------------- */
14113+
14114+/* no one supports this operation, currently */
14115+#if 0
14116+static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
7f2ca4b1 14117+ size_t len, loff_t *pos, int more)
4a4d8108
AM
14118+{
14119+}
14120+#endif
14121+
14122+/* ---------------------------------------------------------------------- */
14123+
14124+const struct file_operations aufs_file_fop = {
14125+ .owner = THIS_MODULE,
2cbb1c4b 14126+
027c5e7a 14127+ .llseek = default_llseek,
4a4d8108
AM
14128+
14129+ .read = aufs_read,
14130+ .write = aufs_write,
076b876e
AM
14131+ .read_iter = aufs_read_iter,
14132+ .write_iter = aufs_write_iter,
14133+
4a4d8108
AM
14134+#ifdef CONFIG_AUFS_POLL
14135+ .poll = aufs_poll,
14136+#endif
14137+ .unlocked_ioctl = aufs_ioctl_nondir,
b752ccd1 14138+#ifdef CONFIG_COMPAT
c2b27bf2 14139+ .compat_ioctl = aufs_compat_ioctl_nondir,
b752ccd1 14140+#endif
4a4d8108
AM
14141+ .mmap = aufs_mmap,
14142+ .open = aufs_open_nondir,
14143+ .flush = aufs_flush_nondir,
14144+ .release = aufs_release_nondir,
14145+ .fsync = aufs_fsync_nondir,
14146+ /* .aio_fsync = aufs_aio_fsync_nondir, */
14147+ .fasync = aufs_fasync,
14148+ /* .sendpage = aufs_sendpage, */
14149+ .splice_write = aufs_splice_write,
14150+ .splice_read = aufs_splice_read,
14151+#if 0
14152+ .aio_splice_write = aufs_aio_splice_write,
38d290e6 14153+ .aio_splice_read = aufs_aio_splice_read,
4a4d8108 14154+#endif
38d290e6 14155+ .fallocate = aufs_fallocate
4a4d8108 14156+};
7f207e10
AM
14157diff -urN /usr/share/empty/fs/aufs/fstype.h linux/fs/aufs/fstype.h
14158--- /usr/share/empty/fs/aufs/fstype.h 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
14159+++ linux/fs/aufs/fstype.h 2016-02-28 11:27:01.280579017 +0100
14160@@ -0,0 +1,400 @@
4a4d8108 14161+/*
7f2ca4b1 14162+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
14163+ *
14164+ * This program, aufs is free software; you can redistribute it and/or modify
14165+ * it under the terms of the GNU General Public License as published by
14166+ * the Free Software Foundation; either version 2 of the License, or
14167+ * (at your option) any later version.
14168+ *
14169+ * This program is distributed in the hope that it will be useful,
14170+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14171+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14172+ * GNU General Public License for more details.
14173+ *
14174+ * You should have received a copy of the GNU General Public License
523b37e3 14175+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
14176+ */
14177+
14178+/*
14179+ * judging filesystem type
14180+ */
14181+
14182+#ifndef __AUFS_FSTYPE_H__
14183+#define __AUFS_FSTYPE_H__
14184+
14185+#ifdef __KERNEL__
14186+
14187+#include <linux/fs.h>
14188+#include <linux/magic.h>
7f2ca4b1 14189+#include <linux/nfs_fs.h>
4a4d8108 14190+#include <linux/romfs_fs.h>
4a4d8108
AM
14191+
14192+static inline int au_test_aufs(struct super_block *sb)
14193+{
14194+ return sb->s_magic == AUFS_SUPER_MAGIC;
14195+}
14196+
14197+static inline const char *au_sbtype(struct super_block *sb)
14198+{
14199+ return sb->s_type->name;
14200+}
1308ab2a 14201+
14202+static inline int au_test_iso9660(struct super_block *sb __maybe_unused)
14203+{
7f2ca4b1
JR
14204+#if defined(CONFIG_ISO9660_FS) || defined(CONFIG_ISO9660_FS_MODULE)
14205+ return sb->s_magic == ISOFS_SUPER_MAGIC;
dece6358
AM
14206+#else
14207+ return 0;
14208+#endif
14209+}
14210+
1308ab2a 14211+static inline int au_test_romfs(struct super_block *sb __maybe_unused)
dece6358 14212+{
7f2ca4b1
JR
14213+#if defined(CONFIG_ROMFS_FS) || defined(CONFIG_ROMFS_FS_MODULE)
14214+ return sb->s_magic == ROMFS_MAGIC;
dece6358
AM
14215+#else
14216+ return 0;
14217+#endif
14218+}
14219+
1308ab2a 14220+static inline int au_test_cramfs(struct super_block *sb __maybe_unused)
dece6358 14221+{
1308ab2a 14222+#if defined(CONFIG_CRAMFS) || defined(CONFIG_CRAMFS_MODULE)
14223+ return sb->s_magic == CRAMFS_MAGIC;
14224+#endif
14225+ return 0;
14226+}
14227+
14228+static inline int au_test_nfs(struct super_block *sb __maybe_unused)
14229+{
14230+#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE)
14231+ return sb->s_magic == NFS_SUPER_MAGIC;
dece6358
AM
14232+#else
14233+ return 0;
14234+#endif
14235+}
14236+
1308ab2a 14237+static inline int au_test_fuse(struct super_block *sb __maybe_unused)
dece6358 14238+{
1308ab2a 14239+#if defined(CONFIG_FUSE_FS) || defined(CONFIG_FUSE_FS_MODULE)
14240+ return sb->s_magic == FUSE_SUPER_MAGIC;
dece6358
AM
14241+#else
14242+ return 0;
14243+#endif
14244+}
14245+
1308ab2a 14246+static inline int au_test_xfs(struct super_block *sb __maybe_unused)
dece6358 14247+{
1308ab2a 14248+#if defined(CONFIG_XFS_FS) || defined(CONFIG_XFS_FS_MODULE)
14249+ return sb->s_magic == XFS_SB_MAGIC;
dece6358
AM
14250+#else
14251+ return 0;
14252+#endif
14253+}
14254+
1308ab2a 14255+static inline int au_test_tmpfs(struct super_block *sb __maybe_unused)
dece6358 14256+{
1308ab2a 14257+#ifdef CONFIG_TMPFS
14258+ return sb->s_magic == TMPFS_MAGIC;
14259+#else
14260+ return 0;
dece6358 14261+#endif
dece6358
AM
14262+}
14263+
1308ab2a 14264+static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused)
1facf9fc 14265+{
1308ab2a 14266+#if defined(CONFIG_ECRYPT_FS) || defined(CONFIG_ECRYPT_FS_MODULE)
14267+ return !strcmp(au_sbtype(sb), "ecryptfs");
14268+#else
14269+ return 0;
14270+#endif
1facf9fc 14271+}
14272+
1308ab2a 14273+static inline int au_test_ramfs(struct super_block *sb)
14274+{
14275+ return sb->s_magic == RAMFS_MAGIC;
14276+}
14277+
14278+static inline int au_test_ubifs(struct super_block *sb __maybe_unused)
14279+{
14280+#if defined(CONFIG_UBIFS_FS) || defined(CONFIG_UBIFS_FS_MODULE)
14281+ return sb->s_magic == UBIFS_SUPER_MAGIC;
14282+#else
14283+ return 0;
14284+#endif
14285+}
14286+
14287+static inline int au_test_procfs(struct super_block *sb __maybe_unused)
14288+{
14289+#ifdef CONFIG_PROC_FS
14290+ return sb->s_magic == PROC_SUPER_MAGIC;
14291+#else
14292+ return 0;
14293+#endif
14294+}
14295+
14296+static inline int au_test_sysfs(struct super_block *sb __maybe_unused)
14297+{
14298+#ifdef CONFIG_SYSFS
14299+ return sb->s_magic == SYSFS_MAGIC;
14300+#else
14301+ return 0;
14302+#endif
14303+}
14304+
14305+static inline int au_test_configfs(struct super_block *sb __maybe_unused)
14306+{
14307+#if defined(CONFIG_CONFIGFS_FS) || defined(CONFIG_CONFIGFS_FS_MODULE)
14308+ return sb->s_magic == CONFIGFS_MAGIC;
14309+#else
14310+ return 0;
14311+#endif
14312+}
14313+
14314+static inline int au_test_minix(struct super_block *sb __maybe_unused)
14315+{
14316+#if defined(CONFIG_MINIX_FS) || defined(CONFIG_MINIX_FS_MODULE)
14317+ return sb->s_magic == MINIX3_SUPER_MAGIC
14318+ || sb->s_magic == MINIX2_SUPER_MAGIC
14319+ || sb->s_magic == MINIX2_SUPER_MAGIC2
14320+ || sb->s_magic == MINIX_SUPER_MAGIC
14321+ || sb->s_magic == MINIX_SUPER_MAGIC2;
14322+#else
14323+ return 0;
14324+#endif
14325+}
14326+
1308ab2a 14327+static inline int au_test_fat(struct super_block *sb __maybe_unused)
14328+{
14329+#if defined(CONFIG_FAT_FS) || defined(CONFIG_FAT_FS_MODULE)
14330+ return sb->s_magic == MSDOS_SUPER_MAGIC;
14331+#else
14332+ return 0;
14333+#endif
14334+}
14335+
14336+static inline int au_test_msdos(struct super_block *sb)
14337+{
14338+ return au_test_fat(sb);
14339+}
14340+
14341+static inline int au_test_vfat(struct super_block *sb)
14342+{
14343+ return au_test_fat(sb);
14344+}
14345+
14346+static inline int au_test_securityfs(struct super_block *sb __maybe_unused)
14347+{
14348+#ifdef CONFIG_SECURITYFS
14349+ return sb->s_magic == SECURITYFS_MAGIC;
14350+#else
14351+ return 0;
14352+#endif
14353+}
14354+
14355+static inline int au_test_squashfs(struct super_block *sb __maybe_unused)
14356+{
14357+#if defined(CONFIG_SQUASHFS) || defined(CONFIG_SQUASHFS_MODULE)
14358+ return sb->s_magic == SQUASHFS_MAGIC;
14359+#else
14360+ return 0;
14361+#endif
14362+}
14363+
14364+static inline int au_test_btrfs(struct super_block *sb __maybe_unused)
14365+{
14366+#if defined(CONFIG_BTRFS_FS) || defined(CONFIG_BTRFS_FS_MODULE)
14367+ return sb->s_magic == BTRFS_SUPER_MAGIC;
14368+#else
14369+ return 0;
14370+#endif
14371+}
14372+
14373+static inline int au_test_xenfs(struct super_block *sb __maybe_unused)
14374+{
14375+#if defined(CONFIG_XENFS) || defined(CONFIG_XENFS_MODULE)
14376+ return sb->s_magic == XENFS_SUPER_MAGIC;
14377+#else
14378+ return 0;
14379+#endif
14380+}
14381+
14382+static inline int au_test_debugfs(struct super_block *sb __maybe_unused)
14383+{
14384+#ifdef CONFIG_DEBUG_FS
14385+ return sb->s_magic == DEBUGFS_MAGIC;
14386+#else
14387+ return 0;
14388+#endif
14389+}
14390+
14391+static inline int au_test_nilfs(struct super_block *sb __maybe_unused)
14392+{
14393+#if defined(CONFIG_NILFS) || defined(CONFIG_NILFS_MODULE)
14394+ return sb->s_magic == NILFS_SUPER_MAGIC;
14395+#else
14396+ return 0;
14397+#endif
14398+}
14399+
4a4d8108
AM
14400+static inline int au_test_hfsplus(struct super_block *sb __maybe_unused)
14401+{
14402+#if defined(CONFIG_HFSPLUS_FS) || defined(CONFIG_HFSPLUS_FS_MODULE)
14403+ return sb->s_magic == HFSPLUS_SUPER_MAGIC;
14404+#else
14405+ return 0;
14406+#endif
14407+}
14408+
1308ab2a 14409+/* ---------------------------------------------------------------------- */
14410+/*
14411+ * they can't be an aufs branch.
14412+ */
14413+static inline int au_test_fs_unsuppoted(struct super_block *sb)
14414+{
14415+ return
14416+#ifndef CONFIG_AUFS_BR_RAMFS
14417+ au_test_ramfs(sb) ||
14418+#endif
14419+ au_test_procfs(sb)
14420+ || au_test_sysfs(sb)
14421+ || au_test_configfs(sb)
14422+ || au_test_debugfs(sb)
14423+ || au_test_securityfs(sb)
14424+ || au_test_xenfs(sb)
14425+ || au_test_ecryptfs(sb)
14426+ /* || !strcmp(au_sbtype(sb), "unionfs") */
14427+ || au_test_aufs(sb); /* will be supported in next version */
14428+}
14429+
1308ab2a 14430+static inline int au_test_fs_remote(struct super_block *sb)
14431+{
14432+ return !au_test_tmpfs(sb)
14433+#ifdef CONFIG_AUFS_BR_RAMFS
14434+ && !au_test_ramfs(sb)
14435+#endif
14436+ && !(sb->s_type->fs_flags & FS_REQUIRES_DEV);
14437+}
14438+
14439+/* ---------------------------------------------------------------------- */
14440+
14441+/*
14442+ * Note: these functions (below) are created after reading ->getattr() in all
14443+ * filesystems under linux/fs. it means we have to do so in every update...
14444+ */
14445+
14446+/*
14447+ * some filesystems require getattr to refresh the inode attributes before
14448+ * referencing.
14449+ * in most cases, we can rely on the inode attribute in NFS (or every remote fs)
14450+ * and leave the work for d_revalidate()
14451+ */
14452+static inline int au_test_fs_refresh_iattr(struct super_block *sb)
14453+{
14454+ return au_test_nfs(sb)
14455+ || au_test_fuse(sb)
1308ab2a 14456+ /* || au_test_btrfs(sb) */ /* untested */
1308ab2a 14457+ ;
14458+}
14459+
14460+/*
14461+ * filesystems which don't maintain i_size or i_blocks.
14462+ */
14463+static inline int au_test_fs_bad_iattr_size(struct super_block *sb)
14464+{
14465+ return au_test_xfs(sb)
4a4d8108
AM
14466+ || au_test_btrfs(sb)
14467+ || au_test_ubifs(sb)
14468+ || au_test_hfsplus(sb) /* maintained, but incorrect */
1308ab2a 14469+ /* || au_test_minix(sb) */ /* untested */
14470+ ;
14471+}
14472+
14473+/*
14474+ * filesystems which don't store the correct value in some of their inode
14475+ * attributes.
14476+ */
14477+static inline int au_test_fs_bad_iattr(struct super_block *sb)
14478+{
14479+ return au_test_fs_bad_iattr_size(sb)
1308ab2a 14480+ || au_test_fat(sb)
14481+ || au_test_msdos(sb)
14482+ || au_test_vfat(sb);
1facf9fc 14483+}
14484+
14485+/* they don't check i_nlink in link(2) */
14486+static inline int au_test_fs_no_limit_nlink(struct super_block *sb)
14487+{
14488+ return au_test_tmpfs(sb)
14489+#ifdef CONFIG_AUFS_BR_RAMFS
14490+ || au_test_ramfs(sb)
14491+#endif
4a4d8108 14492+ || au_test_ubifs(sb)
4a4d8108 14493+ || au_test_hfsplus(sb);
1facf9fc 14494+}
14495+
14496+/*
14497+ * filesystems which sets S_NOATIME and S_NOCMTIME.
14498+ */
14499+static inline int au_test_fs_notime(struct super_block *sb)
14500+{
14501+ return au_test_nfs(sb)
14502+ || au_test_fuse(sb)
dece6358 14503+ || au_test_ubifs(sb)
1facf9fc 14504+ ;
14505+}
14506+
1facf9fc 14507+/* temporary support for i#1 in cramfs */
14508+static inline int au_test_fs_unique_ino(struct inode *inode)
14509+{
14510+ if (au_test_cramfs(inode->i_sb))
14511+ return inode->i_ino != 1;
14512+ return 1;
14513+}
14514+
14515+/* ---------------------------------------------------------------------- */
14516+
14517+/*
14518+ * the filesystem where the xino files placed must support i/o after unlink and
14519+ * maintain i_size and i_blocks.
14520+ */
14521+static inline int au_test_fs_bad_xino(struct super_block *sb)
14522+{
14523+ return au_test_fs_remote(sb)
14524+ || au_test_fs_bad_iattr_size(sb)
1facf9fc 14525+ /* don't want unnecessary work for xino */
14526+ || au_test_aufs(sb)
1308ab2a 14527+ || au_test_ecryptfs(sb)
14528+ || au_test_nilfs(sb);
1facf9fc 14529+}
14530+
14531+static inline int au_test_fs_trunc_xino(struct super_block *sb)
14532+{
14533+ return au_test_tmpfs(sb)
14534+ || au_test_ramfs(sb);
14535+}
14536+
14537+/*
14538+ * test if the @sb is real-readonly.
14539+ */
14540+static inline int au_test_fs_rr(struct super_block *sb)
14541+{
14542+ return au_test_squashfs(sb)
14543+ || au_test_iso9660(sb)
14544+ || au_test_cramfs(sb)
14545+ || au_test_romfs(sb);
14546+}
14547+
7f2ca4b1
JR
14548+/*
14549+ * test if the @inode is nfs with 'noacl' option
14550+ * NFS always sets MS_POSIXACL regardless its mount option 'noacl.'
14551+ */
14552+static inline int au_test_nfs_noacl(struct inode *inode)
14553+{
14554+ return au_test_nfs(inode->i_sb)
14555+ /* && IS_POSIXACL(inode) */
14556+ && !nfs_server_capable(inode, NFS_CAP_ACLS);
14557+}
14558+
1facf9fc 14559+#endif /* __KERNEL__ */
14560+#endif /* __AUFS_FSTYPE_H__ */
7f207e10
AM
14561diff -urN /usr/share/empty/fs/aufs/hfsnotify.c linux/fs/aufs/hfsnotify.c
14562--- /usr/share/empty/fs/aufs/hfsnotify.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 14563+++ linux/fs/aufs/hfsnotify.c 2016-02-28 11:27:01.280579017 +0100
c1595e42 14564@@ -0,0 +1,288 @@
1facf9fc 14565+/*
7f2ca4b1 14566+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 14567+ *
14568+ * This program, aufs is free software; you can redistribute it and/or modify
14569+ * it under the terms of the GNU General Public License as published by
14570+ * the Free Software Foundation; either version 2 of the License, or
14571+ * (at your option) any later version.
dece6358
AM
14572+ *
14573+ * This program is distributed in the hope that it will be useful,
14574+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14575+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14576+ * GNU General Public License for more details.
14577+ *
14578+ * You should have received a copy of the GNU General Public License
523b37e3 14579+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 14580+ */
14581+
14582+/*
4a4d8108 14583+ * fsnotify for the lower directories
1facf9fc 14584+ */
14585+
14586+#include "aufs.h"
14587+
4a4d8108
AM
14588+/* FS_IN_IGNORED is unnecessary */
14589+static const __u32 AuHfsnMask = (FS_MOVED_TO | FS_MOVED_FROM | FS_DELETE
14590+ | FS_CREATE | FS_EVENT_ON_CHILD);
7f207e10 14591+static DECLARE_WAIT_QUEUE_HEAD(au_hfsn_wq);
7eafdf33 14592+static __cacheline_aligned_in_smp atomic64_t au_hfsn_ifree = ATOMIC64_INIT(0);
1facf9fc 14593+
0c5527e5 14594+static void au_hfsn_free_mark(struct fsnotify_mark *mark)
1facf9fc 14595+{
0c5527e5
AM
14596+ struct au_hnotify *hn = container_of(mark, struct au_hnotify,
14597+ hn_mark);
4a4d8108 14598+ AuDbg("here\n");
7eafdf33 14599+ au_cache_free_hnotify(hn);
076b876e 14600+ smp_mb__before_atomic();
1716fcea
AM
14601+ if (atomic64_dec_and_test(&au_hfsn_ifree))
14602+ wake_up(&au_hfsn_wq);
4a4d8108 14603+}
1facf9fc 14604+
027c5e7a 14605+static int au_hfsn_alloc(struct au_hinode *hinode)
4a4d8108 14606+{
1716fcea 14607+ int err;
027c5e7a
AM
14608+ struct au_hnotify *hn;
14609+ struct super_block *sb;
14610+ struct au_branch *br;
0c5527e5 14611+ struct fsnotify_mark *mark;
027c5e7a 14612+ aufs_bindex_t bindex;
1facf9fc 14613+
027c5e7a
AM
14614+ hn = hinode->hi_notify;
14615+ sb = hn->hn_aufs_inode->i_sb;
14616+ bindex = au_br_index(sb, hinode->hi_id);
14617+ br = au_sbr(sb, bindex);
1716fcea
AM
14618+ AuDebugOn(!br->br_hfsn);
14619+
0c5527e5
AM
14620+ mark = &hn->hn_mark;
14621+ fsnotify_init_mark(mark, au_hfsn_free_mark);
14622+ mark->mask = AuHfsnMask;
7f207e10
AM
14623+ /*
14624+ * by udba rename or rmdir, aufs assign a new inode to the known
14625+ * h_inode, so specify 1 to allow dups.
14626+ */
c1595e42 14627+ lockdep_off();
1716fcea 14628+ err = fsnotify_add_mark(mark, br->br_hfsn->hfsn_group, hinode->hi_inode,
027c5e7a 14629+ /*mnt*/NULL, /*allow_dups*/1);
1716fcea
AM
14630+ /* even if err */
14631+ fsnotify_put_mark(mark);
c1595e42 14632+ lockdep_on();
1716fcea
AM
14633+
14634+ return err;
1facf9fc 14635+}
14636+
7eafdf33 14637+static int au_hfsn_free(struct au_hinode *hinode, struct au_hnotify *hn)
1facf9fc 14638+{
0c5527e5 14639+ struct fsnotify_mark *mark;
7eafdf33 14640+ unsigned long long ull;
1716fcea 14641+ struct fsnotify_group *group;
7eafdf33
AM
14642+
14643+ ull = atomic64_inc_return(&au_hfsn_ifree);
14644+ BUG_ON(!ull);
953406b4 14645+
0c5527e5 14646+ mark = &hn->hn_mark;
1716fcea
AM
14647+ spin_lock(&mark->lock);
14648+ group = mark->group;
14649+ fsnotify_get_group(group);
14650+ spin_unlock(&mark->lock);
c1595e42 14651+ lockdep_off();
1716fcea
AM
14652+ fsnotify_destroy_mark(mark, group);
14653+ fsnotify_put_group(group);
c1595e42 14654+ lockdep_on();
7f207e10 14655+
7eafdf33
AM
14656+ /* free hn by myself */
14657+ return 0;
1facf9fc 14658+}
14659+
14660+/* ---------------------------------------------------------------------- */
14661+
4a4d8108 14662+static void au_hfsn_ctl(struct au_hinode *hinode, int do_set)
1facf9fc 14663+{
0c5527e5 14664+ struct fsnotify_mark *mark;
1facf9fc 14665+
0c5527e5
AM
14666+ mark = &hinode->hi_notify->hn_mark;
14667+ spin_lock(&mark->lock);
1facf9fc 14668+ if (do_set) {
0c5527e5
AM
14669+ AuDebugOn(mark->mask & AuHfsnMask);
14670+ mark->mask |= AuHfsnMask;
1facf9fc 14671+ } else {
0c5527e5
AM
14672+ AuDebugOn(!(mark->mask & AuHfsnMask));
14673+ mark->mask &= ~AuHfsnMask;
1facf9fc 14674+ }
0c5527e5 14675+ spin_unlock(&mark->lock);
4a4d8108 14676+ /* fsnotify_recalc_inode_mask(hinode->hi_inode); */
1facf9fc 14677+}
14678+
4a4d8108 14679+/* ---------------------------------------------------------------------- */
1facf9fc 14680+
4a4d8108
AM
14681+/* #define AuDbgHnotify */
14682+#ifdef AuDbgHnotify
14683+static char *au_hfsn_name(u32 mask)
14684+{
14685+#ifdef CONFIG_AUFS_DEBUG
c06a8ce3
AM
14686+#define test_ret(flag) \
14687+ do { \
14688+ if (mask & flag) \
14689+ return #flag; \
14690+ } while (0)
4a4d8108
AM
14691+ test_ret(FS_ACCESS);
14692+ test_ret(FS_MODIFY);
14693+ test_ret(FS_ATTRIB);
14694+ test_ret(FS_CLOSE_WRITE);
14695+ test_ret(FS_CLOSE_NOWRITE);
14696+ test_ret(FS_OPEN);
14697+ test_ret(FS_MOVED_FROM);
14698+ test_ret(FS_MOVED_TO);
14699+ test_ret(FS_CREATE);
14700+ test_ret(FS_DELETE);
14701+ test_ret(FS_DELETE_SELF);
14702+ test_ret(FS_MOVE_SELF);
14703+ test_ret(FS_UNMOUNT);
14704+ test_ret(FS_Q_OVERFLOW);
14705+ test_ret(FS_IN_IGNORED);
7f2ca4b1 14706+ test_ret(FS_ISDIR);
4a4d8108
AM
14707+ test_ret(FS_IN_ONESHOT);
14708+ test_ret(FS_EVENT_ON_CHILD);
14709+ return "";
14710+#undef test_ret
14711+#else
14712+ return "??";
14713+#endif
1facf9fc 14714+}
4a4d8108 14715+#endif
1facf9fc 14716+
14717+/* ---------------------------------------------------------------------- */
14718+
1716fcea
AM
14719+static void au_hfsn_free_group(struct fsnotify_group *group)
14720+{
14721+ struct au_br_hfsnotify *hfsn = group->private;
14722+
14723+ AuDbg("here\n");
14724+ kfree(hfsn);
14725+}
14726+
4a4d8108 14727+static int au_hfsn_handle_event(struct fsnotify_group *group,
fb47a38f 14728+ struct inode *inode,
0c5527e5
AM
14729+ struct fsnotify_mark *inode_mark,
14730+ struct fsnotify_mark *vfsmount_mark,
fb47a38f
JR
14731+ u32 mask, void *data, int data_type,
14732+ const unsigned char *file_name, u32 cookie)
1facf9fc 14733+{
14734+ int err;
4a4d8108
AM
14735+ struct au_hnotify *hnotify;
14736+ struct inode *h_dir, *h_inode;
fb47a38f 14737+ struct qstr h_child_qstr = QSTR_INIT(file_name, strlen(file_name));
4a4d8108 14738+
fb47a38f 14739+ AuDebugOn(data_type != FSNOTIFY_EVENT_INODE);
1facf9fc 14740+
14741+ err = 0;
0c5527e5 14742+ /* if FS_UNMOUNT happens, there must be another bug */
4a4d8108 14743+ AuDebugOn(mask & FS_UNMOUNT);
0c5527e5 14744+ if (mask & (FS_IN_IGNORED | FS_UNMOUNT))
1facf9fc 14745+ goto out;
1facf9fc 14746+
fb47a38f
JR
14747+ h_dir = inode;
14748+ h_inode = NULL;
4a4d8108 14749+#ifdef AuDbgHnotify
392086de 14750+ au_debug_on();
4a4d8108
AM
14751+ if (1 || h_child_qstr.len != sizeof(AUFS_XINO_FNAME) - 1
14752+ || strncmp(h_child_qstr.name, AUFS_XINO_FNAME, h_child_qstr.len)) {
14753+ AuDbg("i%lu, mask 0x%x %s, hcname %.*s, hi%lu\n",
14754+ h_dir->i_ino, mask, au_hfsn_name(mask),
14755+ AuLNPair(&h_child_qstr), h_inode ? h_inode->i_ino : 0);
14756+ /* WARN_ON(1); */
1facf9fc 14757+ }
392086de 14758+ au_debug_off();
1facf9fc 14759+#endif
4a4d8108 14760+
0c5527e5
AM
14761+ AuDebugOn(!inode_mark);
14762+ hnotify = container_of(inode_mark, struct au_hnotify, hn_mark);
14763+ err = au_hnotify(h_dir, hnotify, mask, &h_child_qstr, h_inode);
1facf9fc 14764+
4a4d8108
AM
14765+out:
14766+ return err;
14767+}
1facf9fc 14768+
4a4d8108 14769+static struct fsnotify_ops au_hfsn_ops = {
1716fcea
AM
14770+ .handle_event = au_hfsn_handle_event,
14771+ .free_group_priv = au_hfsn_free_group
4a4d8108
AM
14772+};
14773+
14774+/* ---------------------------------------------------------------------- */
14775+
027c5e7a
AM
14776+static void au_hfsn_fin_br(struct au_branch *br)
14777+{
1716fcea 14778+ struct au_br_hfsnotify *hfsn;
027c5e7a 14779+
1716fcea 14780+ hfsn = br->br_hfsn;
c1595e42
JR
14781+ if (hfsn) {
14782+ lockdep_off();
1716fcea 14783+ fsnotify_put_group(hfsn->hfsn_group);
c1595e42
JR
14784+ lockdep_on();
14785+ }
027c5e7a
AM
14786+}
14787+
1716fcea 14788+static int au_hfsn_init_br(struct au_branch *br, int perm)
4a4d8108
AM
14789+{
14790+ int err;
1716fcea
AM
14791+ struct fsnotify_group *group;
14792+ struct au_br_hfsnotify *hfsn;
1facf9fc 14793+
4a4d8108 14794+ err = 0;
1716fcea
AM
14795+ br->br_hfsn = NULL;
14796+ if (!au_br_hnotifyable(perm))
027c5e7a 14797+ goto out;
027c5e7a 14798+
1716fcea
AM
14799+ err = -ENOMEM;
14800+ hfsn = kmalloc(sizeof(*hfsn), GFP_NOFS);
14801+ if (unlikely(!hfsn))
027c5e7a
AM
14802+ goto out;
14803+
1716fcea
AM
14804+ err = 0;
14805+ group = fsnotify_alloc_group(&au_hfsn_ops);
14806+ if (IS_ERR(group)) {
14807+ err = PTR_ERR(group);
0c5527e5 14808+ pr_err("fsnotify_alloc_group() failed, %d\n", err);
1716fcea 14809+ goto out_hfsn;
4a4d8108 14810+ }
1facf9fc 14811+
1716fcea
AM
14812+ group->private = hfsn;
14813+ hfsn->hfsn_group = group;
14814+ br->br_hfsn = hfsn;
14815+ goto out; /* success */
14816+
14817+out_hfsn:
14818+ kfree(hfsn);
027c5e7a 14819+out:
1716fcea
AM
14820+ return err;
14821+}
14822+
14823+static int au_hfsn_reset_br(unsigned int udba, struct au_branch *br, int perm)
14824+{
14825+ int err;
14826+
14827+ err = 0;
14828+ if (!br->br_hfsn)
14829+ err = au_hfsn_init_br(br, perm);
14830+
1facf9fc 14831+ return err;
14832+}
14833+
7eafdf33
AM
14834+/* ---------------------------------------------------------------------- */
14835+
14836+static void au_hfsn_fin(void)
14837+{
14838+ AuDbg("au_hfsn_ifree %lld\n", (long long)atomic64_read(&au_hfsn_ifree));
14839+ wait_event(au_hfsn_wq, !atomic64_read(&au_hfsn_ifree));
14840+}
14841+
4a4d8108
AM
14842+const struct au_hnotify_op au_hnotify_op = {
14843+ .ctl = au_hfsn_ctl,
14844+ .alloc = au_hfsn_alloc,
14845+ .free = au_hfsn_free,
1facf9fc 14846+
7eafdf33
AM
14847+ .fin = au_hfsn_fin,
14848+
027c5e7a
AM
14849+ .reset_br = au_hfsn_reset_br,
14850+ .fin_br = au_hfsn_fin_br,
14851+ .init_br = au_hfsn_init_br
4a4d8108 14852+};
7f207e10
AM
14853diff -urN /usr/share/empty/fs/aufs/hfsplus.c linux/fs/aufs/hfsplus.c
14854--- /usr/share/empty/fs/aufs/hfsplus.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 14855+++ linux/fs/aufs/hfsplus.c 2016-02-28 11:27:01.280579017 +0100
523b37e3 14856@@ -0,0 +1,56 @@
4a4d8108 14857+/*
7f2ca4b1 14858+ * Copyright (C) 2010-2016 Junjiro R. Okajima
4a4d8108
AM
14859+ *
14860+ * This program, aufs is free software; you can redistribute it and/or modify
14861+ * it under the terms of the GNU General Public License as published by
14862+ * the Free Software Foundation; either version 2 of the License, or
14863+ * (at your option) any later version.
14864+ *
14865+ * This program is distributed in the hope that it will be useful,
14866+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14867+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14868+ * GNU General Public License for more details.
14869+ *
14870+ * You should have received a copy of the GNU General Public License
523b37e3 14871+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 14872+ */
1facf9fc 14873+
4a4d8108
AM
14874+/*
14875+ * special support for filesystems which acquire an inode mutex
14876+ * at final closing a file, eg, hfsplus.
14877+ *
14878+ * This trick is very simple and stupid, just to open the file before really
14879+ * necessary open to tell hfsplus that this is not the final closing.
14880+ * The caller should call au_h_open_pre() after acquiring the inode mutex,
14881+ * and au_h_open_post() after releasing it.
14882+ */
1facf9fc 14883+
4a4d8108 14884+#include "aufs.h"
1facf9fc 14885+
392086de
AM
14886+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
14887+ int force_wr)
4a4d8108
AM
14888+{
14889+ struct file *h_file;
14890+ struct dentry *h_dentry;
1facf9fc 14891+
4a4d8108
AM
14892+ h_dentry = au_h_dptr(dentry, bindex);
14893+ AuDebugOn(!h_dentry);
14894+ AuDebugOn(!h_dentry->d_inode);
4a4d8108
AM
14895+
14896+ h_file = NULL;
14897+ if (au_test_hfsplus(h_dentry->d_sb)
14898+ && S_ISREG(h_dentry->d_inode->i_mode))
14899+ h_file = au_h_open(dentry, bindex,
14900+ O_RDONLY | O_NOATIME | O_LARGEFILE,
392086de 14901+ /*file*/NULL, force_wr);
4a4d8108 14902+ return h_file;
1facf9fc 14903+}
14904+
4a4d8108
AM
14905+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
14906+ struct file *h_file)
14907+{
14908+ if (h_file) {
14909+ fput(h_file);
14910+ au_sbr_put(dentry->d_sb, bindex);
14911+ }
14912+}
7f207e10
AM
14913diff -urN /usr/share/empty/fs/aufs/hnotify.c linux/fs/aufs/hnotify.c
14914--- /usr/share/empty/fs/aufs/hnotify.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 14915+++ linux/fs/aufs/hnotify.c 2016-02-28 12:40:45.724388517 +0100
076b876e 14916@@ -0,0 +1,714 @@
e49829fe 14917+/*
7f2ca4b1 14918+ * Copyright (C) 2005-2016 Junjiro R. Okajima
e49829fe
JR
14919+ *
14920+ * This program, aufs is free software; you can redistribute it and/or modify
14921+ * it under the terms of the GNU General Public License as published by
14922+ * the Free Software Foundation; either version 2 of the License, or
14923+ * (at your option) any later version.
14924+ *
14925+ * This program is distributed in the hope that it will be useful,
14926+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14927+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14928+ * GNU General Public License for more details.
14929+ *
14930+ * You should have received a copy of the GNU General Public License
523b37e3 14931+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
e49829fe
JR
14932+ */
14933+
14934+/*
7f207e10 14935+ * abstraction to notify the direct changes on lower directories
e49829fe
JR
14936+ */
14937+
14938+#include "aufs.h"
14939+
027c5e7a 14940+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode)
e49829fe
JR
14941+{
14942+ int err;
7f207e10 14943+ struct au_hnotify *hn;
1facf9fc 14944+
4a4d8108
AM
14945+ err = -ENOMEM;
14946+ hn = au_cache_alloc_hnotify();
14947+ if (hn) {
14948+ hn->hn_aufs_inode = inode;
027c5e7a
AM
14949+ hinode->hi_notify = hn;
14950+ err = au_hnotify_op.alloc(hinode);
14951+ AuTraceErr(err);
14952+ if (unlikely(err)) {
14953+ hinode->hi_notify = NULL;
4a4d8108
AM
14954+ au_cache_free_hnotify(hn);
14955+ /*
14956+ * The upper dir was removed by udba, but the same named
14957+ * dir left. In this case, aufs assigns a new inode
14958+ * number and sets the monitor again.
14959+ * For the lower dir, the old monitor is still left.
14960+ */
14961+ if (err == -EEXIST)
14962+ err = 0;
14963+ }
1308ab2a 14964+ }
1308ab2a 14965+
027c5e7a 14966+ AuTraceErr(err);
1308ab2a 14967+ return err;
dece6358 14968+}
1facf9fc 14969+
4a4d8108 14970+void au_hn_free(struct au_hinode *hinode)
dece6358 14971+{
4a4d8108 14972+ struct au_hnotify *hn;
1facf9fc 14973+
4a4d8108
AM
14974+ hn = hinode->hi_notify;
14975+ if (hn) {
4a4d8108 14976+ hinode->hi_notify = NULL;
7eafdf33
AM
14977+ if (au_hnotify_op.free(hinode, hn))
14978+ au_cache_free_hnotify(hn);
4a4d8108
AM
14979+ }
14980+}
dece6358 14981+
4a4d8108 14982+/* ---------------------------------------------------------------------- */
dece6358 14983+
4a4d8108
AM
14984+void au_hn_ctl(struct au_hinode *hinode, int do_set)
14985+{
14986+ if (hinode->hi_notify)
14987+ au_hnotify_op.ctl(hinode, do_set);
14988+}
14989+
14990+void au_hn_reset(struct inode *inode, unsigned int flags)
14991+{
14992+ aufs_bindex_t bindex, bend;
14993+ struct inode *hi;
14994+ struct dentry *iwhdentry;
1facf9fc 14995+
1308ab2a 14996+ bend = au_ibend(inode);
4a4d8108
AM
14997+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
14998+ hi = au_h_iptr(inode, bindex);
14999+ if (!hi)
15000+ continue;
1308ab2a 15001+
4a4d8108
AM
15002+ /* mutex_lock_nested(&hi->i_mutex, AuLsc_I_CHILD); */
15003+ iwhdentry = au_hi_wh(inode, bindex);
15004+ if (iwhdentry)
15005+ dget(iwhdentry);
15006+ au_igrab(hi);
15007+ au_set_h_iptr(inode, bindex, NULL, 0);
15008+ au_set_h_iptr(inode, bindex, au_igrab(hi),
15009+ flags & ~AuHi_XINO);
15010+ iput(hi);
15011+ dput(iwhdentry);
15012+ /* mutex_unlock(&hi->i_mutex); */
1facf9fc 15013+ }
1facf9fc 15014+}
15015+
1308ab2a 15016+/* ---------------------------------------------------------------------- */
1facf9fc 15017+
4a4d8108 15018+static int hn_xino(struct inode *inode, struct inode *h_inode)
1facf9fc 15019+{
4a4d8108
AM
15020+ int err;
15021+ aufs_bindex_t bindex, bend, bfound, bstart;
15022+ struct inode *h_i;
1facf9fc 15023+
4a4d8108
AM
15024+ err = 0;
15025+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
0c3ec466 15026+ pr_warn("branch root dir was changed\n");
4a4d8108
AM
15027+ goto out;
15028+ }
1facf9fc 15029+
4a4d8108
AM
15030+ bfound = -1;
15031+ bend = au_ibend(inode);
15032+ bstart = au_ibstart(inode);
15033+#if 0 /* reserved for future use */
15034+ if (bindex == bend) {
15035+ /* keep this ino in rename case */
15036+ goto out;
15037+ }
15038+#endif
15039+ for (bindex = bstart; bindex <= bend; bindex++)
15040+ if (au_h_iptr(inode, bindex) == h_inode) {
15041+ bfound = bindex;
15042+ break;
15043+ }
15044+ if (bfound < 0)
1308ab2a 15045+ goto out;
1facf9fc 15046+
4a4d8108
AM
15047+ for (bindex = bstart; bindex <= bend; bindex++) {
15048+ h_i = au_h_iptr(inode, bindex);
15049+ if (!h_i)
15050+ continue;
1facf9fc 15051+
4a4d8108
AM
15052+ err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0);
15053+ /* ignore this error */
15054+ /* bad action? */
1facf9fc 15055+ }
1facf9fc 15056+
4a4d8108 15057+ /* children inode number will be broken */
1facf9fc 15058+
4f0767ce 15059+out:
4a4d8108
AM
15060+ AuTraceErr(err);
15061+ return err;
1facf9fc 15062+}
15063+
4a4d8108 15064+static int hn_gen_tree(struct dentry *dentry)
1facf9fc 15065+{
4a4d8108
AM
15066+ int err, i, j, ndentry;
15067+ struct au_dcsub_pages dpages;
15068+ struct au_dpage *dpage;
15069+ struct dentry **dentries;
1facf9fc 15070+
4a4d8108
AM
15071+ err = au_dpages_init(&dpages, GFP_NOFS);
15072+ if (unlikely(err))
15073+ goto out;
15074+ err = au_dcsub_pages(&dpages, dentry, NULL, NULL);
15075+ if (unlikely(err))
15076+ goto out_dpages;
1facf9fc 15077+
4a4d8108
AM
15078+ for (i = 0; i < dpages.ndpage; i++) {
15079+ dpage = dpages.dpages + i;
15080+ dentries = dpage->dentries;
15081+ ndentry = dpage->ndentry;
15082+ for (j = 0; j < ndentry; j++) {
15083+ struct dentry *d;
15084+
15085+ d = dentries[j];
15086+ if (IS_ROOT(d))
15087+ continue;
15088+
4a4d8108
AM
15089+ au_digen_dec(d);
15090+ if (d->d_inode)
15091+ /* todo: reset children xino?
15092+ cached children only? */
15093+ au_iigen_dec(d->d_inode);
1308ab2a 15094+ }
dece6358 15095+ }
1facf9fc 15096+
4f0767ce 15097+out_dpages:
4a4d8108 15098+ au_dpages_free(&dpages);
dece6358 15099+
027c5e7a 15100+#if 0
4a4d8108
AM
15101+ /* discard children */
15102+ dentry_unhash(dentry);
15103+ dput(dentry);
027c5e7a 15104+#endif
4f0767ce 15105+out:
dece6358
AM
15106+ return err;
15107+}
15108+
1308ab2a 15109+/*
4a4d8108 15110+ * return 0 if processed.
1308ab2a 15111+ */
4a4d8108
AM
15112+static int hn_gen_by_inode(char *name, unsigned int nlen, struct inode *inode,
15113+ const unsigned int isdir)
dece6358 15114+{
1308ab2a 15115+ int err;
4a4d8108
AM
15116+ struct dentry *d;
15117+ struct qstr *dname;
1facf9fc 15118+
4a4d8108
AM
15119+ err = 1;
15120+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
0c3ec466 15121+ pr_warn("branch root dir was changed\n");
4a4d8108
AM
15122+ err = 0;
15123+ goto out;
15124+ }
dece6358 15125+
4a4d8108
AM
15126+ if (!isdir) {
15127+ AuDebugOn(!name);
15128+ au_iigen_dec(inode);
027c5e7a 15129+ spin_lock(&inode->i_lock);
c1595e42 15130+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
027c5e7a 15131+ spin_lock(&d->d_lock);
4a4d8108
AM
15132+ dname = &d->d_name;
15133+ if (dname->len != nlen
027c5e7a
AM
15134+ && memcmp(dname->name, name, nlen)) {
15135+ spin_unlock(&d->d_lock);
4a4d8108 15136+ continue;
027c5e7a 15137+ }
4a4d8108 15138+ err = 0;
4a4d8108
AM
15139+ au_digen_dec(d);
15140+ spin_unlock(&d->d_lock);
15141+ break;
1facf9fc 15142+ }
027c5e7a 15143+ spin_unlock(&inode->i_lock);
1308ab2a 15144+ } else {
027c5e7a 15145+ au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIR);
c1595e42 15146+ d = d_find_any_alias(inode);
4a4d8108
AM
15147+ if (!d) {
15148+ au_iigen_dec(inode);
15149+ goto out;
15150+ }
1facf9fc 15151+
027c5e7a 15152+ spin_lock(&d->d_lock);
4a4d8108 15153+ dname = &d->d_name;
027c5e7a
AM
15154+ if (dname->len == nlen && !memcmp(dname->name, name, nlen)) {
15155+ spin_unlock(&d->d_lock);
4a4d8108 15156+ err = hn_gen_tree(d);
027c5e7a
AM
15157+ spin_lock(&d->d_lock);
15158+ }
15159+ spin_unlock(&d->d_lock);
4a4d8108
AM
15160+ dput(d);
15161+ }
1facf9fc 15162+
4f0767ce 15163+out:
4a4d8108 15164+ AuTraceErr(err);
1308ab2a 15165+ return err;
15166+}
dece6358 15167+
4a4d8108 15168+static int hn_gen_by_name(struct dentry *dentry, const unsigned int isdir)
1facf9fc 15169+{
4a4d8108
AM
15170+ int err;
15171+ struct inode *inode;
1facf9fc 15172+
4a4d8108
AM
15173+ inode = dentry->d_inode;
15174+ if (IS_ROOT(dentry)
15175+ /* || (inode && inode->i_ino == AUFS_ROOT_INO) */
15176+ ) {
0c3ec466 15177+ pr_warn("branch root dir was changed\n");
4a4d8108
AM
15178+ return 0;
15179+ }
1308ab2a 15180+
4a4d8108
AM
15181+ err = 0;
15182+ if (!isdir) {
4a4d8108
AM
15183+ au_digen_dec(dentry);
15184+ if (inode)
15185+ au_iigen_dec(inode);
15186+ } else {
027c5e7a 15187+ au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR);
4a4d8108
AM
15188+ if (inode)
15189+ err = hn_gen_tree(dentry);
15190+ }
15191+
15192+ AuTraceErr(err);
15193+ return err;
1facf9fc 15194+}
15195+
4a4d8108 15196+/* ---------------------------------------------------------------------- */
1facf9fc 15197+
4a4d8108
AM
15198+/* hnotify job flags */
15199+#define AuHnJob_XINO0 1
15200+#define AuHnJob_GEN (1 << 1)
15201+#define AuHnJob_DIRENT (1 << 2)
15202+#define AuHnJob_ISDIR (1 << 3)
15203+#define AuHnJob_TRYXINO0 (1 << 4)
15204+#define AuHnJob_MNTPNT (1 << 5)
15205+#define au_ftest_hnjob(flags, name) ((flags) & AuHnJob_##name)
7f207e10
AM
15206+#define au_fset_hnjob(flags, name) \
15207+ do { (flags) |= AuHnJob_##name; } while (0)
15208+#define au_fclr_hnjob(flags, name) \
15209+ do { (flags) &= ~AuHnJob_##name; } while (0)
1facf9fc 15210+
4a4d8108
AM
15211+enum {
15212+ AuHn_CHILD,
15213+ AuHn_PARENT,
15214+ AuHnLast
15215+};
1facf9fc 15216+
4a4d8108
AM
15217+struct au_hnotify_args {
15218+ struct inode *h_dir, *dir, *h_child_inode;
15219+ u32 mask;
15220+ unsigned int flags[AuHnLast];
15221+ unsigned int h_child_nlen;
15222+ char h_child_name[];
15223+};
1facf9fc 15224+
4a4d8108
AM
15225+struct hn_job_args {
15226+ unsigned int flags;
15227+ struct inode *inode, *h_inode, *dir, *h_dir;
15228+ struct dentry *dentry;
15229+ char *h_name;
15230+ int h_nlen;
15231+};
1308ab2a 15232+
4a4d8108
AM
15233+static int hn_job(struct hn_job_args *a)
15234+{
15235+ const unsigned int isdir = au_ftest_hnjob(a->flags, ISDIR);
076b876e 15236+ int e;
1308ab2a 15237+
4a4d8108
AM
15238+ /* reset xino */
15239+ if (au_ftest_hnjob(a->flags, XINO0) && a->inode)
15240+ hn_xino(a->inode, a->h_inode); /* ignore this error */
1308ab2a 15241+
4a4d8108
AM
15242+ if (au_ftest_hnjob(a->flags, TRYXINO0)
15243+ && a->inode
15244+ && a->h_inode) {
15245+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
38d290e6
JR
15246+ if (!a->h_inode->i_nlink
15247+ && !(a->h_inode->i_state & I_LINKABLE))
4a4d8108
AM
15248+ hn_xino(a->inode, a->h_inode); /* ignore this error */
15249+ mutex_unlock(&a->h_inode->i_mutex);
1308ab2a 15250+ }
1facf9fc 15251+
4a4d8108
AM
15252+ /* make the generation obsolete */
15253+ if (au_ftest_hnjob(a->flags, GEN)) {
076b876e 15254+ e = -1;
4a4d8108 15255+ if (a->inode)
076b876e 15256+ e = hn_gen_by_inode(a->h_name, a->h_nlen, a->inode,
4a4d8108 15257+ isdir);
076b876e 15258+ if (e && a->dentry)
4a4d8108
AM
15259+ hn_gen_by_name(a->dentry, isdir);
15260+ /* ignore this error */
1facf9fc 15261+ }
1facf9fc 15262+
4a4d8108
AM
15263+ /* make dir entries obsolete */
15264+ if (au_ftest_hnjob(a->flags, DIRENT) && a->inode) {
15265+ struct au_vdir *vdir;
1facf9fc 15266+
4a4d8108
AM
15267+ vdir = au_ivdir(a->inode);
15268+ if (vdir)
15269+ vdir->vd_jiffy = 0;
15270+ /* IMustLock(a->inode); */
15271+ /* a->inode->i_version++; */
15272+ }
1facf9fc 15273+
4a4d8108
AM
15274+ /* can do nothing but warn */
15275+ if (au_ftest_hnjob(a->flags, MNTPNT)
15276+ && a->dentry
15277+ && d_mountpoint(a->dentry))
523b37e3 15278+ pr_warn("mount-point %pd is removed or renamed\n", a->dentry);
1facf9fc 15279+
4a4d8108 15280+ return 0;
1308ab2a 15281+}
1facf9fc 15282+
1308ab2a 15283+/* ---------------------------------------------------------------------- */
1facf9fc 15284+
4a4d8108
AM
15285+static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen,
15286+ struct inode *dir)
1308ab2a 15287+{
4a4d8108
AM
15288+ struct dentry *dentry, *d, *parent;
15289+ struct qstr *dname;
1308ab2a 15290+
c1595e42 15291+ parent = d_find_any_alias(dir);
4a4d8108
AM
15292+ if (!parent)
15293+ return NULL;
1308ab2a 15294+
4a4d8108 15295+ dentry = NULL;
027c5e7a 15296+ spin_lock(&parent->d_lock);
c1595e42 15297+ list_for_each_entry(d, &parent->d_subdirs, d_child) {
523b37e3 15298+ /* AuDbg("%pd\n", d); */
027c5e7a 15299+ spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
4a4d8108
AM
15300+ dname = &d->d_name;
15301+ if (dname->len != nlen || memcmp(dname->name, name, nlen))
027c5e7a
AM
15302+ goto cont_unlock;
15303+ if (au_di(d))
15304+ au_digen_dec(d);
15305+ else
15306+ goto cont_unlock;
c1595e42 15307+ if (au_dcount(d) > 0) {
027c5e7a 15308+ dentry = dget_dlock(d);
4a4d8108 15309+ spin_unlock(&d->d_lock);
027c5e7a 15310+ break;
dece6358 15311+ }
1facf9fc 15312+
f6b6e03d 15313+cont_unlock:
027c5e7a 15314+ spin_unlock(&d->d_lock);
1308ab2a 15315+ }
027c5e7a 15316+ spin_unlock(&parent->d_lock);
4a4d8108 15317+ dput(parent);
1facf9fc 15318+
4a4d8108
AM
15319+ if (dentry)
15320+ di_write_lock_child(dentry);
1308ab2a 15321+
4a4d8108
AM
15322+ return dentry;
15323+}
dece6358 15324+
4a4d8108
AM
15325+static struct inode *lookup_wlock_by_ino(struct super_block *sb,
15326+ aufs_bindex_t bindex, ino_t h_ino)
15327+{
15328+ struct inode *inode;
15329+ ino_t ino;
15330+ int err;
15331+
15332+ inode = NULL;
15333+ err = au_xino_read(sb, bindex, h_ino, &ino);
15334+ if (!err && ino)
15335+ inode = ilookup(sb, ino);
15336+ if (!inode)
15337+ goto out;
15338+
15339+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
0c3ec466 15340+ pr_warn("wrong root branch\n");
4a4d8108
AM
15341+ iput(inode);
15342+ inode = NULL;
15343+ goto out;
1308ab2a 15344+ }
15345+
4a4d8108 15346+ ii_write_lock_child(inode);
1308ab2a 15347+
4f0767ce 15348+out:
4a4d8108 15349+ return inode;
dece6358
AM
15350+}
15351+
4a4d8108 15352+static void au_hn_bh(void *_args)
1facf9fc 15353+{
4a4d8108
AM
15354+ struct au_hnotify_args *a = _args;
15355+ struct super_block *sb;
15356+ aufs_bindex_t bindex, bend, bfound;
15357+ unsigned char xino, try_iput;
1facf9fc 15358+ int err;
1308ab2a 15359+ struct inode *inode;
4a4d8108
AM
15360+ ino_t h_ino;
15361+ struct hn_job_args args;
15362+ struct dentry *dentry;
15363+ struct au_sbinfo *sbinfo;
1facf9fc 15364+
4a4d8108
AM
15365+ AuDebugOn(!_args);
15366+ AuDebugOn(!a->h_dir);
15367+ AuDebugOn(!a->dir);
15368+ AuDebugOn(!a->mask);
15369+ AuDbg("mask 0x%x, i%lu, hi%lu, hci%lu\n",
15370+ a->mask, a->dir->i_ino, a->h_dir->i_ino,
15371+ a->h_child_inode ? a->h_child_inode->i_ino : 0);
1facf9fc 15372+
4a4d8108
AM
15373+ inode = NULL;
15374+ dentry = NULL;
15375+ /*
15376+ * do not lock a->dir->i_mutex here
15377+ * because d_revalidate() may cause a deadlock.
15378+ */
15379+ sb = a->dir->i_sb;
15380+ AuDebugOn(!sb);
15381+ sbinfo = au_sbi(sb);
15382+ AuDebugOn(!sbinfo);
7f207e10 15383+ si_write_lock(sb, AuLock_NOPLMW);
1facf9fc 15384+
4a4d8108
AM
15385+ ii_read_lock_parent(a->dir);
15386+ bfound = -1;
15387+ bend = au_ibend(a->dir);
15388+ for (bindex = au_ibstart(a->dir); bindex <= bend; bindex++)
15389+ if (au_h_iptr(a->dir, bindex) == a->h_dir) {
15390+ bfound = bindex;
15391+ break;
15392+ }
15393+ ii_read_unlock(a->dir);
15394+ if (unlikely(bfound < 0))
15395+ goto out;
1facf9fc 15396+
4a4d8108
AM
15397+ xino = !!au_opt_test(au_mntflags(sb), XINO);
15398+ h_ino = 0;
15399+ if (a->h_child_inode)
15400+ h_ino = a->h_child_inode->i_ino;
1facf9fc 15401+
4a4d8108
AM
15402+ if (a->h_child_nlen
15403+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], GEN)
15404+ || au_ftest_hnjob(a->flags[AuHn_CHILD], MNTPNT)))
15405+ dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen,
15406+ a->dir);
15407+ try_iput = 0;
15408+ if (dentry)
15409+ inode = dentry->d_inode;
15410+ if (xino && !inode && h_ino
15411+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], XINO0)
15412+ || au_ftest_hnjob(a->flags[AuHn_CHILD], TRYXINO0)
15413+ || au_ftest_hnjob(a->flags[AuHn_CHILD], GEN))) {
15414+ inode = lookup_wlock_by_ino(sb, bfound, h_ino);
15415+ try_iput = 1;
15416+ }
1facf9fc 15417+
4a4d8108
AM
15418+ args.flags = a->flags[AuHn_CHILD];
15419+ args.dentry = dentry;
15420+ args.inode = inode;
15421+ args.h_inode = a->h_child_inode;
15422+ args.dir = a->dir;
15423+ args.h_dir = a->h_dir;
15424+ args.h_name = a->h_child_name;
15425+ args.h_nlen = a->h_child_nlen;
15426+ err = hn_job(&args);
15427+ if (dentry) {
027c5e7a 15428+ if (au_di(dentry))
4a4d8108
AM
15429+ di_write_unlock(dentry);
15430+ dput(dentry);
15431+ }
15432+ if (inode && try_iput) {
15433+ ii_write_unlock(inode);
15434+ iput(inode);
15435+ }
1facf9fc 15436+
4a4d8108
AM
15437+ ii_write_lock_parent(a->dir);
15438+ args.flags = a->flags[AuHn_PARENT];
15439+ args.dentry = NULL;
15440+ args.inode = a->dir;
15441+ args.h_inode = a->h_dir;
15442+ args.dir = NULL;
15443+ args.h_dir = NULL;
15444+ args.h_name = NULL;
15445+ args.h_nlen = 0;
15446+ err = hn_job(&args);
15447+ ii_write_unlock(a->dir);
1facf9fc 15448+
4f0767ce 15449+out:
4a4d8108
AM
15450+ iput(a->h_child_inode);
15451+ iput(a->h_dir);
15452+ iput(a->dir);
027c5e7a
AM
15453+ si_write_unlock(sb);
15454+ au_nwt_done(&sbinfo->si_nowait);
1308ab2a 15455+ kfree(a);
dece6358 15456+}
1facf9fc 15457+
4a4d8108
AM
15458+/* ---------------------------------------------------------------------- */
15459+
15460+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
15461+ struct qstr *h_child_qstr, struct inode *h_child_inode)
dece6358 15462+{
4a4d8108 15463+ int err, len;
53392da6 15464+ unsigned int flags[AuHnLast], f;
4a4d8108
AM
15465+ unsigned char isdir, isroot, wh;
15466+ struct inode *dir;
15467+ struct au_hnotify_args *args;
15468+ char *p, *h_child_name;
dece6358 15469+
1308ab2a 15470+ err = 0;
4a4d8108
AM
15471+ AuDebugOn(!hnotify || !hnotify->hn_aufs_inode);
15472+ dir = igrab(hnotify->hn_aufs_inode);
15473+ if (!dir)
15474+ goto out;
1facf9fc 15475+
4a4d8108
AM
15476+ isroot = (dir->i_ino == AUFS_ROOT_INO);
15477+ wh = 0;
15478+ h_child_name = (void *)h_child_qstr->name;
15479+ len = h_child_qstr->len;
15480+ if (h_child_name) {
15481+ if (len > AUFS_WH_PFX_LEN
15482+ && !memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
15483+ h_child_name += AUFS_WH_PFX_LEN;
15484+ len -= AUFS_WH_PFX_LEN;
15485+ wh = 1;
15486+ }
1facf9fc 15487+ }
dece6358 15488+
4a4d8108
AM
15489+ isdir = 0;
15490+ if (h_child_inode)
15491+ isdir = !!S_ISDIR(h_child_inode->i_mode);
15492+ flags[AuHn_PARENT] = AuHnJob_ISDIR;
15493+ flags[AuHn_CHILD] = 0;
15494+ if (isdir)
15495+ flags[AuHn_CHILD] = AuHnJob_ISDIR;
15496+ au_fset_hnjob(flags[AuHn_PARENT], DIRENT);
15497+ au_fset_hnjob(flags[AuHn_CHILD], GEN);
15498+ switch (mask & FS_EVENTS_POSS_ON_CHILD) {
15499+ case FS_MOVED_FROM:
15500+ case FS_MOVED_TO:
15501+ au_fset_hnjob(flags[AuHn_CHILD], XINO0);
15502+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
15503+ /*FALLTHROUGH*/
15504+ case FS_CREATE:
fb47a38f 15505+ AuDebugOn(!h_child_name);
4a4d8108 15506+ break;
1facf9fc 15507+
4a4d8108
AM
15508+ case FS_DELETE:
15509+ /*
15510+ * aufs will never be able to get this child inode.
15511+ * revalidation should be in d_revalidate()
15512+ * by checking i_nlink, i_generation or d_unhashed().
15513+ */
15514+ AuDebugOn(!h_child_name);
15515+ au_fset_hnjob(flags[AuHn_CHILD], TRYXINO0);
15516+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
15517+ break;
dece6358 15518+
4a4d8108
AM
15519+ default:
15520+ AuDebugOn(1);
15521+ }
1308ab2a 15522+
4a4d8108
AM
15523+ if (wh)
15524+ h_child_inode = NULL;
1308ab2a 15525+
4a4d8108
AM
15526+ err = -ENOMEM;
15527+ /* iput() and kfree() will be called in au_hnotify() */
4a4d8108 15528+ args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS);
4a4d8108
AM
15529+ if (unlikely(!args)) {
15530+ AuErr1("no memory\n");
15531+ iput(dir);
15532+ goto out;
15533+ }
15534+ args->flags[AuHn_PARENT] = flags[AuHn_PARENT];
15535+ args->flags[AuHn_CHILD] = flags[AuHn_CHILD];
15536+ args->mask = mask;
15537+ args->dir = dir;
15538+ args->h_dir = igrab(h_dir);
15539+ if (h_child_inode)
15540+ h_child_inode = igrab(h_child_inode); /* can be NULL */
15541+ args->h_child_inode = h_child_inode;
15542+ args->h_child_nlen = len;
15543+ if (len) {
15544+ p = (void *)args;
15545+ p += sizeof(*args);
15546+ memcpy(p, h_child_name, len);
15547+ p[len] = 0;
1308ab2a 15548+ }
1308ab2a 15549+
38d290e6 15550+ /* NFS fires the event for silly-renamed one from kworker */
53392da6 15551+ f = 0;
38d290e6
JR
15552+ if (!dir->i_nlink
15553+ || (au_test_nfs(h_dir->i_sb) && (mask & FS_DELETE)))
53392da6
AM
15554+ f = AuWkq_NEST;
15555+ err = au_wkq_nowait(au_hn_bh, args, dir->i_sb, f);
4a4d8108
AM
15556+ if (unlikely(err)) {
15557+ pr_err("wkq %d\n", err);
15558+ iput(args->h_child_inode);
15559+ iput(args->h_dir);
15560+ iput(args->dir);
15561+ kfree(args);
1facf9fc 15562+ }
1facf9fc 15563+
4a4d8108 15564+out:
1facf9fc 15565+ return err;
15566+}
15567+
027c5e7a
AM
15568+/* ---------------------------------------------------------------------- */
15569+
15570+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm)
15571+{
15572+ int err;
15573+
15574+ AuDebugOn(!(udba & AuOptMask_UDBA));
15575+
15576+ err = 0;
15577+ if (au_hnotify_op.reset_br)
15578+ err = au_hnotify_op.reset_br(udba, br, perm);
15579+
15580+ return err;
15581+}
15582+
15583+int au_hnotify_init_br(struct au_branch *br, int perm)
15584+{
15585+ int err;
15586+
15587+ err = 0;
15588+ if (au_hnotify_op.init_br)
15589+ err = au_hnotify_op.init_br(br, perm);
15590+
15591+ return err;
15592+}
15593+
15594+void au_hnotify_fin_br(struct au_branch *br)
15595+{
15596+ if (au_hnotify_op.fin_br)
15597+ au_hnotify_op.fin_br(br);
15598+}
15599+
4a4d8108
AM
15600+static void au_hn_destroy_cache(void)
15601+{
15602+ kmem_cache_destroy(au_cachep[AuCache_HNOTIFY]);
15603+ au_cachep[AuCache_HNOTIFY] = NULL;
15604+}
1308ab2a 15605+
4a4d8108 15606+int __init au_hnotify_init(void)
1facf9fc 15607+{
1308ab2a 15608+ int err;
1308ab2a 15609+
4a4d8108
AM
15610+ err = -ENOMEM;
15611+ au_cachep[AuCache_HNOTIFY] = AuCache(au_hnotify);
15612+ if (au_cachep[AuCache_HNOTIFY]) {
027c5e7a
AM
15613+ err = 0;
15614+ if (au_hnotify_op.init)
15615+ err = au_hnotify_op.init();
4a4d8108
AM
15616+ if (unlikely(err))
15617+ au_hn_destroy_cache();
1308ab2a 15618+ }
1308ab2a 15619+ AuTraceErr(err);
4a4d8108 15620+ return err;
1308ab2a 15621+}
15622+
4a4d8108 15623+void au_hnotify_fin(void)
1308ab2a 15624+{
027c5e7a
AM
15625+ if (au_hnotify_op.fin)
15626+ au_hnotify_op.fin();
4a4d8108
AM
15627+ /* cf. au_cache_fin() */
15628+ if (au_cachep[AuCache_HNOTIFY])
15629+ au_hn_destroy_cache();
dece6358 15630+}
7f207e10
AM
15631diff -urN /usr/share/empty/fs/aufs/iinfo.c linux/fs/aufs/iinfo.c
15632--- /usr/share/empty/fs/aufs/iinfo.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 15633+++ linux/fs/aufs/iinfo.c 2016-02-28 11:27:01.280579017 +0100
38d290e6 15634@@ -0,0 +1,277 @@
dece6358 15635+/*
7f2ca4b1 15636+ * Copyright (C) 2005-2016 Junjiro R. Okajima
dece6358
AM
15637+ *
15638+ * This program, aufs is free software; you can redistribute it and/or modify
15639+ * it under the terms of the GNU General Public License as published by
15640+ * the Free Software Foundation; either version 2 of the License, or
15641+ * (at your option) any later version.
15642+ *
15643+ * This program is distributed in the hope that it will be useful,
15644+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15645+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15646+ * GNU General Public License for more details.
15647+ *
15648+ * You should have received a copy of the GNU General Public License
523b37e3 15649+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
dece6358 15650+ */
1facf9fc 15651+
dece6358 15652+/*
4a4d8108 15653+ * inode private data
dece6358 15654+ */
1facf9fc 15655+
1308ab2a 15656+#include "aufs.h"
1facf9fc 15657+
4a4d8108 15658+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 15659+{
4a4d8108 15660+ struct inode *h_inode;
1facf9fc 15661+
4a4d8108 15662+ IiMustAnyLock(inode);
1facf9fc 15663+
4a4d8108
AM
15664+ h_inode = au_ii(inode)->ii_hinode[0 + bindex].hi_inode;
15665+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
15666+ return h_inode;
15667+}
1facf9fc 15668+
4a4d8108
AM
15669+/* todo: hard/soft set? */
15670+void au_hiput(struct au_hinode *hinode)
15671+{
15672+ au_hn_free(hinode);
15673+ dput(hinode->hi_whdentry);
15674+ iput(hinode->hi_inode);
15675+}
1facf9fc 15676+
4a4d8108
AM
15677+unsigned int au_hi_flags(struct inode *inode, int isdir)
15678+{
15679+ unsigned int flags;
15680+ const unsigned int mnt_flags = au_mntflags(inode->i_sb);
1facf9fc 15681+
4a4d8108
AM
15682+ flags = 0;
15683+ if (au_opt_test(mnt_flags, XINO))
15684+ au_fset_hi(flags, XINO);
15685+ if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY))
15686+ au_fset_hi(flags, HNOTIFY);
15687+ return flags;
1facf9fc 15688+}
15689+
4a4d8108
AM
15690+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
15691+ struct inode *h_inode, unsigned int flags)
1308ab2a 15692+{
4a4d8108
AM
15693+ struct au_hinode *hinode;
15694+ struct inode *hi;
15695+ struct au_iinfo *iinfo = au_ii(inode);
1facf9fc 15696+
4a4d8108 15697+ IiMustWriteLock(inode);
dece6358 15698+
4a4d8108
AM
15699+ hinode = iinfo->ii_hinode + bindex;
15700+ hi = hinode->hi_inode;
15701+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
15702+
15703+ if (hi)
15704+ au_hiput(hinode);
15705+ hinode->hi_inode = h_inode;
15706+ if (h_inode) {
15707+ int err;
15708+ struct super_block *sb = inode->i_sb;
15709+ struct au_branch *br;
15710+
027c5e7a
AM
15711+ AuDebugOn(inode->i_mode
15712+ && (h_inode->i_mode & S_IFMT)
15713+ != (inode->i_mode & S_IFMT));
4a4d8108
AM
15714+ if (bindex == iinfo->ii_bstart)
15715+ au_cpup_igen(inode, h_inode);
15716+ br = au_sbr(sb, bindex);
15717+ hinode->hi_id = br->br_id;
15718+ if (au_ftest_hi(flags, XINO)) {
15719+ err = au_xino_write(sb, bindex, h_inode->i_ino,
15720+ inode->i_ino);
15721+ if (unlikely(err))
15722+ AuIOErr1("failed au_xino_write() %d\n", err);
15723+ }
15724+
15725+ if (au_ftest_hi(flags, HNOTIFY)
15726+ && au_br_hnotifyable(br->br_perm)) {
027c5e7a 15727+ err = au_hn_alloc(hinode, inode);
4a4d8108
AM
15728+ if (unlikely(err))
15729+ AuIOErr1("au_hn_alloc() %d\n", err);
1308ab2a 15730+ }
15731+ }
4a4d8108 15732+}
dece6358 15733+
4a4d8108
AM
15734+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
15735+ struct dentry *h_wh)
15736+{
15737+ struct au_hinode *hinode;
dece6358 15738+
4a4d8108
AM
15739+ IiMustWriteLock(inode);
15740+
15741+ hinode = au_ii(inode)->ii_hinode + bindex;
15742+ AuDebugOn(hinode->hi_whdentry);
15743+ hinode->hi_whdentry = h_wh;
1facf9fc 15744+}
15745+
537831f9 15746+void au_update_iigen(struct inode *inode, int half)
1308ab2a 15747+{
537831f9
AM
15748+ struct au_iinfo *iinfo;
15749+ struct au_iigen *iigen;
15750+ unsigned int sigen;
15751+
15752+ sigen = au_sigen(inode->i_sb);
15753+ iinfo = au_ii(inode);
15754+ iigen = &iinfo->ii_generation;
7f2ca4b1 15755+ spin_lock(&iigen->ig_spin);
537831f9
AM
15756+ iigen->ig_generation = sigen;
15757+ if (half)
15758+ au_ig_fset(iigen->ig_flags, HALF_REFRESHED);
15759+ else
15760+ au_ig_fclr(iigen->ig_flags, HALF_REFRESHED);
7f2ca4b1 15761+ spin_unlock(&iigen->ig_spin);
4a4d8108 15762+}
1facf9fc 15763+
4a4d8108
AM
15764+/* it may be called at remount time, too */
15765+void au_update_ibrange(struct inode *inode, int do_put_zero)
15766+{
15767+ struct au_iinfo *iinfo;
027c5e7a 15768+ aufs_bindex_t bindex, bend;
1facf9fc 15769+
4a4d8108 15770+ iinfo = au_ii(inode);
027c5e7a 15771+ if (!iinfo)
4a4d8108 15772+ return;
1facf9fc 15773+
4a4d8108 15774+ IiMustWriteLock(inode);
1facf9fc 15775+
027c5e7a 15776+ if (do_put_zero && iinfo->ii_bstart >= 0) {
4a4d8108
AM
15777+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
15778+ bindex++) {
15779+ struct inode *h_i;
1facf9fc 15780+
4a4d8108 15781+ h_i = iinfo->ii_hinode[0 + bindex].hi_inode;
38d290e6
JR
15782+ if (h_i
15783+ && !h_i->i_nlink
15784+ && !(h_i->i_state & I_LINKABLE))
027c5e7a
AM
15785+ au_set_h_iptr(inode, bindex, NULL, 0);
15786+ }
4a4d8108
AM
15787+ }
15788+
027c5e7a
AM
15789+ iinfo->ii_bstart = -1;
15790+ iinfo->ii_bend = -1;
15791+ bend = au_sbend(inode->i_sb);
15792+ for (bindex = 0; bindex <= bend; bindex++)
15793+ if (iinfo->ii_hinode[0 + bindex].hi_inode) {
15794+ iinfo->ii_bstart = bindex;
4a4d8108 15795+ break;
027c5e7a
AM
15796+ }
15797+ if (iinfo->ii_bstart >= 0)
15798+ for (bindex = bend; bindex >= iinfo->ii_bstart; bindex--)
15799+ if (iinfo->ii_hinode[0 + bindex].hi_inode) {
15800+ iinfo->ii_bend = bindex;
15801+ break;
15802+ }
15803+ AuDebugOn(iinfo->ii_bstart > iinfo->ii_bend);
1308ab2a 15804+}
1facf9fc 15805+
dece6358 15806+/* ---------------------------------------------------------------------- */
1facf9fc 15807+
4a4d8108 15808+void au_icntnr_init_once(void *_c)
dece6358 15809+{
4a4d8108
AM
15810+ struct au_icntnr *c = _c;
15811+ struct au_iinfo *iinfo = &c->iinfo;
e49829fe 15812+ static struct lock_class_key aufs_ii;
1facf9fc 15813+
7f2ca4b1 15814+ spin_lock_init(&iinfo->ii_generation.ig_spin);
4a4d8108 15815+ au_rw_init(&iinfo->ii_rwsem);
e49829fe 15816+ au_rw_class(&iinfo->ii_rwsem, &aufs_ii);
4a4d8108
AM
15817+ inode_init_once(&c->vfs_inode);
15818+}
1facf9fc 15819+
4a4d8108
AM
15820+int au_iinfo_init(struct inode *inode)
15821+{
15822+ struct au_iinfo *iinfo;
15823+ struct super_block *sb;
15824+ int nbr, i;
1facf9fc 15825+
4a4d8108
AM
15826+ sb = inode->i_sb;
15827+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
15828+ nbr = au_sbend(sb) + 1;
15829+ if (unlikely(nbr <= 0))
15830+ nbr = 1;
15831+ iinfo->ii_hinode = kcalloc(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS);
15832+ if (iinfo->ii_hinode) {
7f207e10 15833+ au_ninodes_inc(sb);
4a4d8108
AM
15834+ for (i = 0; i < nbr; i++)
15835+ iinfo->ii_hinode[i].hi_id = -1;
1facf9fc 15836+
537831f9 15837+ iinfo->ii_generation.ig_generation = au_sigen(sb);
4a4d8108
AM
15838+ iinfo->ii_bstart = -1;
15839+ iinfo->ii_bend = -1;
15840+ iinfo->ii_vdir = NULL;
15841+ return 0;
1308ab2a 15842+ }
4a4d8108
AM
15843+ return -ENOMEM;
15844+}
1facf9fc 15845+
4a4d8108
AM
15846+int au_ii_realloc(struct au_iinfo *iinfo, int nbr)
15847+{
15848+ int err, sz;
15849+ struct au_hinode *hip;
1facf9fc 15850+
4a4d8108
AM
15851+ AuRwMustWriteLock(&iinfo->ii_rwsem);
15852+
15853+ err = -ENOMEM;
15854+ sz = sizeof(*hip) * (iinfo->ii_bend + 1);
15855+ if (!sz)
15856+ sz = sizeof(*hip);
15857+ hip = au_kzrealloc(iinfo->ii_hinode, sz, sizeof(*hip) * nbr, GFP_NOFS);
15858+ if (hip) {
15859+ iinfo->ii_hinode = hip;
15860+ err = 0;
1308ab2a 15861+ }
4a4d8108 15862+
1308ab2a 15863+ return err;
1facf9fc 15864+}
15865+
4a4d8108 15866+void au_iinfo_fin(struct inode *inode)
1facf9fc 15867+{
4a4d8108
AM
15868+ struct au_iinfo *iinfo;
15869+ struct au_hinode *hi;
15870+ struct super_block *sb;
b752ccd1
AM
15871+ aufs_bindex_t bindex, bend;
15872+ const unsigned char unlinked = !inode->i_nlink;
1308ab2a 15873+
4a4d8108
AM
15874+ iinfo = au_ii(inode);
15875+ /* bad_inode case */
15876+ if (!iinfo)
15877+ return;
1308ab2a 15878+
b752ccd1 15879+ sb = inode->i_sb;
7f207e10 15880+ au_ninodes_dec(sb);
b752ccd1
AM
15881+ if (si_pid_test(sb))
15882+ au_xino_delete_inode(inode, unlinked);
15883+ else {
15884+ /*
15885+ * it is safe to hide the dependency between sbinfo and
15886+ * sb->s_umount.
15887+ */
15888+ lockdep_off();
15889+ si_noflush_read_lock(sb);
15890+ au_xino_delete_inode(inode, unlinked);
15891+ si_read_unlock(sb);
15892+ lockdep_on();
15893+ }
15894+
4a4d8108
AM
15895+ if (iinfo->ii_vdir)
15896+ au_vdir_free(iinfo->ii_vdir);
1308ab2a 15897+
b752ccd1
AM
15898+ bindex = iinfo->ii_bstart;
15899+ if (bindex >= 0) {
15900+ hi = iinfo->ii_hinode + bindex;
4a4d8108 15901+ bend = iinfo->ii_bend;
b752ccd1
AM
15902+ while (bindex++ <= bend) {
15903+ if (hi->hi_inode)
4a4d8108 15904+ au_hiput(hi);
4a4d8108
AM
15905+ hi++;
15906+ }
15907+ }
4a4d8108 15908+ kfree(iinfo->ii_hinode);
027c5e7a 15909+ iinfo->ii_hinode = NULL;
4a4d8108 15910+ AuRwDestroy(&iinfo->ii_rwsem);
dece6358 15911+}
7f207e10
AM
15912diff -urN /usr/share/empty/fs/aufs/inode.c linux/fs/aufs/inode.c
15913--- /usr/share/empty/fs/aufs/inode.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
15914+++ linux/fs/aufs/inode.c 2016-02-28 11:27:01.280579017 +0100
15915@@ -0,0 +1,522 @@
4a4d8108 15916+/*
7f2ca4b1 15917+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
15918+ *
15919+ * This program, aufs is free software; you can redistribute it and/or modify
15920+ * it under the terms of the GNU General Public License as published by
15921+ * the Free Software Foundation; either version 2 of the License, or
15922+ * (at your option) any later version.
15923+ *
15924+ * This program is distributed in the hope that it will be useful,
15925+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15926+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15927+ * GNU General Public License for more details.
15928+ *
15929+ * You should have received a copy of the GNU General Public License
523b37e3 15930+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 15931+ */
1facf9fc 15932+
4a4d8108
AM
15933+/*
15934+ * inode functions
15935+ */
1facf9fc 15936+
4a4d8108 15937+#include "aufs.h"
1308ab2a 15938+
4a4d8108
AM
15939+struct inode *au_igrab(struct inode *inode)
15940+{
15941+ if (inode) {
15942+ AuDebugOn(!atomic_read(&inode->i_count));
027c5e7a 15943+ ihold(inode);
1facf9fc 15944+ }
4a4d8108
AM
15945+ return inode;
15946+}
1facf9fc 15947+
4a4d8108
AM
15948+static void au_refresh_hinode_attr(struct inode *inode, int do_version)
15949+{
15950+ au_cpup_attr_all(inode, /*force*/0);
537831f9 15951+ au_update_iigen(inode, /*half*/1);
4a4d8108
AM
15952+ if (do_version)
15953+ inode->i_version++;
dece6358 15954+}
1facf9fc 15955+
027c5e7a 15956+static int au_ii_refresh(struct inode *inode, int *update)
dece6358 15957+{
4a4d8108 15958+ int err, e;
027c5e7a 15959+ umode_t type;
4a4d8108 15960+ aufs_bindex_t bindex, new_bindex;
1308ab2a 15961+ struct super_block *sb;
4a4d8108 15962+ struct au_iinfo *iinfo;
027c5e7a 15963+ struct au_hinode *p, *q, tmp;
1facf9fc 15964+
4a4d8108 15965+ IiMustWriteLock(inode);
1facf9fc 15966+
027c5e7a 15967+ *update = 0;
4a4d8108 15968+ sb = inode->i_sb;
027c5e7a 15969+ type = inode->i_mode & S_IFMT;
4a4d8108
AM
15970+ iinfo = au_ii(inode);
15971+ err = au_ii_realloc(iinfo, au_sbend(sb) + 1);
15972+ if (unlikely(err))
1308ab2a 15973+ goto out;
1facf9fc 15974+
027c5e7a 15975+ AuDebugOn(iinfo->ii_bstart < 0);
4a4d8108 15976+ p = iinfo->ii_hinode + iinfo->ii_bstart;
4a4d8108
AM
15977+ for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
15978+ bindex++, p++) {
15979+ if (!p->hi_inode)
15980+ continue;
1facf9fc 15981+
027c5e7a 15982+ AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT));
4a4d8108
AM
15983+ new_bindex = au_br_index(sb, p->hi_id);
15984+ if (new_bindex == bindex)
15985+ continue;
1facf9fc 15986+
4a4d8108 15987+ if (new_bindex < 0) {
027c5e7a 15988+ *update = 1;
4a4d8108
AM
15989+ au_hiput(p);
15990+ p->hi_inode = NULL;
15991+ continue;
1308ab2a 15992+ }
4a4d8108
AM
15993+
15994+ if (new_bindex < iinfo->ii_bstart)
15995+ iinfo->ii_bstart = new_bindex;
15996+ if (iinfo->ii_bend < new_bindex)
15997+ iinfo->ii_bend = new_bindex;
15998+ /* swap two lower inode, and loop again */
15999+ q = iinfo->ii_hinode + new_bindex;
16000+ tmp = *q;
16001+ *q = *p;
16002+ *p = tmp;
16003+ if (tmp.hi_inode) {
16004+ bindex--;
16005+ p--;
1308ab2a 16006+ }
16007+ }
4a4d8108
AM
16008+ au_update_ibrange(inode, /*do_put_zero*/0);
16009+ e = au_dy_irefresh(inode);
16010+ if (unlikely(e && !err))
16011+ err = e;
1facf9fc 16012+
4f0767ce 16013+out:
027c5e7a
AM
16014+ AuTraceErr(err);
16015+ return err;
16016+}
16017+
7f2ca4b1
JR
16018+void au_refresh_iop(struct inode *inode, int force_getattr)
16019+{
16020+ int type;
16021+ struct au_sbinfo *sbi = au_sbi(inode->i_sb);
16022+ const struct inode_operations *iop
16023+ = force_getattr ? aufs_iop : sbi->si_iop_array;
16024+
16025+ if (inode->i_op == iop)
16026+ return;
16027+
16028+ switch (inode->i_mode & S_IFMT) {
16029+ case S_IFDIR:
16030+ type = AuIop_DIR;
16031+ break;
16032+ case S_IFLNK:
16033+ type = AuIop_SYMLINK;
16034+ break;
16035+ default:
16036+ type = AuIop_OTHER;
16037+ break;
16038+ }
16039+
16040+ inode->i_op = iop + type;
16041+ /* unnecessary smp_wmb() */
16042+}
16043+
027c5e7a
AM
16044+int au_refresh_hinode_self(struct inode *inode)
16045+{
16046+ int err, update;
16047+
16048+ err = au_ii_refresh(inode, &update);
16049+ if (!err)
16050+ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode));
16051+
16052+ AuTraceErr(err);
4a4d8108
AM
16053+ return err;
16054+}
1facf9fc 16055+
4a4d8108
AM
16056+int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
16057+{
027c5e7a 16058+ int err, e, update;
4a4d8108 16059+ unsigned int flags;
027c5e7a 16060+ umode_t mode;
4a4d8108 16061+ aufs_bindex_t bindex, bend;
027c5e7a 16062+ unsigned char isdir;
4a4d8108
AM
16063+ struct au_hinode *p;
16064+ struct au_iinfo *iinfo;
1facf9fc 16065+
027c5e7a 16066+ err = au_ii_refresh(inode, &update);
4a4d8108
AM
16067+ if (unlikely(err))
16068+ goto out;
16069+
16070+ update = 0;
16071+ iinfo = au_ii(inode);
16072+ p = iinfo->ii_hinode + iinfo->ii_bstart;
027c5e7a
AM
16073+ mode = (inode->i_mode & S_IFMT);
16074+ isdir = S_ISDIR(mode);
4a4d8108
AM
16075+ flags = au_hi_flags(inode, isdir);
16076+ bend = au_dbend(dentry);
16077+ for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
16078+ struct inode *h_i;
16079+ struct dentry *h_d;
16080+
16081+ h_d = au_h_dptr(dentry, bindex);
16082+ if (!h_d || !h_d->d_inode)
16083+ continue;
16084+
027c5e7a 16085+ AuDebugOn(mode != (h_d->d_inode->i_mode & S_IFMT));
4a4d8108
AM
16086+ if (iinfo->ii_bstart <= bindex && bindex <= iinfo->ii_bend) {
16087+ h_i = au_h_iptr(inode, bindex);
16088+ if (h_i) {
16089+ if (h_i == h_d->d_inode)
16090+ continue;
16091+ err = -EIO;
16092+ break;
16093+ }
16094+ }
16095+ if (bindex < iinfo->ii_bstart)
16096+ iinfo->ii_bstart = bindex;
16097+ if (iinfo->ii_bend < bindex)
16098+ iinfo->ii_bend = bindex;
16099+ au_set_h_iptr(inode, bindex, au_igrab(h_d->d_inode), flags);
16100+ update = 1;
1308ab2a 16101+ }
4a4d8108
AM
16102+ au_update_ibrange(inode, /*do_put_zero*/0);
16103+ e = au_dy_irefresh(inode);
16104+ if (unlikely(e && !err))
16105+ err = e;
027c5e7a
AM
16106+ if (!err)
16107+ au_refresh_hinode_attr(inode, update && isdir);
4a4d8108 16108+
4f0767ce 16109+out:
4a4d8108 16110+ AuTraceErr(err);
1308ab2a 16111+ return err;
dece6358
AM
16112+}
16113+
4a4d8108 16114+static int set_inode(struct inode *inode, struct dentry *dentry)
dece6358 16115+{
4a4d8108
AM
16116+ int err;
16117+ unsigned int flags;
16118+ umode_t mode;
16119+ aufs_bindex_t bindex, bstart, btail;
16120+ unsigned char isdir;
16121+ struct dentry *h_dentry;
16122+ struct inode *h_inode;
16123+ struct au_iinfo *iinfo;
7f2ca4b1 16124+ struct inode_operations *iop;
dece6358 16125+
4a4d8108 16126+ IiMustWriteLock(inode);
dece6358 16127+
4a4d8108
AM
16128+ err = 0;
16129+ isdir = 0;
7f2ca4b1 16130+ iop = au_sbi(inode->i_sb)->si_iop_array;
4a4d8108
AM
16131+ bstart = au_dbstart(dentry);
16132+ h_inode = au_h_dptr(dentry, bstart)->d_inode;
16133+ mode = h_inode->i_mode;
16134+ switch (mode & S_IFMT) {
16135+ case S_IFREG:
16136+ btail = au_dbtail(dentry);
7f2ca4b1 16137+ inode->i_op = iop + AuIop_OTHER;
4a4d8108
AM
16138+ inode->i_fop = &aufs_file_fop;
16139+ err = au_dy_iaop(inode, bstart, h_inode);
16140+ if (unlikely(err))
16141+ goto out;
16142+ break;
16143+ case S_IFDIR:
16144+ isdir = 1;
16145+ btail = au_dbtaildir(dentry);
7f2ca4b1 16146+ inode->i_op = iop + AuIop_DIR;
4a4d8108
AM
16147+ inode->i_fop = &aufs_dir_fop;
16148+ break;
16149+ case S_IFLNK:
16150+ btail = au_dbtail(dentry);
7f2ca4b1 16151+ inode->i_op = iop + AuIop_SYMLINK;
4a4d8108
AM
16152+ break;
16153+ case S_IFBLK:
16154+ case S_IFCHR:
16155+ case S_IFIFO:
16156+ case S_IFSOCK:
16157+ btail = au_dbtail(dentry);
7f2ca4b1 16158+ inode->i_op = iop + AuIop_OTHER;
38d290e6 16159+ init_special_inode(inode, mode, h_inode->i_rdev);
4a4d8108
AM
16160+ break;
16161+ default:
16162+ AuIOErr("Unknown file type 0%o\n", mode);
16163+ err = -EIO;
1308ab2a 16164+ goto out;
4a4d8108 16165+ }
dece6358 16166+
4a4d8108
AM
16167+ /* do not set hnotify for whiteouted dirs (SHWH mode) */
16168+ flags = au_hi_flags(inode, isdir);
16169+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)
16170+ && au_ftest_hi(flags, HNOTIFY)
16171+ && dentry->d_name.len > AUFS_WH_PFX_LEN
16172+ && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
16173+ au_fclr_hi(flags, HNOTIFY);
16174+ iinfo = au_ii(inode);
16175+ iinfo->ii_bstart = bstart;
16176+ iinfo->ii_bend = btail;
16177+ for (bindex = bstart; bindex <= btail; bindex++) {
16178+ h_dentry = au_h_dptr(dentry, bindex);
16179+ if (h_dentry)
16180+ au_set_h_iptr(inode, bindex,
16181+ au_igrab(h_dentry->d_inode), flags);
16182+ }
16183+ au_cpup_attr_all(inode, /*force*/1);
c1595e42
JR
16184+ /*
16185+ * to force calling aufs_get_acl() every time,
16186+ * do not call cache_no_acl() for aufs inode.
16187+ */
dece6358 16188+
4f0767ce 16189+out:
4a4d8108
AM
16190+ return err;
16191+}
dece6358 16192+
027c5e7a
AM
16193+/*
16194+ * successful returns with iinfo write_locked
16195+ * minus: errno
16196+ * zero: success, matched
16197+ * plus: no error, but unmatched
16198+ */
16199+static int reval_inode(struct inode *inode, struct dentry *dentry)
4a4d8108
AM
16200+{
16201+ int err;
7f2ca4b1 16202+ unsigned int gen, igflags;
4a4d8108
AM
16203+ aufs_bindex_t bindex, bend;
16204+ struct inode *h_inode, *h_dinode;
dece6358 16205+
4a4d8108
AM
16206+ /*
16207+ * before this function, if aufs got any iinfo lock, it must be only
16208+ * one, the parent dir.
16209+ * it can happen by UDBA and the obsoleted inode number.
16210+ */
16211+ err = -EIO;
16212+ if (unlikely(inode->i_ino == parent_ino(dentry)))
16213+ goto out;
16214+
027c5e7a 16215+ err = 1;
4a4d8108
AM
16216+ ii_write_lock_new_child(inode);
16217+ h_dinode = au_h_dptr(dentry, au_dbstart(dentry))->d_inode;
16218+ bend = au_ibend(inode);
16219+ for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
16220+ h_inode = au_h_iptr(inode, bindex);
537831f9
AM
16221+ if (!h_inode || h_inode != h_dinode)
16222+ continue;
16223+
16224+ err = 0;
7f2ca4b1 16225+ gen = au_iigen(inode, &igflags);
537831f9 16226+ if (gen == au_digen(dentry)
7f2ca4b1 16227+ && !au_ig_ftest(igflags, HALF_REFRESHED))
4a4d8108 16228+ break;
537831f9
AM
16229+
16230+ /* fully refresh inode using dentry */
16231+ err = au_refresh_hinode(inode, dentry);
16232+ if (!err)
16233+ au_update_iigen(inode, /*half*/0);
16234+ break;
1facf9fc 16235+ }
dece6358 16236+
4a4d8108
AM
16237+ if (unlikely(err))
16238+ ii_write_unlock(inode);
4f0767ce 16239+out:
1facf9fc 16240+ return err;
16241+}
1facf9fc 16242+
4a4d8108
AM
16243+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
16244+ unsigned int d_type, ino_t *ino)
1facf9fc 16245+{
4a4d8108
AM
16246+ int err;
16247+ struct mutex *mtx;
1facf9fc 16248+
b752ccd1 16249+ /* prevent hardlinked inode number from race condition */
4a4d8108 16250+ mtx = NULL;
b752ccd1 16251+ if (d_type != DT_DIR) {
4a4d8108
AM
16252+ mtx = &au_sbr(sb, bindex)->br_xino.xi_nondir_mtx;
16253+ mutex_lock(mtx);
16254+ }
16255+ err = au_xino_read(sb, bindex, h_ino, ino);
16256+ if (unlikely(err))
16257+ goto out;
1308ab2a 16258+
4a4d8108
AM
16259+ if (!*ino) {
16260+ err = -EIO;
16261+ *ino = au_xino_new_ino(sb);
16262+ if (unlikely(!*ino))
1facf9fc 16263+ goto out;
4a4d8108
AM
16264+ err = au_xino_write(sb, bindex, h_ino, *ino);
16265+ if (unlikely(err))
1308ab2a 16266+ goto out;
1308ab2a 16267+ }
1facf9fc 16268+
4f0767ce 16269+out:
b752ccd1 16270+ if (mtx)
4a4d8108 16271+ mutex_unlock(mtx);
1facf9fc 16272+ return err;
16273+}
16274+
4a4d8108
AM
16275+/* successful returns with iinfo write_locked */
16276+/* todo: return with unlocked? */
16277+struct inode *au_new_inode(struct dentry *dentry, int must_new)
1facf9fc 16278+{
7f2ca4b1 16279+ struct inode *inode;
4a4d8108
AM
16280+ struct dentry *h_dentry;
16281+ struct super_block *sb;
b752ccd1 16282+ struct mutex *mtx;
4a4d8108 16283+ ino_t h_ino, ino;
1716fcea 16284+ int err;
4a4d8108 16285+ aufs_bindex_t bstart;
1facf9fc 16286+
4a4d8108
AM
16287+ sb = dentry->d_sb;
16288+ bstart = au_dbstart(dentry);
16289+ h_dentry = au_h_dptr(dentry, bstart);
7f2ca4b1 16290+ h_ino = h_dentry->d_inode->i_ino;
b752ccd1
AM
16291+
16292+ /*
16293+ * stop 'race'-ing between hardlinks under different
16294+ * parents.
16295+ */
16296+ mtx = NULL;
7f2ca4b1 16297+ if (!d_is_dir(h_dentry))
b752ccd1
AM
16298+ mtx = &au_sbr(sb, bstart)->br_xino.xi_nondir_mtx;
16299+
4f0767ce 16300+new_ino:
b752ccd1
AM
16301+ if (mtx)
16302+ mutex_lock(mtx);
4a4d8108
AM
16303+ err = au_xino_read(sb, bstart, h_ino, &ino);
16304+ inode = ERR_PTR(err);
16305+ if (unlikely(err))
16306+ goto out;
b752ccd1 16307+
4a4d8108
AM
16308+ if (!ino) {
16309+ ino = au_xino_new_ino(sb);
16310+ if (unlikely(!ino)) {
16311+ inode = ERR_PTR(-EIO);
dece6358
AM
16312+ goto out;
16313+ }
16314+ }
1facf9fc 16315+
4a4d8108
AM
16316+ AuDbg("i%lu\n", (unsigned long)ino);
16317+ inode = au_iget_locked(sb, ino);
16318+ err = PTR_ERR(inode);
16319+ if (IS_ERR(inode))
1facf9fc 16320+ goto out;
1facf9fc 16321+
4a4d8108
AM
16322+ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
16323+ if (inode->i_state & I_NEW) {
1716fcea 16324+ /* verbose coding for lock class name */
7f2ca4b1 16325+ if (unlikely(d_is_symlink(h_dentry)))
1716fcea
AM
16326+ au_rw_class(&au_ii(inode)->ii_rwsem,
16327+ au_lc_key + AuLcSymlink_IIINFO);
7f2ca4b1 16328+ else if (unlikely(d_is_dir(h_dentry)))
1716fcea
AM
16329+ au_rw_class(&au_ii(inode)->ii_rwsem,
16330+ au_lc_key + AuLcDir_IIINFO);
16331+ else /* likely */
16332+ au_rw_class(&au_ii(inode)->ii_rwsem,
16333+ au_lc_key + AuLcNonDir_IIINFO);
2dfbb274 16334+
4a4d8108
AM
16335+ ii_write_lock_new_child(inode);
16336+ err = set_inode(inode, dentry);
16337+ if (!err) {
16338+ unlock_new_inode(inode);
16339+ goto out; /* success */
16340+ }
1308ab2a 16341+
027c5e7a
AM
16342+ /*
16343+ * iget_failed() calls iput(), but we need to call
16344+ * ii_write_unlock() after iget_failed(). so dirty hack for
16345+ * i_count.
16346+ */
16347+ atomic_inc(&inode->i_count);
4a4d8108 16348+ iget_failed(inode);
027c5e7a
AM
16349+ ii_write_unlock(inode);
16350+ au_xino_write(sb, bstart, h_ino, /*ino*/0);
16351+ /* ignore this error */
16352+ goto out_iput;
16353+ } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) {
b752ccd1
AM
16354+ /*
16355+ * horrible race condition between lookup, readdir and copyup
16356+ * (or something).
16357+ */
16358+ if (mtx)
16359+ mutex_unlock(mtx);
027c5e7a
AM
16360+ err = reval_inode(inode, dentry);
16361+ if (unlikely(err < 0)) {
16362+ mtx = NULL;
16363+ goto out_iput;
16364+ }
16365+
b752ccd1
AM
16366+ if (!err) {
16367+ mtx = NULL;
4a4d8108 16368+ goto out; /* success */
b752ccd1
AM
16369+ } else if (mtx)
16370+ mutex_lock(mtx);
4a4d8108
AM
16371+ }
16372+
16373+ if (unlikely(au_test_fs_unique_ino(h_dentry->d_inode)))
16374+ AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
523b37e3
AM
16375+ " b%d, %s, %pd, hi%lu, i%lu.\n",
16376+ bstart, au_sbtype(h_dentry->d_sb), dentry,
4a4d8108
AM
16377+ (unsigned long)h_ino, (unsigned long)ino);
16378+ ino = 0;
16379+ err = au_xino_write(sb, bstart, h_ino, /*ino*/0);
16380+ if (!err) {
16381+ iput(inode);
b752ccd1
AM
16382+ if (mtx)
16383+ mutex_unlock(mtx);
4a4d8108
AM
16384+ goto new_ino;
16385+ }
1308ab2a 16386+
4f0767ce 16387+out_iput:
4a4d8108 16388+ iput(inode);
4a4d8108 16389+ inode = ERR_PTR(err);
4f0767ce 16390+out:
b752ccd1
AM
16391+ if (mtx)
16392+ mutex_unlock(mtx);
4a4d8108 16393+ return inode;
1facf9fc 16394+}
16395+
4a4d8108 16396+/* ---------------------------------------------------------------------- */
1facf9fc 16397+
4a4d8108
AM
16398+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
16399+ struct inode *inode)
16400+{
16401+ int err;
076b876e 16402+ struct inode *hi;
1facf9fc 16403+
4a4d8108 16404+ err = au_br_rdonly(au_sbr(sb, bindex));
1facf9fc 16405+
4a4d8108
AM
16406+ /* pseudo-link after flushed may happen out of bounds */
16407+ if (!err
16408+ && inode
16409+ && au_ibstart(inode) <= bindex
16410+ && bindex <= au_ibend(inode)) {
16411+ /*
16412+ * permission check is unnecessary since vfsub routine
16413+ * will be called later
16414+ */
076b876e 16415+ hi = au_h_iptr(inode, bindex);
4a4d8108
AM
16416+ if (hi)
16417+ err = IS_IMMUTABLE(hi) ? -EROFS : 0;
1facf9fc 16418+ }
16419+
4a4d8108
AM
16420+ return err;
16421+}
dece6358 16422+
4a4d8108
AM
16423+int au_test_h_perm(struct inode *h_inode, int mask)
16424+{
2dfbb274 16425+ if (uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
4a4d8108
AM
16426+ return 0;
16427+ return inode_permission(h_inode, mask);
16428+}
1facf9fc 16429+
4a4d8108
AM
16430+int au_test_h_perm_sio(struct inode *h_inode, int mask)
16431+{
16432+ if (au_test_nfs(h_inode->i_sb)
16433+ && (mask & MAY_WRITE)
16434+ && S_ISDIR(h_inode->i_mode))
16435+ mask |= MAY_READ; /* force permission check */
16436+ return au_test_h_perm(h_inode, mask);
1facf9fc 16437+}
7f207e10
AM
16438diff -urN /usr/share/empty/fs/aufs/inode.h linux/fs/aufs/inode.h
16439--- /usr/share/empty/fs/aufs/inode.h 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
16440+++ linux/fs/aufs/inode.h 2016-02-28 11:27:01.280579017 +0100
16441@@ -0,0 +1,685 @@
4a4d8108 16442+/*
7f2ca4b1 16443+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
16444+ *
16445+ * This program, aufs is free software; you can redistribute it and/or modify
16446+ * it under the terms of the GNU General Public License as published by
16447+ * the Free Software Foundation; either version 2 of the License, or
16448+ * (at your option) any later version.
16449+ *
16450+ * This program is distributed in the hope that it will be useful,
16451+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
16452+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16453+ * GNU General Public License for more details.
16454+ *
16455+ * You should have received a copy of the GNU General Public License
523b37e3 16456+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 16457+ */
1facf9fc 16458+
1308ab2a 16459+/*
4a4d8108 16460+ * inode operations
1308ab2a 16461+ */
dece6358 16462+
4a4d8108
AM
16463+#ifndef __AUFS_INODE_H__
16464+#define __AUFS_INODE_H__
dece6358 16465+
4a4d8108 16466+#ifdef __KERNEL__
1308ab2a 16467+
4a4d8108 16468+#include <linux/fsnotify.h>
4a4d8108 16469+#include "rwsem.h"
1308ab2a 16470+
4a4d8108 16471+struct vfsmount;
1facf9fc 16472+
4a4d8108
AM
/*
 * Per lower-inode notification state.  The structure is empty unless
 * CONFIG_AUFS_HNOTIFY is enabled.
 */
struct au_hnotify {
#ifdef CONFIG_AUFS_HNOTIFY
#ifdef CONFIG_AUFS_HFSNOTIFY
	/* never use fsnotify_add_vfsmount_mark() */
	struct fsnotify_mark		hn_mark;
#endif
	/* the aufs inode this notification belongs to; no get/put */
	struct inode			*hn_aufs_inode;
#endif
} ____cacheline_aligned_in_smp;
1facf9fc 16482+
4a4d8108
AM
16483+struct au_hinode {
16484+ struct inode *hi_inode;
16485+ aufs_bindex_t hi_id;
16486+#ifdef CONFIG_AUFS_HNOTIFY
16487+ struct au_hnotify *hi_notify;
16488+#endif
dece6358 16489+
4a4d8108
AM
16490+ /* reference to the copied-up whiteout with get/put */
16491+ struct dentry *hi_whdentry;
16492+};
dece6358 16493+
537831f9
AM
/*
 * ig_flags: bits kept in struct au_iigen.ig_flags.
 * The helpers take the flag name without its AuIG_ prefix.
 */
#define AuIG_HALF_REFRESHED		1
/* non-zero when the named flag is set */
#define au_ig_ftest(flags, name)	((flags) & AuIG_##name)
/* set the named flag in place */
#define au_ig_fset(flags, name) \
	do { (flags) |= AuIG_##name; } while (0)
/* clear the named flag in place */
#define au_ig_fclr(flags, name) \
	do { (flags) &= ~AuIG_##name; } while (0)
16501+
16502+struct au_iigen {
7f2ca4b1 16503+ spinlock_t ig_spin;
537831f9
AM
16504+ __u32 ig_generation, ig_flags;
16505+};
16506+
4a4d8108
AM
16507+struct au_vdir;
16508+struct au_iinfo {
7a9e40b8 16509+ struct au_iigen ii_generation;
4a4d8108 16510+ struct super_block *ii_hsb1; /* no get/put */
1facf9fc 16511+
4a4d8108
AM
16512+ struct au_rwsem ii_rwsem;
16513+ aufs_bindex_t ii_bstart, ii_bend;
16514+ __u32 ii_higen;
16515+ struct au_hinode *ii_hinode;
16516+ struct au_vdir *ii_vdir;
16517+};
1facf9fc 16518+
4a4d8108
AM
16519+struct au_icntnr {
16520+ struct au_iinfo iinfo;
16521+ struct inode vfs_inode;
16522+} ____cacheline_aligned_in_smp;
1308ab2a 16523+
4a4d8108
AM
/*
 * au_pin flags.
 * The helpers take the flag name without its AuPin_ prefix.
 */
#define AuPin_DI_LOCKED			1
#define AuPin_MNT_WRITE			(1 << 1)
/* non-zero when the named flag is set */
#define au_ftest_pin(flags, name)	((flags) & AuPin_##name)
/* set the named flag in place */
#define au_fset_pin(flags, name) \
	do { (flags) |= AuPin_##name; } while (0)
/* clear the named flag in place */
#define au_fclr_pin(flags, name) \
	do { (flags) &= ~AuPin_##name; } while (0)
4a4d8108
AM
16532+
16533+struct au_pin {
16534+ /* input */
16535+ struct dentry *dentry;
16536+ unsigned int udba;
16537+ unsigned char lsc_di, lsc_hi, flags;
16538+ aufs_bindex_t bindex;
16539+
16540+ /* output */
16541+ struct dentry *parent;
16542+ struct au_hinode *hdir;
16543+ struct vfsmount *h_mnt;
86dc4139
AM
16544+
16545+ /* temporary unlock/relock for copyup */
16546+ struct dentry *h_dentry, *h_parent;
16547+ struct au_branch *br;
16548+ struct task_struct *task;
4a4d8108 16549+};
1facf9fc 16550+
86dc4139 16551+void au_pin_hdir_unlock(struct au_pin *p);
c1595e42 16552+int au_pin_hdir_lock(struct au_pin *p);
86dc4139
AM
16553+int au_pin_hdir_relock(struct au_pin *p);
16554+void au_pin_hdir_set_owner(struct au_pin *p, struct task_struct *task);
16555+void au_pin_hdir_acquire_nest(struct au_pin *p);
16556+void au_pin_hdir_release(struct au_pin *p);
16557+
1308ab2a 16558+/* ---------------------------------------------------------------------- */
16559+
4a4d8108 16560+static inline struct au_iinfo *au_ii(struct inode *inode)
1facf9fc 16561+{
4a4d8108 16562+ struct au_iinfo *iinfo;
1facf9fc 16563+
4a4d8108
AM
16564+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
16565+ if (iinfo->ii_hinode)
16566+ return iinfo;
16567+ return NULL; /* debugging bad_inode case */
16568+}
1facf9fc 16569+
4a4d8108 16570+/* ---------------------------------------------------------------------- */
1facf9fc 16571+
4a4d8108
AM
16572+/* inode.c */
16573+struct inode *au_igrab(struct inode *inode);
7f2ca4b1 16574+void au_refresh_iop(struct inode *inode, int force_getattr);
027c5e7a 16575+int au_refresh_hinode_self(struct inode *inode);
4a4d8108
AM
16576+int au_refresh_hinode(struct inode *inode, struct dentry *dentry);
16577+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
16578+ unsigned int d_type, ino_t *ino);
16579+struct inode *au_new_inode(struct dentry *dentry, int must_new);
16580+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
16581+ struct inode *inode);
16582+int au_test_h_perm(struct inode *h_inode, int mask);
16583+int au_test_h_perm_sio(struct inode *h_inode, int mask);
1facf9fc 16584+
4a4d8108
AM
16585+static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex,
16586+ ino_t h_ino, unsigned int d_type, ino_t *ino)
16587+{
16588+#ifdef CONFIG_AUFS_SHWH
16589+ return au_ino(sb, bindex, h_ino, d_type, ino);
16590+#else
16591+ return 0;
16592+#endif
16593+}
1facf9fc 16594+
4a4d8108 16595+/* i_op.c */
7f2ca4b1
JR
16596+enum {
16597+ AuIop_SYMLINK,
16598+ AuIop_DIR,
16599+ AuIop_OTHER,
16600+ AuIop_Last
16601+};
16602+extern struct inode_operations aufs_iop[AuIop_Last],
16603+ aufs_iop_nogetattr[AuIop_Last];
1308ab2a 16604+
4a4d8108
AM
/*
 * au_wr_dir flags.
 * The helpers take the flag name without its AuWrDir_ prefix.
 */
#define AuWrDir_ADD_ENTRY		1
#define AuWrDir_ISDIR			(1 << 1)
#define AuWrDir_TMPFILE			(1 << 2)
/* non-zero when the named flag is set */
#define au_ftest_wrdir(flags, name)	((flags) & AuWrDir_##name)
/* set the named flag in place */
#define au_fset_wrdir(flags, name) \
	do { (flags) |= AuWrDir_##name; } while (0)
/* clear the named flag in place */
#define au_fclr_wrdir(flags, name) \
	do { (flags) &= ~AuWrDir_##name; } while (0)
1facf9fc 16614+
4a4d8108
AM
16615+struct au_wr_dir_args {
16616+ aufs_bindex_t force_btgt;
16617+ unsigned char flags;
16618+};
16619+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
16620+ struct au_wr_dir_args *args);
dece6358 16621+
4a4d8108
AM
16622+struct dentry *au_pinned_h_parent(struct au_pin *pin);
16623+void au_pin_init(struct au_pin *pin, struct dentry *dentry,
16624+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
16625+ unsigned int udba, unsigned char flags);
16626+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
16627+ unsigned int udba, unsigned char flags) __must_check;
16628+int au_do_pin(struct au_pin *pin) __must_check;
16629+void au_unpin(struct au_pin *pin);
c1595e42
JR
16630+int au_reval_for_attr(struct dentry *dentry, unsigned int sigen);
16631+
16632+#define AuIcpup_DID_CPUP 1
16633+#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name)
16634+#define au_fset_icpup(flags, name) \
16635+ do { (flags) |= AuIcpup_##name; } while (0)
16636+#define au_fclr_icpup(flags, name) \
16637+ do { (flags) &= ~AuIcpup_##name; } while (0)
16638+
16639+struct au_icpup_args {
16640+ unsigned char flags;
16641+ unsigned char pin_flags;
16642+ aufs_bindex_t btgt;
16643+ unsigned int udba;
16644+ struct au_pin pin;
16645+ struct path h_path;
16646+ struct inode *h_inode;
16647+};
16648+
16649+int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
16650+ struct au_icpup_args *a);
16651+
16652+int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path);
1facf9fc 16653+
4a4d8108
AM
16654+/* i_op_add.c */
16655+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
16656+ struct dentry *h_parent, int isdir);
7eafdf33
AM
16657+int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
16658+ dev_t dev);
4a4d8108 16659+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname);
7eafdf33 16660+int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
b4510431 16661+ bool want_excl);
7f2ca4b1
JR
16662+struct vfsub_aopen_args;
16663+int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
16664+ struct vfsub_aopen_args *args);
38d290e6 16665+int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode);
4a4d8108
AM
16666+int aufs_link(struct dentry *src_dentry, struct inode *dir,
16667+ struct dentry *dentry);
7eafdf33 16668+int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
1facf9fc 16669+
4a4d8108
AM
16670+/* i_op_del.c */
16671+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup);
16672+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
16673+ struct dentry *h_parent, int isdir);
16674+int aufs_unlink(struct inode *dir, struct dentry *dentry);
16675+int aufs_rmdir(struct inode *dir, struct dentry *dentry);
1308ab2a 16676+
4a4d8108
AM
16677+/* i_op_ren.c */
16678+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt);
16679+int aufs_rename(struct inode *src_dir, struct dentry *src_dentry,
16680+ struct inode *dir, struct dentry *dentry);
1facf9fc 16681+
4a4d8108
AM
16682+/* iinfo.c */
16683+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex);
16684+void au_hiput(struct au_hinode *hinode);
16685+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
16686+ struct dentry *h_wh);
16687+unsigned int au_hi_flags(struct inode *inode, int isdir);
1308ab2a 16688+
4a4d8108
AM
16689+/* hinode flags */
16690+#define AuHi_XINO 1
16691+#define AuHi_HNOTIFY (1 << 1)
16692+#define au_ftest_hi(flags, name) ((flags) & AuHi_##name)
7f207e10
AM
16693+#define au_fset_hi(flags, name) \
16694+ do { (flags) |= AuHi_##name; } while (0)
16695+#define au_fclr_hi(flags, name) \
16696+ do { (flags) &= ~AuHi_##name; } while (0)
1facf9fc 16697+
4a4d8108
AM
16698+#ifndef CONFIG_AUFS_HNOTIFY
16699+#undef AuHi_HNOTIFY
16700+#define AuHi_HNOTIFY 0
16701+#endif
1facf9fc 16702+
4a4d8108
AM
16703+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
16704+ struct inode *h_inode, unsigned int flags);
1facf9fc 16705+
537831f9 16706+void au_update_iigen(struct inode *inode, int half);
4a4d8108 16707+void au_update_ibrange(struct inode *inode, int do_put_zero);
1facf9fc 16708+
4a4d8108
AM
16709+void au_icntnr_init_once(void *_c);
16710+int au_iinfo_init(struct inode *inode);
16711+void au_iinfo_fin(struct inode *inode);
16712+int au_ii_realloc(struct au_iinfo *iinfo, int nbr);
1308ab2a 16713+
e49829fe 16714+#ifdef CONFIG_PROC_FS
4a4d8108 16715+/* plink.c */
e49829fe 16716+int au_plink_maint(struct super_block *sb, int flags);
7f2ca4b1 16717+struct au_sbinfo;
e49829fe
JR
16718+void au_plink_maint_leave(struct au_sbinfo *sbinfo);
16719+int au_plink_maint_enter(struct super_block *sb);
4a4d8108
AM
16720+#ifdef CONFIG_AUFS_DEBUG
16721+void au_plink_list(struct super_block *sb);
16722+#else
16723+AuStubVoid(au_plink_list, struct super_block *sb)
16724+#endif
16725+int au_plink_test(struct inode *inode);
16726+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex);
16727+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
16728+ struct dentry *h_dentry);
e49829fe
JR
16729+void au_plink_put(struct super_block *sb, int verbose);
16730+void au_plink_clean(struct super_block *sb, int verbose);
4a4d8108 16731+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id);
e49829fe
JR
16732+#else
16733+AuStubInt0(au_plink_maint, struct super_block *sb, int flags);
16734+AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo);
16735+AuStubInt0(au_plink_maint_enter, struct super_block *sb);
16736+AuStubVoid(au_plink_list, struct super_block *sb);
16737+AuStubInt0(au_plink_test, struct inode *inode);
16738+AuStub(struct dentry *, au_plink_lkup, return NULL,
16739+ struct inode *inode, aufs_bindex_t bindex);
16740+AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex,
16741+ struct dentry *h_dentry);
16742+AuStubVoid(au_plink_put, struct super_block *sb, int verbose);
16743+AuStubVoid(au_plink_clean, struct super_block *sb, int verbose);
16744+AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id);
16745+#endif /* CONFIG_PROC_FS */
1facf9fc 16746+
c1595e42
JR
16747+#ifdef CONFIG_AUFS_XATTR
16748+/* xattr.c */
7f2ca4b1
JR
16749+int au_cpup_xattr(struct dentry *h_dst, struct dentry *h_src, int ignore_flags,
16750+ unsigned int verbose);
c1595e42
JR
16751+ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size);
16752+ssize_t aufs_getxattr(struct dentry *dentry, const char *name, void *value,
16753+ size_t size);
16754+int aufs_setxattr(struct dentry *dentry, const char *name, const void *value,
16755+ size_t size, int flags);
16756+int aufs_removexattr(struct dentry *dentry, const char *name);
16757+
16758+/* void au_xattr_init(struct super_block *sb); */
16759+#else
16760+AuStubInt0(au_cpup_xattr, struct dentry *h_dst, struct dentry *h_src,
7f2ca4b1 16761+ int ignore_flags, unsigned int verbose);
c1595e42
JR
16762+/* AuStubVoid(au_xattr_init, struct super_block *sb); */
16763+#endif
16764+
16765+#ifdef CONFIG_FS_POSIX_ACL
16766+struct posix_acl *aufs_get_acl(struct inode *inode, int type);
16767+int aufs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
16768+#endif
16769+
16770+#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
16771+enum {
16772+ AU_XATTR_SET,
16773+ AU_XATTR_REMOVE,
16774+ AU_ACL_SET
16775+};
16776+
16777+struct au_srxattr {
16778+ int type;
16779+ union {
16780+ struct {
16781+ const char *name;
16782+ const void *value;
16783+ size_t size;
16784+ int flags;
16785+ } set;
16786+ struct {
16787+ const char *name;
16788+ } remove;
16789+ struct {
16790+ struct posix_acl *acl;
16791+ int type;
16792+ } acl_set;
16793+ } u;
16794+};
16795+ssize_t au_srxattr(struct dentry *dentry, struct au_srxattr *arg);
16796+#endif
16797+
4a4d8108 16798+/* ---------------------------------------------------------------------- */
1308ab2a 16799+
4a4d8108
AM
16800+/* lock subclass for iinfo */
16801+enum {
16802+ AuLsc_II_CHILD, /* child first */
16803+ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hnotify */
16804+ AuLsc_II_CHILD3, /* copyup dirs */
16805+ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */
16806+ AuLsc_II_PARENT2,
16807+ AuLsc_II_PARENT3, /* copyup dirs */
16808+ AuLsc_II_NEW_CHILD
16809+};
1308ab2a 16810+
1facf9fc 16811+/*
4a4d8108
AM
16812+ * ii_read_lock_child, ii_write_lock_child,
16813+ * ii_read_lock_child2, ii_write_lock_child2,
16814+ * ii_read_lock_child3, ii_write_lock_child3,
16815+ * ii_read_lock_parent, ii_write_lock_parent,
16816+ * ii_read_lock_parent2, ii_write_lock_parent2,
16817+ * ii_read_lock_parent3, ii_write_lock_parent3,
16818+ * ii_read_lock_new_child, ii_write_lock_new_child,
1facf9fc 16819+ */
4a4d8108
AM
16820+#define AuReadLockFunc(name, lsc) \
16821+static inline void ii_read_lock_##name(struct inode *i) \
16822+{ \
16823+ au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
16824+}
16825+
16826+#define AuWriteLockFunc(name, lsc) \
16827+static inline void ii_write_lock_##name(struct inode *i) \
16828+{ \
16829+ au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
16830+}
16831+
16832+#define AuRWLockFuncs(name, lsc) \
16833+ AuReadLockFunc(name, lsc) \
16834+ AuWriteLockFunc(name, lsc)
16835+
16836+AuRWLockFuncs(child, CHILD);
16837+AuRWLockFuncs(child2, CHILD2);
16838+AuRWLockFuncs(child3, CHILD3);
16839+AuRWLockFuncs(parent, PARENT);
16840+AuRWLockFuncs(parent2, PARENT2);
16841+AuRWLockFuncs(parent3, PARENT3);
16842+AuRWLockFuncs(new_child, NEW_CHILD);
16843+
16844+#undef AuReadLockFunc
16845+#undef AuWriteLockFunc
16846+#undef AuRWLockFuncs
1facf9fc 16847+
16848+/*
4a4d8108 16849+ * ii_read_unlock, ii_write_unlock, ii_downgrade_lock
1facf9fc 16850+ */
4a4d8108 16851+AuSimpleUnlockRwsemFuncs(ii, struct inode *i, &au_ii(i)->ii_rwsem);
1facf9fc 16852+
4a4d8108
AM
16853+#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem)
16854+#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem)
16855+#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem)
1facf9fc 16856+
4a4d8108 16857+/* ---------------------------------------------------------------------- */
1308ab2a 16858+
027c5e7a
AM
16859+static inline void au_icntnr_init(struct au_icntnr *c)
16860+{
16861+#ifdef CONFIG_AUFS_DEBUG
16862+ c->vfs_inode.i_mode = 0;
16863+#endif
16864+}
16865+
7f2ca4b1 16866+static inline unsigned int au_iigen(struct inode *inode, unsigned int *igflags)
4a4d8108 16867+{
537831f9
AM
16868+ unsigned int gen;
16869+ struct au_iinfo *iinfo;
7f2ca4b1 16870+ struct au_iigen *iigen;
537831f9
AM
16871+
16872+ iinfo = au_ii(inode);
7f2ca4b1
JR
16873+ iigen = &iinfo->ii_generation;
16874+ spin_lock(&iigen->ig_spin);
16875+ if (igflags)
16876+ *igflags = iigen->ig_flags;
16877+ gen = iigen->ig_generation;
16878+ spin_unlock(&iigen->ig_spin);
537831f9
AM
16879+
16880+ return gen;
4a4d8108 16881+}
1308ab2a 16882+
4a4d8108
AM
16883+/* tiny test for inode number */
16884+/* tmpfs generation is too rough */
16885+static inline int au_test_higen(struct inode *inode, struct inode *h_inode)
16886+{
16887+ struct au_iinfo *iinfo;
1308ab2a 16888+
4a4d8108
AM
16889+ iinfo = au_ii(inode);
16890+ AuRwMustAnyLock(&iinfo->ii_rwsem);
16891+ return !(iinfo->ii_hsb1 == h_inode->i_sb
16892+ && iinfo->ii_higen == h_inode->i_generation);
16893+}
1308ab2a 16894+
4a4d8108
AM
16895+static inline void au_iigen_dec(struct inode *inode)
16896+{
537831f9 16897+ struct au_iinfo *iinfo;
7f2ca4b1 16898+ struct au_iigen *iigen;
537831f9
AM
16899+
16900+ iinfo = au_ii(inode);
7f2ca4b1
JR
16901+ iigen = &iinfo->ii_generation;
16902+ spin_lock(&iigen->ig_spin);
16903+ iigen->ig_generation--;
16904+ spin_unlock(&iigen->ig_spin);
027c5e7a
AM
16905+}
16906+
16907+static inline int au_iigen_test(struct inode *inode, unsigned int sigen)
16908+{
16909+ int err;
16910+
16911+ err = 0;
537831f9 16912+ if (unlikely(inode && au_iigen(inode, NULL) != sigen))
027c5e7a
AM
16913+ err = -EIO;
16914+
16915+ return err;
4a4d8108 16916+}
1308ab2a 16917+
4a4d8108 16918+/* ---------------------------------------------------------------------- */
1308ab2a 16919+
4a4d8108
AM
16920+static inline aufs_bindex_t au_ii_br_id(struct inode *inode,
16921+ aufs_bindex_t bindex)
16922+{
16923+ IiMustAnyLock(inode);
16924+ return au_ii(inode)->ii_hinode[0 + bindex].hi_id;
16925+}
1308ab2a 16926+
4a4d8108
AM
16927+static inline aufs_bindex_t au_ibstart(struct inode *inode)
16928+{
16929+ IiMustAnyLock(inode);
16930+ return au_ii(inode)->ii_bstart;
16931+}
1308ab2a 16932+
4a4d8108
AM
16933+static inline aufs_bindex_t au_ibend(struct inode *inode)
16934+{
16935+ IiMustAnyLock(inode);
16936+ return au_ii(inode)->ii_bend;
16937+}
1308ab2a 16938+
4a4d8108
AM
16939+static inline struct au_vdir *au_ivdir(struct inode *inode)
16940+{
16941+ IiMustAnyLock(inode);
16942+ return au_ii(inode)->ii_vdir;
16943+}
1308ab2a 16944+
4a4d8108
AM
16945+static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex)
16946+{
16947+ IiMustAnyLock(inode);
16948+ return au_ii(inode)->ii_hinode[0 + bindex].hi_whdentry;
16949+}
1308ab2a 16950+
4a4d8108 16951+static inline void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 16952+{
4a4d8108
AM
16953+ IiMustWriteLock(inode);
16954+ au_ii(inode)->ii_bstart = bindex;
16955+}
1308ab2a 16956+
4a4d8108
AM
16957+static inline void au_set_ibend(struct inode *inode, aufs_bindex_t bindex)
16958+{
16959+ IiMustWriteLock(inode);
16960+ au_ii(inode)->ii_bend = bindex;
1308ab2a 16961+}
16962+
4a4d8108
AM
16963+static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
16964+{
16965+ IiMustWriteLock(inode);
16966+ au_ii(inode)->ii_vdir = vdir;
16967+}
1facf9fc 16968+
4a4d8108 16969+static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex)
1308ab2a 16970+{
4a4d8108
AM
16971+ IiMustAnyLock(inode);
16972+ return au_ii(inode)->ii_hinode + bindex;
16973+}
dece6358 16974+
4a4d8108 16975+/* ---------------------------------------------------------------------- */
1facf9fc 16976+
4a4d8108
AM
16977+static inline struct dentry *au_pinned_parent(struct au_pin *pin)
16978+{
16979+ if (pin)
16980+ return pin->parent;
16981+ return NULL;
1facf9fc 16982+}
16983+
4a4d8108 16984+static inline struct inode *au_pinned_h_dir(struct au_pin *pin)
1facf9fc 16985+{
4a4d8108
AM
16986+ if (pin && pin->hdir)
16987+ return pin->hdir->hi_inode;
16988+ return NULL;
1308ab2a 16989+}
1facf9fc 16990+
4a4d8108
AM
16991+static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin)
16992+{
16993+ if (pin)
16994+ return pin->hdir;
16995+ return NULL;
16996+}
1facf9fc 16997+
4a4d8108 16998+static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry)
1308ab2a 16999+{
4a4d8108
AM
17000+ if (pin)
17001+ pin->dentry = dentry;
17002+}
1308ab2a 17003+
4a4d8108
AM
17004+static inline void au_pin_set_parent_lflag(struct au_pin *pin,
17005+ unsigned char lflag)
17006+{
17007+ if (pin) {
7f207e10 17008+ if (lflag)
4a4d8108 17009+ au_fset_pin(pin->flags, DI_LOCKED);
7f207e10 17010+ else
4a4d8108 17011+ au_fclr_pin(pin->flags, DI_LOCKED);
1308ab2a 17012+ }
4a4d8108
AM
17013+}
17014+
7f2ca4b1 17015+#if 0 /* reserved */
4a4d8108
AM
17016+static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent)
17017+{
17018+ if (pin) {
17019+ dput(pin->parent);
17020+ pin->parent = dget(parent);
1facf9fc 17021+ }
4a4d8108 17022+}
7f2ca4b1 17023+#endif
1facf9fc 17024+
4a4d8108
AM
17025+/* ---------------------------------------------------------------------- */
17026+
027c5e7a 17027+struct au_branch;
4a4d8108
AM
17028+#ifdef CONFIG_AUFS_HNOTIFY
17029+struct au_hnotify_op {
17030+ void (*ctl)(struct au_hinode *hinode, int do_set);
027c5e7a 17031+ int (*alloc)(struct au_hinode *hinode);
7eafdf33
AM
17032+
17033+ /*
17034+ * if it returns true, the caller should free hinode->hi_notify,
17035+ * otherwise ->free() frees it.
17036+ */
17037+ int (*free)(struct au_hinode *hinode,
17038+ struct au_hnotify *hn) __must_check;
4a4d8108
AM
17039+
17040+ void (*fin)(void);
17041+ int (*init)(void);
027c5e7a
AM
17042+
17043+ int (*reset_br)(unsigned int udba, struct au_branch *br, int perm);
17044+ void (*fin_br)(struct au_branch *br);
17045+ int (*init_br)(struct au_branch *br, int perm);
4a4d8108
AM
17046+};
17047+
17048+/* hnotify.c */
027c5e7a 17049+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode);
4a4d8108
AM
17050+void au_hn_free(struct au_hinode *hinode);
17051+void au_hn_ctl(struct au_hinode *hinode, int do_set);
17052+void au_hn_reset(struct inode *inode, unsigned int flags);
17053+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
17054+ struct qstr *h_child_qstr, struct inode *h_child_inode);
027c5e7a
AM
17055+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm);
17056+int au_hnotify_init_br(struct au_branch *br, int perm);
17057+void au_hnotify_fin_br(struct au_branch *br);
4a4d8108
AM
17058+int __init au_hnotify_init(void);
17059+void au_hnotify_fin(void);
17060+
7f207e10 17061+/* hfsnotify.c */
4a4d8108
AM
17062+extern const struct au_hnotify_op au_hnotify_op;
17063+
17064+static inline
17065+void au_hn_init(struct au_hinode *hinode)
17066+{
17067+ hinode->hi_notify = NULL;
1308ab2a 17068+}
17069+
53392da6
AM
17070+static inline struct au_hnotify *au_hn(struct au_hinode *hinode)
17071+{
17072+ return hinode->hi_notify;
17073+}
17074+
4a4d8108 17075+#else
c1595e42
JR
17076+AuStub(int, au_hn_alloc, return -EOPNOTSUPP,
17077+ struct au_hinode *hinode __maybe_unused,
17078+ struct inode *inode __maybe_unused)
17079+AuStub(struct au_hnotify *, au_hn, return NULL, struct au_hinode *hinode)
4a4d8108
AM
17080+AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused)
17081+AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused,
17082+ int do_set __maybe_unused)
17083+AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused,
17084+ unsigned int flags __maybe_unused)
027c5e7a
AM
17085+AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused,
17086+ struct au_branch *br __maybe_unused,
17087+ int perm __maybe_unused)
17088+AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused,
17089+ int perm __maybe_unused)
17090+AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused)
4a4d8108
AM
17091+AuStubInt0(__init au_hnotify_init, void)
17092+AuStubVoid(au_hnotify_fin, void)
17093+AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused)
17094+#endif /* CONFIG_AUFS_HNOTIFY */
17095+
17096+static inline void au_hn_suspend(struct au_hinode *hdir)
17097+{
17098+ au_hn_ctl(hdir, /*do_set*/0);
1308ab2a 17099+}
17100+
4a4d8108 17101+static inline void au_hn_resume(struct au_hinode *hdir)
1308ab2a 17102+{
4a4d8108
AM
17103+ au_hn_ctl(hdir, /*do_set*/1);
17104+}
1308ab2a 17105+
4a4d8108
AM
17106+static inline void au_hn_imtx_lock(struct au_hinode *hdir)
17107+{
17108+ mutex_lock(&hdir->hi_inode->i_mutex);
17109+ au_hn_suspend(hdir);
17110+}
dece6358 17111+
4a4d8108
AM
17112+static inline void au_hn_imtx_lock_nested(struct au_hinode *hdir,
17113+ unsigned int sc __maybe_unused)
17114+{
17115+ mutex_lock_nested(&hdir->hi_inode->i_mutex, sc);
17116+ au_hn_suspend(hdir);
1facf9fc 17117+}
1facf9fc 17118+
4a4d8108
AM
17119+static inline void au_hn_imtx_unlock(struct au_hinode *hdir)
17120+{
17121+ au_hn_resume(hdir);
17122+ mutex_unlock(&hdir->hi_inode->i_mutex);
17123+}
17124+
17125+#endif /* __KERNEL__ */
17126+#endif /* __AUFS_INODE_H__ */
7f207e10
AM
17127diff -urN /usr/share/empty/fs/aufs/ioctl.c linux/fs/aufs/ioctl.c
17128--- /usr/share/empty/fs/aufs/ioctl.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 17129+++ linux/fs/aufs/ioctl.c 2016-02-28 11:27:01.280579017 +0100
c1595e42 17130@@ -0,0 +1,219 @@
4a4d8108 17131+/*
7f2ca4b1 17132+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
17133+ *
17134+ * This program, aufs is free software; you can redistribute it and/or modify
17135+ * it under the terms of the GNU General Public License as published by
17136+ * the Free Software Foundation; either version 2 of the License, or
17137+ * (at your option) any later version.
17138+ *
17139+ * This program is distributed in the hope that it will be useful,
17140+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17141+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17142+ * GNU General Public License for more details.
17143+ *
17144+ * You should have received a copy of the GNU General Public License
523b37e3 17145+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
17146+ */
17147+
17148+/*
17149+ * ioctl
17150+ * plink-management and readdir in userspace.
17151+ * assist the pathconf(3) wrapper library.
c2b27bf2 17152+ * move-down
076b876e 17153+ * File-based Hierarchical Storage Management.
4a4d8108
AM
17154+ */
17155+
c2b27bf2
AM
17156+#include <linux/compat.h>
17157+#include <linux/file.h>
4a4d8108
AM
17158+#include "aufs.h"
17159+
1e00d052 17160+static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg)
4a4d8108
AM
17161+{
17162+ int err, fd;
17163+ aufs_bindex_t wbi, bindex, bend;
17164+ struct file *h_file;
17165+ struct super_block *sb;
17166+ struct dentry *root;
1e00d052
AM
17167+ struct au_branch *br;
17168+ struct aufs_wbr_fd wbrfd = {
17169+ .oflags = au_dir_roflags,
17170+ .brid = -1
17171+ };
17172+ const int valid = O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_DIRECTORY
17173+ | O_NOATIME | O_CLOEXEC;
4a4d8108 17174+
1e00d052
AM
17175+ AuDebugOn(wbrfd.oflags & ~valid);
17176+
17177+ if (arg) {
17178+ err = copy_from_user(&wbrfd, arg, sizeof(wbrfd));
17179+ if (unlikely(err)) {
17180+ err = -EFAULT;
17181+ goto out;
17182+ }
17183+
17184+ err = -EINVAL;
17185+ AuDbg("wbrfd{0%o, %d}\n", wbrfd.oflags, wbrfd.brid);
17186+ wbrfd.oflags |= au_dir_roflags;
17187+ AuDbg("0%o\n", wbrfd.oflags);
17188+ if (unlikely(wbrfd.oflags & ~valid))
17189+ goto out;
17190+ }
17191+
17192+ fd = get_unused_fd();
17193+ err = fd;
17194+ if (unlikely(fd < 0))
4a4d8108 17195+ goto out;
4a4d8108 17196+
1e00d052 17197+ h_file = ERR_PTR(-EINVAL);
4a4d8108 17198+ wbi = 0;
1e00d052 17199+ br = NULL;
4a4d8108
AM
17200+ sb = path->dentry->d_sb;
17201+ root = sb->s_root;
17202+ aufs_read_lock(root, AuLock_IR);
1e00d052
AM
17203+ bend = au_sbend(sb);
17204+ if (wbrfd.brid >= 0) {
17205+ wbi = au_br_index(sb, wbrfd.brid);
17206+ if (unlikely(wbi < 0 || wbi > bend))
17207+ goto out_unlock;
17208+ }
17209+
17210+ h_file = ERR_PTR(-ENOENT);
17211+ br = au_sbr(sb, wbi);
17212+ if (!au_br_writable(br->br_perm)) {
17213+ if (arg)
17214+ goto out_unlock;
17215+
17216+ bindex = wbi + 1;
17217+ wbi = -1;
17218+ for (; bindex <= bend; bindex++) {
17219+ br = au_sbr(sb, bindex);
17220+ if (au_br_writable(br->br_perm)) {
4a4d8108 17221+ wbi = bindex;
1e00d052 17222+ br = au_sbr(sb, wbi);
4a4d8108
AM
17223+ break;
17224+ }
17225+ }
4a4d8108
AM
17226+ }
17227+ AuDbg("wbi %d\n", wbi);
1e00d052 17228+ if (wbi >= 0)
392086de
AM
17229+ h_file = au_h_open(root, wbi, wbrfd.oflags, NULL,
17230+ /*force_wr*/0);
1e00d052
AM
17231+
17232+out_unlock:
4a4d8108
AM
17233+ aufs_read_unlock(root, AuLock_IR);
17234+ err = PTR_ERR(h_file);
17235+ if (IS_ERR(h_file))
17236+ goto out_fd;
17237+
1e00d052 17238+ atomic_dec(&br->br_count); /* cf. au_h_open() */
4a4d8108
AM
17239+ fd_install(fd, h_file);
17240+ err = fd;
17241+ goto out; /* success */
17242+
4f0767ce 17243+out_fd:
4a4d8108 17244+ put_unused_fd(fd);
4f0767ce 17245+out:
1e00d052 17246+ AuTraceErr(err);
4a4d8108
AM
17247+ return err;
17248+}
17249+
17250+/* ---------------------------------------------------------------------- */
17251+
17252+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg)
17253+{
17254+ long err;
c1595e42 17255+ struct dentry *dentry;
4a4d8108
AM
17256+
17257+ switch (cmd) {
4a4d8108
AM
17258+ case AUFS_CTL_RDU:
17259+ case AUFS_CTL_RDU_INO:
17260+ err = au_rdu_ioctl(file, cmd, arg);
17261+ break;
17262+
17263+ case AUFS_CTL_WBR_FD:
1e00d052 17264+ err = au_wbr_fd(&file->f_path, (void __user *)arg);
4a4d8108
AM
17265+ break;
17266+
027c5e7a
AM
17267+ case AUFS_CTL_IBUSY:
17268+ err = au_ibusy_ioctl(file, arg);
17269+ break;
17270+
076b876e
AM
17271+ case AUFS_CTL_BRINFO:
17272+ err = au_brinfo_ioctl(file, arg);
17273+ break;
17274+
17275+ case AUFS_CTL_FHSM_FD:
c1595e42
JR
17276+ dentry = file->f_dentry;
17277+ if (IS_ROOT(dentry))
17278+ err = au_fhsm_fd(dentry->d_sb, arg);
17279+ else
17280+ err = -ENOTTY;
076b876e
AM
17281+ break;
17282+
4a4d8108
AM
17283+ default:
17284+ /* do not call the lower */
17285+ AuDbg("0x%x\n", cmd);
17286+ err = -ENOTTY;
17287+ }
17288+
17289+ AuTraceErr(err);
17290+ return err;
17291+}
17292+
17293+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg)
17294+{
17295+ long err;
17296+
17297+ switch (cmd) {
c2b27bf2 17298+ case AUFS_CTL_MVDOWN:
c2b27bf2
AM
17299+ err = au_mvdown(file->f_dentry, (void __user *)arg);
17300+ break;
17301+
4a4d8108 17302+ case AUFS_CTL_WBR_FD:
1e00d052 17303+ err = au_wbr_fd(&file->f_path, (void __user *)arg);
4a4d8108
AM
17304+ break;
17305+
17306+ default:
17307+ /* do not call the lower */
17308+ AuDbg("0x%x\n", cmd);
17309+ err = -ENOTTY;
17310+ }
17311+
17312+ AuTraceErr(err);
17313+ return err;
17314+}
b752ccd1
AM
17315+
17316+#ifdef CONFIG_COMPAT
17317+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
17318+ unsigned long arg)
17319+{
17320+ long err;
17321+
17322+ switch (cmd) {
17323+ case AUFS_CTL_RDU:
17324+ case AUFS_CTL_RDU_INO:
17325+ err = au_rdu_compat_ioctl(file, cmd, arg);
17326+ break;
17327+
027c5e7a
AM
17328+ case AUFS_CTL_IBUSY:
17329+ err = au_ibusy_compat_ioctl(file, arg);
17330+ break;
17331+
076b876e
AM
17332+ case AUFS_CTL_BRINFO:
17333+ err = au_brinfo_compat_ioctl(file, arg);
17334+ break;
17335+
b752ccd1
AM
17336+ default:
17337+ err = aufs_ioctl_dir(file, cmd, arg);
17338+ }
17339+
17340+ AuTraceErr(err);
17341+ return err;
17342+}
17343+
b752ccd1
AM
17344+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
17345+ unsigned long arg)
17346+{
17347+ return aufs_ioctl_nondir(file, cmd, (unsigned long)compat_ptr(arg));
17348+}
17349+#endif
7f207e10
AM
17350diff -urN /usr/share/empty/fs/aufs/i_op_add.c linux/fs/aufs/i_op_add.c
17351--- /usr/share/empty/fs/aufs/i_op_add.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
17352+++ linux/fs/aufs/i_op_add.c 2016-02-28 11:27:01.280579017 +0100
17353@@ -0,0 +1,930 @@
4a4d8108 17354+/*
7f2ca4b1 17355+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
17356+ *
17357+ * This program, aufs is free software; you can redistribute it and/or modify
17358+ * it under the terms of the GNU General Public License as published by
17359+ * the Free Software Foundation; either version 2 of the License, or
17360+ * (at your option) any later version.
17361+ *
17362+ * This program is distributed in the hope that it will be useful,
17363+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17364+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17365+ * GNU General Public License for more details.
17366+ *
17367+ * You should have received a copy of the GNU General Public License
523b37e3 17368+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108
AM
17369+ */
17370+
17371+/*
17372+ * inode operations (add entry)
17373+ */
17374+
17375+#include "aufs.h"
17376+
17377+/*
17378+ * final procedure of adding a new entry, except link(2).
17379+ * remove whiteout, instantiate, copyup the parent dir's times and size
17380+ * and update version.
17381+ * if it failed, re-create the removed whiteout.
17382+ */
17383+static int epilog(struct inode *dir, aufs_bindex_t bindex,
17384+ struct dentry *wh_dentry, struct dentry *dentry)
17385+{
17386+ int err, rerr;
17387+ aufs_bindex_t bwh;
17388+ struct path h_path;
076b876e 17389+ struct super_block *sb;
4a4d8108
AM
17390+ struct inode *inode, *h_dir;
17391+ struct dentry *wh;
17392+
17393+ bwh = -1;
076b876e 17394+ sb = dir->i_sb;
4a4d8108
AM
17395+ if (wh_dentry) {
17396+ h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */
17397+ IMustLock(h_dir);
17398+ AuDebugOn(au_h_iptr(dir, bindex) != h_dir);
17399+ bwh = au_dbwh(dentry);
17400+ h_path.dentry = wh_dentry;
076b876e 17401+ h_path.mnt = au_sbr_mnt(sb, bindex);
4a4d8108
AM
17402+ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path,
17403+ dentry);
17404+ if (unlikely(err))
17405+ goto out;
17406+ }
17407+
17408+ inode = au_new_inode(dentry, /*must_new*/1);
17409+ if (!IS_ERR(inode)) {
17410+ d_instantiate(dentry, inode);
17411+ dir = dentry->d_parent->d_inode; /* dir inode is locked */
17412+ IMustLock(dir);
7f2ca4b1 17413+ au_dir_ts(dir, bindex);
4a4d8108 17414+ dir->i_version++;
076b876e 17415+ au_fhsm_wrote(sb, bindex, /*force*/0);
4a4d8108
AM
17416+ return 0; /* success */
17417+ }
17418+
17419+ err = PTR_ERR(inode);
17420+ if (!wh_dentry)
17421+ goto out;
17422+
17423+ /* revert */
17424+ /* dir inode is locked */
17425+ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent);
17426+ rerr = PTR_ERR(wh);
17427+ if (IS_ERR(wh)) {
523b37e3
AM
17428+ AuIOErr("%pd reverting whiteout failed(%d, %d)\n",
17429+ dentry, err, rerr);
4a4d8108
AM
17430+ err = -EIO;
17431+ } else
17432+ dput(wh);
17433+
4f0767ce 17434+out:
4a4d8108
AM
17435+ return err;
17436+}
17437+
027c5e7a
AM
17438+static int au_d_may_add(struct dentry *dentry)
17439+{
17440+ int err;
17441+
17442+ err = 0;
17443+ if (unlikely(d_unhashed(dentry)))
17444+ err = -ENOENT;
17445+ if (unlikely(dentry->d_inode))
17446+ err = -EEXIST;
17447+ return err;
17448+}
17449+
4a4d8108
AM
17450+/*
17451+ * simple tests for the adding inode operations.
17452+ * following the checks in vfs, plus the parent-child relationship.
17453+ */
17454+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
17455+ struct dentry *h_parent, int isdir)
17456+{
17457+ int err;
17458+ umode_t h_mode;
17459+ struct dentry *h_dentry;
17460+ struct inode *h_inode;
17461+
17462+ err = -ENAMETOOLONG;
17463+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
17464+ goto out;
17465+
17466+ h_dentry = au_h_dptr(dentry, bindex);
17467+ h_inode = h_dentry->d_inode;
17468+ if (!dentry->d_inode) {
17469+ err = -EEXIST;
17470+ if (unlikely(h_inode))
17471+ goto out;
17472+ } else {
17473+ /* rename(2) case */
17474+ err = -EIO;
17475+ if (unlikely(!h_inode || !h_inode->i_nlink))
17476+ goto out;
17477+
17478+ h_mode = h_inode->i_mode;
17479+ if (!isdir) {
17480+ err = -EISDIR;
17481+ if (unlikely(S_ISDIR(h_mode)))
17482+ goto out;
17483+ } else if (unlikely(!S_ISDIR(h_mode))) {
17484+ err = -ENOTDIR;
17485+ goto out;
17486+ }
17487+ }
17488+
17489+ err = 0;
17490+ /* expected parent dir is locked */
17491+ if (unlikely(h_parent != h_dentry->d_parent))
17492+ err = -EIO;
17493+
4f0767ce 17494+out:
4a4d8108
AM
17495+ AuTraceErr(err);
17496+ return err;
17497+}
17498+
17499+/*
17500+ * initial procedure of adding a new entry.
17501+ * prepare writable branch and the parent dir, lock it,
17502+ * and lookup whiteout for the new entry.
17503+ */
17504+static struct dentry*
17505+lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt,
17506+ struct dentry *src_dentry, struct au_pin *pin,
17507+ struct au_wr_dir_args *wr_dir_args)
17508+{
17509+ struct dentry *wh_dentry, *h_parent;
17510+ struct super_block *sb;
17511+ struct au_branch *br;
17512+ int err;
17513+ unsigned int udba;
17514+ aufs_bindex_t bcpup;
17515+
523b37e3 17516+ AuDbg("%pd\n", dentry);
4a4d8108
AM
17517+
17518+ err = au_wr_dir(dentry, src_dentry, wr_dir_args);
17519+ bcpup = err;
17520+ wh_dentry = ERR_PTR(err);
17521+ if (unlikely(err < 0))
17522+ goto out;
17523+
17524+ sb = dentry->d_sb;
17525+ udba = au_opt_udba(sb);
17526+ err = au_pin(pin, dentry, bcpup, udba,
17527+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
17528+ wh_dentry = ERR_PTR(err);
17529+ if (unlikely(err))
17530+ goto out;
17531+
17532+ h_parent = au_pinned_h_parent(pin);
17533+ if (udba != AuOpt_UDBA_NONE
17534+ && au_dbstart(dentry) == bcpup)
17535+ err = au_may_add(dentry, bcpup, h_parent,
17536+ au_ftest_wrdir(wr_dir_args->flags, ISDIR));
17537+ else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
17538+ err = -ENAMETOOLONG;
17539+ wh_dentry = ERR_PTR(err);
17540+ if (unlikely(err))
17541+ goto out_unpin;
17542+
17543+ br = au_sbr(sb, bcpup);
17544+ if (dt) {
17545+ struct path tmp = {
17546+ .dentry = h_parent,
86dc4139 17547+ .mnt = au_br_mnt(br)
4a4d8108
AM
17548+ };
17549+ au_dtime_store(dt, au_pinned_parent(pin), &tmp);
17550+ }
17551+
17552+ wh_dentry = NULL;
17553+ if (bcpup != au_dbwh(dentry))
17554+ goto out; /* success */
17555+
7f2ca4b1
JR
17556+ /*
17557+ * ENAMETOOLONG here means that if we allowed creating such a name, it
17558+ * could not be removed in the future. So we don't allow such a
17559+ * name here and we don't handle ENAMETOOLONG differently here.
17560+ */
4a4d8108
AM
17561+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
17562+
4f0767ce 17563+out_unpin:
4a4d8108
AM
17564+ if (IS_ERR(wh_dentry))
17565+ au_unpin(pin);
4f0767ce 17566+out:
4a4d8108
AM
17567+ return wh_dentry;
17568+}
17569+
17570+/* ---------------------------------------------------------------------- */
17571+
17572+enum { Mknod, Symlink, Creat };
17573+struct simple_arg {
17574+ int type;
17575+ union {
17576+ struct {
7f2ca4b1
JR
17577+ umode_t mode;
17578+ bool want_excl;
17579+ bool try_aopen;
17580+ struct vfsub_aopen_args *aopen;
4a4d8108
AM
17581+ } c;
17582+ struct {
17583+ const char *symname;
17584+ } s;
17585+ struct {
7eafdf33 17586+ umode_t mode;
4a4d8108
AM
17587+ dev_t dev;
17588+ } m;
17589+ } u;
17590+};
17591+
17592+static int add_simple(struct inode *dir, struct dentry *dentry,
17593+ struct simple_arg *arg)
17594+{
076b876e 17595+ int err, rerr;
4a4d8108
AM
17596+ aufs_bindex_t bstart;
17597+ unsigned char created;
7f2ca4b1
JR
17598+ const unsigned char try_aopen
17599+ = (arg->type == Creat && arg->u.c.try_aopen);
4a4d8108
AM
17600+ struct dentry *wh_dentry, *parent;
17601+ struct inode *h_dir;
7f2ca4b1
JR
17602+ struct super_block *sb;
17603+ struct au_branch *br;
c2b27bf2
AM
17604+ /* to reuduce stack size */
17605+ struct {
17606+ struct au_dtime dt;
17607+ struct au_pin pin;
17608+ struct path h_path;
17609+ struct au_wr_dir_args wr_dir_args;
17610+ } *a;
4a4d8108 17611+
523b37e3 17612+ AuDbg("%pd\n", dentry);
4a4d8108
AM
17613+ IMustLock(dir);
17614+
c2b27bf2
AM
17615+ err = -ENOMEM;
17616+ a = kmalloc(sizeof(*a), GFP_NOFS);
17617+ if (unlikely(!a))
17618+ goto out;
17619+ a->wr_dir_args.force_btgt = -1;
17620+ a->wr_dir_args.flags = AuWrDir_ADD_ENTRY;
17621+
4a4d8108 17622+ parent = dentry->d_parent; /* dir inode is locked */
7f2ca4b1
JR
17623+ if (!try_aopen) {
17624+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
17625+ if (unlikely(err))
17626+ goto out_free;
17627+ }
027c5e7a
AM
17628+ err = au_d_may_add(dentry);
17629+ if (unlikely(err))
17630+ goto out_unlock;
7f2ca4b1
JR
17631+ if (!try_aopen)
17632+ di_write_lock_parent(parent);
c2b27bf2
AM
17633+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
17634+ &a->pin, &a->wr_dir_args);
4a4d8108
AM
17635+ err = PTR_ERR(wh_dentry);
17636+ if (IS_ERR(wh_dentry))
027c5e7a 17637+ goto out_parent;
4a4d8108
AM
17638+
17639+ bstart = au_dbstart(dentry);
7f2ca4b1
JR
17640+ sb = dentry->d_sb;
17641+ br = au_sbr(sb, bstart);
c2b27bf2 17642+ a->h_path.dentry = au_h_dptr(dentry, bstart);
7f2ca4b1 17643+ a->h_path.mnt = au_br_mnt(br);
c2b27bf2 17644+ h_dir = au_pinned_h_dir(&a->pin);
4a4d8108
AM
17645+ switch (arg->type) {
17646+ case Creat:
7f2ca4b1
JR
17647+ err = 0;
17648+ if (!try_aopen || !h_dir->i_op->atomic_open)
17649+ err = vfsub_create(h_dir, &a->h_path, arg->u.c.mode,
17650+ arg->u.c.want_excl);
17651+ else
17652+ err = vfsub_atomic_open(h_dir, a->h_path.dentry,
17653+ arg->u.c.aopen, br);
4a4d8108
AM
17654+ break;
17655+ case Symlink:
c2b27bf2 17656+ err = vfsub_symlink(h_dir, &a->h_path, arg->u.s.symname);
4a4d8108
AM
17657+ break;
17658+ case Mknod:
c2b27bf2
AM
17659+ err = vfsub_mknod(h_dir, &a->h_path, arg->u.m.mode,
17660+ arg->u.m.dev);
4a4d8108
AM
17661+ break;
17662+ default:
17663+ BUG();
17664+ }
17665+ created = !err;
17666+ if (!err)
17667+ err = epilog(dir, bstart, wh_dentry, dentry);
17668+
17669+ /* revert */
c2b27bf2 17670+ if (unlikely(created && err && a->h_path.dentry->d_inode)) {
523b37e3
AM
17671+ /* no delegation since it is just created */
17672+ rerr = vfsub_unlink(h_dir, &a->h_path, /*delegated*/NULL,
17673+ /*force*/0);
4a4d8108 17674+ if (rerr) {
523b37e3
AM
17675+ AuIOErr("%pd revert failure(%d, %d)\n",
17676+ dentry, err, rerr);
4a4d8108
AM
17677+ err = -EIO;
17678+ }
c2b27bf2 17679+ au_dtime_revert(&a->dt);
4a4d8108
AM
17680+ }
17681+
7f2ca4b1
JR
17682+ if (!err && try_aopen && !h_dir->i_op->atomic_open)
17683+ *arg->u.c.aopen->opened |= FILE_CREATED;
17684+
c2b27bf2 17685+ au_unpin(&a->pin);
4a4d8108
AM
17686+ dput(wh_dentry);
17687+
027c5e7a 17688+out_parent:
7f2ca4b1
JR
17689+ if (!try_aopen)
17690+ di_write_unlock(parent);
027c5e7a 17691+out_unlock:
4a4d8108
AM
17692+ if (unlikely(err)) {
17693+ au_update_dbstart(dentry);
17694+ d_drop(dentry);
17695+ }
7f2ca4b1
JR
17696+ if (!try_aopen)
17697+ aufs_read_unlock(dentry, AuLock_DW);
c2b27bf2
AM
17698+out_free:
17699+ kfree(a);
027c5e7a 17700+out:
4a4d8108
AM
17701+ return err;
17702+}
17703+
7eafdf33
AM
17704+int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
17705+ dev_t dev)
4a4d8108
AM
17706+{
17707+ struct simple_arg arg = {
17708+ .type = Mknod,
17709+ .u.m = {
17710+ .mode = mode,
17711+ .dev = dev
17712+ }
17713+ };
17714+ return add_simple(dir, dentry, &arg);
17715+}
17716+
17717+int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
17718+{
17719+ struct simple_arg arg = {
17720+ .type = Symlink,
17721+ .u.s.symname = symname
17722+ };
17723+ return add_simple(dir, dentry, &arg);
17724+}
17725+
7eafdf33 17726+int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
b4510431 17727+ bool want_excl)
4a4d8108
AM
17728+{
17729+ struct simple_arg arg = {
17730+ .type = Creat,
17731+ .u.c = {
b4510431
AM
17732+ .mode = mode,
17733+ .want_excl = want_excl
4a4d8108
AM
17734+ }
17735+ };
17736+ return add_simple(dir, dentry, &arg);
17737+}
17738+
7f2ca4b1
JR
17739+int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
17740+ struct vfsub_aopen_args *aopen_args)
17741+{
17742+ struct simple_arg arg = {
17743+ .type = Creat,
17744+ .u.c = {
17745+ .mode = aopen_args->create_mode,
17746+ .want_excl = aopen_args->open_flag & O_EXCL,
17747+ .try_aopen = true,
17748+ .aopen = aopen_args
17749+ }
17750+ };
17751+ return add_simple(dir, dentry, &arg);
17752+}
17753+
38d290e6
JR
17754+int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
17755+{
17756+ int err;
17757+ aufs_bindex_t bindex;
17758+ struct super_block *sb;
17759+ struct dentry *parent, *h_parent, *h_dentry;
17760+ struct inode *h_dir, *inode;
17761+ struct vfsmount *h_mnt;
17762+ struct au_wr_dir_args wr_dir_args = {
17763+ .force_btgt = -1,
17764+ .flags = AuWrDir_TMPFILE
17765+ };
17766+
17767+ /* copy-up may happen */
17768+ mutex_lock(&dir->i_mutex);
17769+
17770+ sb = dir->i_sb;
17771+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
17772+ if (unlikely(err))
17773+ goto out;
17774+
17775+ err = au_di_init(dentry);
17776+ if (unlikely(err))
17777+ goto out_si;
17778+
17779+ err = -EBUSY;
17780+ parent = d_find_any_alias(dir);
17781+ AuDebugOn(!parent);
17782+ di_write_lock_parent(parent);
17783+ if (unlikely(parent->d_inode != dir))
17784+ goto out_parent;
17785+
17786+ err = au_digen_test(parent, au_sigen(sb));
17787+ if (unlikely(err))
17788+ goto out_parent;
17789+
17790+ bindex = au_dbstart(parent);
17791+ au_set_dbstart(dentry, bindex);
17792+ au_set_dbend(dentry, bindex);
17793+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
17794+ bindex = err;
17795+ if (unlikely(err < 0))
17796+ goto out_parent;
17797+
17798+ err = -EOPNOTSUPP;
17799+ h_dir = au_h_iptr(dir, bindex);
17800+ if (unlikely(!h_dir->i_op->tmpfile))
17801+ goto out_parent;
17802+
17803+ h_mnt = au_sbr_mnt(sb, bindex);
17804+ err = vfsub_mnt_want_write(h_mnt);
17805+ if (unlikely(err))
17806+ goto out_parent;
17807+
17808+ h_parent = au_h_dptr(parent, bindex);
17809+ err = inode_permission(h_parent->d_inode, MAY_WRITE | MAY_EXEC);
17810+ if (unlikely(err))
17811+ goto out_mnt;
17812+
17813+ err = -ENOMEM;
17814+ h_dentry = d_alloc(h_parent, &dentry->d_name);
17815+ if (unlikely(!h_dentry))
17816+ goto out_mnt;
17817+
17818+ err = h_dir->i_op->tmpfile(h_dir, h_dentry, mode);
17819+ if (unlikely(err))
17820+ goto out_dentry;
17821+
17822+ au_set_dbstart(dentry, bindex);
17823+ au_set_dbend(dentry, bindex);
17824+ au_set_h_dptr(dentry, bindex, dget(h_dentry));
17825+ inode = au_new_inode(dentry, /*must_new*/1);
17826+ if (IS_ERR(inode)) {
17827+ err = PTR_ERR(inode);
17828+ au_set_h_dptr(dentry, bindex, NULL);
17829+ au_set_dbstart(dentry, -1);
17830+ au_set_dbend(dentry, -1);
17831+ } else {
17832+ if (!inode->i_nlink)
17833+ set_nlink(inode, 1);
17834+ d_tmpfile(dentry, inode);
17835+ au_di(dentry)->di_tmpfile = 1;
17836+
17837+ /* update without i_mutex */
17838+ if (au_ibstart(dir) == au_dbstart(dentry))
17839+ au_cpup_attr_timesizes(dir);
17840+ }
17841+
17842+out_dentry:
17843+ dput(h_dentry);
17844+out_mnt:
17845+ vfsub_mnt_drop_write(h_mnt);
17846+out_parent:
17847+ di_write_unlock(parent);
17848+ dput(parent);
17849+ di_write_unlock(dentry);
17850+ if (!err)
17851+#if 0
17852+ /* verbose coding for lock class name */
17853+ au_rw_class(&au_di(dentry)->di_rwsem,
17854+ au_lc_key + AuLcNonDir_DIINFO);
17855+#else
17856+ ;
17857+#endif
17858+ else {
17859+ au_di_fin(dentry);
17860+ dentry->d_fsdata = NULL;
17861+ }
17862+out_si:
17863+ si_read_unlock(sb);
17864+out:
17865+ mutex_unlock(&dir->i_mutex);
17866+ return err;
17867+}
17868+
4a4d8108
AM
17869+/* ---------------------------------------------------------------------- */
17870+
17871+struct au_link_args {
17872+ aufs_bindex_t bdst, bsrc;
17873+ struct au_pin pin;
17874+ struct path h_path;
17875+ struct dentry *src_parent, *parent;
17876+};
17877+
17878+static int au_cpup_before_link(struct dentry *src_dentry,
17879+ struct au_link_args *a)
17880+{
17881+ int err;
17882+ struct dentry *h_src_dentry;
c2b27bf2
AM
17883+ struct au_cp_generic cpg = {
17884+ .dentry = src_dentry,
17885+ .bdst = a->bdst,
17886+ .bsrc = a->bsrc,
17887+ .len = -1,
17888+ .pin = &a->pin,
17889+ .flags = AuCpup_DTIME | AuCpup_HOPEN /* | AuCpup_KEEPLINO */
17890+ };
4a4d8108
AM
17891+
17892+ di_read_lock_parent(a->src_parent, AuLock_IR);
17893+ err = au_test_and_cpup_dirs(src_dentry, a->bdst);
17894+ if (unlikely(err))
17895+ goto out;
17896+
17897+ h_src_dentry = au_h_dptr(src_dentry, a->bsrc);
4a4d8108
AM
17898+ err = au_pin(&a->pin, src_dentry, a->bdst,
17899+ au_opt_udba(src_dentry->d_sb),
17900+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
17901+ if (unlikely(err))
17902+ goto out;
367653fa 17903+
c2b27bf2 17904+ err = au_sio_cpup_simple(&cpg);
4a4d8108
AM
17905+ au_unpin(&a->pin);
17906+
4f0767ce 17907+out:
4a4d8108
AM
17908+ di_read_unlock(a->src_parent, AuLock_IR);
17909+ return err;
17910+}
17911+
86dc4139
AM
17912+static int au_cpup_or_link(struct dentry *src_dentry, struct dentry *dentry,
17913+ struct au_link_args *a)
4a4d8108
AM
17914+{
17915+ int err;
17916+ unsigned char plink;
86dc4139 17917+ aufs_bindex_t bend;
4a4d8108 17918+ struct dentry *h_src_dentry;
523b37e3 17919+ struct inode *h_inode, *inode, *delegated;
4a4d8108
AM
17920+ struct super_block *sb;
17921+ struct file *h_file;
17922+
17923+ plink = 0;
17924+ h_inode = NULL;
17925+ sb = src_dentry->d_sb;
17926+ inode = src_dentry->d_inode;
17927+ if (au_ibstart(inode) <= a->bdst)
17928+ h_inode = au_h_iptr(inode, a->bdst);
17929+ if (!h_inode || !h_inode->i_nlink) {
17930+ /* copyup src_dentry as the name of dentry. */
86dc4139
AM
17931+ bend = au_dbend(dentry);
17932+ if (bend < a->bsrc)
17933+ au_set_dbend(dentry, a->bsrc);
17934+ au_set_h_dptr(dentry, a->bsrc,
17935+ dget(au_h_dptr(src_dentry, a->bsrc)));
17936+ dget(a->h_path.dentry);
17937+ au_set_h_dptr(dentry, a->bdst, NULL);
c1595e42
JR
17938+ AuDbg("temporary d_inode...\n");
17939+ spin_lock(&dentry->d_lock);
86dc4139 17940+ dentry->d_inode = src_dentry->d_inode; /* tmp */
c1595e42 17941+ spin_unlock(&dentry->d_lock);
392086de 17942+ h_file = au_h_open_pre(dentry, a->bsrc, /*force_wr*/0);
86dc4139 17943+ if (IS_ERR(h_file))
4a4d8108 17944+ err = PTR_ERR(h_file);
86dc4139 17945+ else {
c2b27bf2
AM
17946+ struct au_cp_generic cpg = {
17947+ .dentry = dentry,
17948+ .bdst = a->bdst,
17949+ .bsrc = -1,
17950+ .len = -1,
17951+ .pin = &a->pin,
17952+ .flags = AuCpup_KEEPLINO
17953+ };
17954+ err = au_sio_cpup_simple(&cpg);
86dc4139
AM
17955+ au_h_open_post(dentry, a->bsrc, h_file);
17956+ if (!err) {
17957+ dput(a->h_path.dentry);
17958+ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
17959+ } else
17960+ au_set_h_dptr(dentry, a->bdst,
17961+ a->h_path.dentry);
17962+ }
c1595e42 17963+ spin_lock(&dentry->d_lock);
86dc4139 17964+ dentry->d_inode = NULL; /* restore */
c1595e42
JR
17965+ spin_unlock(&dentry->d_lock);
17966+ AuDbg("temporary d_inode...done\n");
86dc4139
AM
17967+ au_set_h_dptr(dentry, a->bsrc, NULL);
17968+ au_set_dbend(dentry, bend);
4a4d8108
AM
17969+ } else {
17970+ /* the inode of src_dentry already exists on a.bdst branch */
17971+ h_src_dentry = d_find_alias(h_inode);
17972+ if (!h_src_dentry && au_plink_test(inode)) {
17973+ plink = 1;
17974+ h_src_dentry = au_plink_lkup(inode, a->bdst);
17975+ err = PTR_ERR(h_src_dentry);
17976+ if (IS_ERR(h_src_dentry))
17977+ goto out;
17978+
17979+ if (unlikely(!h_src_dentry->d_inode)) {
17980+ dput(h_src_dentry);
17981+ h_src_dentry = NULL;
17982+ }
17983+
17984+ }
17985+ if (h_src_dentry) {
523b37e3 17986+ delegated = NULL;
4a4d8108 17987+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
523b37e3
AM
17988+ &a->h_path, &delegated);
17989+ if (unlikely(err == -EWOULDBLOCK)) {
17990+ pr_warn("cannot retry for NFSv4 delegation"
17991+ " for an internal link\n");
17992+ iput(delegated);
17993+ }
4a4d8108
AM
17994+ dput(h_src_dentry);
17995+ } else {
17996+ AuIOErr("no dentry found for hi%lu on b%d\n",
17997+ h_inode->i_ino, a->bdst);
17998+ err = -EIO;
17999+ }
18000+ }
18001+
18002+ if (!err && !plink)
18003+ au_plink_append(inode, a->bdst, a->h_path.dentry);
18004+
18005+out:
2cbb1c4b 18006+ AuTraceErr(err);
4a4d8108
AM
18007+ return err;
18008+}
18009+
18010+int aufs_link(struct dentry *src_dentry, struct inode *dir,
18011+ struct dentry *dentry)
18012+{
18013+ int err, rerr;
18014+ struct au_dtime dt;
18015+ struct au_link_args *a;
18016+ struct dentry *wh_dentry, *h_src_dentry;
523b37e3 18017+ struct inode *inode, *delegated;
4a4d8108
AM
18018+ struct super_block *sb;
18019+ struct au_wr_dir_args wr_dir_args = {
18020+ /* .force_btgt = -1, */
18021+ .flags = AuWrDir_ADD_ENTRY
18022+ };
18023+
18024+ IMustLock(dir);
18025+ inode = src_dentry->d_inode;
18026+ IMustLock(inode);
18027+
4a4d8108
AM
18028+ err = -ENOMEM;
18029+ a = kzalloc(sizeof(*a), GFP_NOFS);
18030+ if (unlikely(!a))
18031+ goto out;
18032+
18033+ a->parent = dentry->d_parent; /* dir inode is locked */
027c5e7a
AM
18034+ err = aufs_read_and_write_lock2(dentry, src_dentry,
18035+ AuLock_NOPLM | AuLock_GEN);
e49829fe
JR
18036+ if (unlikely(err))
18037+ goto out_kfree;
38d290e6 18038+ err = au_d_linkable(src_dentry);
027c5e7a
AM
18039+ if (unlikely(err))
18040+ goto out_unlock;
18041+ err = au_d_may_add(dentry);
18042+ if (unlikely(err))
18043+ goto out_unlock;
e49829fe 18044+
4a4d8108 18045+ a->src_parent = dget_parent(src_dentry);
2cbb1c4b 18046+ wr_dir_args.force_btgt = au_ibstart(inode);
4a4d8108
AM
18047+
18048+ di_write_lock_parent(a->parent);
18049+ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt);
18050+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin,
18051+ &wr_dir_args);
18052+ err = PTR_ERR(wh_dentry);
18053+ if (IS_ERR(wh_dentry))
027c5e7a 18054+ goto out_parent;
4a4d8108
AM
18055+
18056+ err = 0;
18057+ sb = dentry->d_sb;
18058+ a->bdst = au_dbstart(dentry);
18059+ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
18060+ a->h_path.mnt = au_sbr_mnt(sb, a->bdst);
2cbb1c4b
JR
18061+ a->bsrc = au_ibstart(inode);
18062+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
38d290e6
JR
18063+ if (!h_src_dentry && au_di(src_dentry)->di_tmpfile)
18064+ h_src_dentry = dget(au_hi_wh(inode, a->bsrc));
2cbb1c4b
JR
18065+ if (!h_src_dentry) {
18066+ a->bsrc = au_dbstart(src_dentry);
18067+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
18068+ AuDebugOn(!h_src_dentry);
38d290e6
JR
18069+ } else if (IS_ERR(h_src_dentry)) {
18070+ err = PTR_ERR(h_src_dentry);
2cbb1c4b 18071+ goto out_parent;
38d290e6 18072+ }
2cbb1c4b 18073+
4a4d8108
AM
18074+ if (au_opt_test(au_mntflags(sb), PLINK)) {
18075+ if (a->bdst < a->bsrc
18076+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */)
86dc4139 18077+ err = au_cpup_or_link(src_dentry, dentry, a);
523b37e3
AM
18078+ else {
18079+ delegated = NULL;
4a4d8108 18080+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
523b37e3
AM
18081+ &a->h_path, &delegated);
18082+ if (unlikely(err == -EWOULDBLOCK)) {
18083+ pr_warn("cannot retry for NFSv4 delegation"
18084+ " for an internal link\n");
18085+ iput(delegated);
18086+ }
18087+ }
2cbb1c4b 18088+ dput(h_src_dentry);
4a4d8108
AM
18089+ } else {
18090+ /*
18091+ * copyup src_dentry to the branch we process,
18092+ * and then link(2) to it.
18093+ */
2cbb1c4b 18094+ dput(h_src_dentry);
4a4d8108
AM
18095+ if (a->bdst < a->bsrc
18096+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) {
18097+ au_unpin(&a->pin);
18098+ di_write_unlock(a->parent);
18099+ err = au_cpup_before_link(src_dentry, a);
18100+ di_write_lock_parent(a->parent);
18101+ if (!err)
18102+ err = au_pin(&a->pin, dentry, a->bdst,
18103+ au_opt_udba(sb),
18104+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
18105+ if (unlikely(err))
18106+ goto out_wh;
18107+ }
18108+ if (!err) {
18109+ h_src_dentry = au_h_dptr(src_dentry, a->bdst);
18110+ err = -ENOENT;
523b37e3
AM
18111+ if (h_src_dentry && h_src_dentry->d_inode) {
18112+ delegated = NULL;
4a4d8108
AM
18113+ err = vfsub_link(h_src_dentry,
18114+ au_pinned_h_dir(&a->pin),
523b37e3
AM
18115+ &a->h_path, &delegated);
18116+ if (unlikely(err == -EWOULDBLOCK)) {
18117+ pr_warn("cannot retry"
18118+ " for NFSv4 delegation"
18119+ " for an internal link\n");
18120+ iput(delegated);
18121+ }
18122+ }
4a4d8108
AM
18123+ }
18124+ }
18125+ if (unlikely(err))
18126+ goto out_unpin;
18127+
18128+ if (wh_dentry) {
18129+ a->h_path.dentry = wh_dentry;
18130+ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path,
18131+ dentry);
18132+ if (unlikely(err))
18133+ goto out_revert;
18134+ }
18135+
7f2ca4b1 18136+ au_dir_ts(dir, a->bdst);
4a4d8108 18137+ dir->i_version++;
4a4d8108
AM
18138+ inc_nlink(inode);
18139+ inode->i_ctime = dir->i_ctime;
027c5e7a
AM
18140+ d_instantiate(dentry, au_igrab(inode));
18141+ if (d_unhashed(a->h_path.dentry))
4a4d8108
AM
18142+ /* some filesystem calls d_drop() */
18143+ d_drop(dentry);
076b876e
AM
18144+ /* some filesystems consume an inode even for a hardlink */
18145+ au_fhsm_wrote(sb, a->bdst, /*force*/0);
4a4d8108
AM
18146+ goto out_unpin; /* success */
18147+
4f0767ce 18148+out_revert:
523b37e3
AM
18149+ /* no delegation since it is just created */
18150+ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path,
18151+ /*delegated*/NULL, /*force*/0);
027c5e7a 18152+ if (unlikely(rerr)) {
523b37e3 18153+ AuIOErr("%pd reverting failed(%d, %d)\n", dentry, err, rerr);
027c5e7a
AM
18154+ err = -EIO;
18155+ }
4a4d8108 18156+ au_dtime_revert(&dt);
4f0767ce 18157+out_unpin:
4a4d8108 18158+ au_unpin(&a->pin);
4f0767ce 18159+out_wh:
4a4d8108 18160+ dput(wh_dentry);
027c5e7a
AM
18161+out_parent:
18162+ di_write_unlock(a->parent);
18163+ dput(a->src_parent);
4f0767ce 18164+out_unlock:
4a4d8108
AM
18165+ if (unlikely(err)) {
18166+ au_update_dbstart(dentry);
18167+ d_drop(dentry);
18168+ }
4a4d8108 18169+ aufs_read_and_write_unlock2(dentry, src_dentry);
e49829fe 18170+out_kfree:
4a4d8108 18171+ kfree(a);
4f0767ce 18172+out:
86dc4139 18173+ AuTraceErr(err);
4a4d8108
AM
18174+ return err;
18175+}
18176+
7eafdf33 18177+int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
4a4d8108
AM
18178+{
18179+ int err, rerr;
18180+ aufs_bindex_t bindex;
18181+ unsigned char diropq;
18182+ struct path h_path;
18183+ struct dentry *wh_dentry, *parent, *opq_dentry;
18184+ struct mutex *h_mtx;
18185+ struct super_block *sb;
18186+ struct {
18187+ struct au_pin pin;
18188+ struct au_dtime dt;
18189+ } *a; /* reduce the stack usage */
18190+ struct au_wr_dir_args wr_dir_args = {
18191+ .force_btgt = -1,
18192+ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR
18193+ };
18194+
18195+ IMustLock(dir);
18196+
18197+ err = -ENOMEM;
18198+ a = kmalloc(sizeof(*a), GFP_NOFS);
18199+ if (unlikely(!a))
18200+ goto out;
18201+
027c5e7a
AM
18202+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
18203+ if (unlikely(err))
18204+ goto out_free;
18205+ err = au_d_may_add(dentry);
18206+ if (unlikely(err))
18207+ goto out_unlock;
18208+
4a4d8108
AM
18209+ parent = dentry->d_parent; /* dir inode is locked */
18210+ di_write_lock_parent(parent);
18211+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
18212+ &a->pin, &wr_dir_args);
18213+ err = PTR_ERR(wh_dentry);
18214+ if (IS_ERR(wh_dentry))
027c5e7a 18215+ goto out_parent;
4a4d8108
AM
18216+
18217+ sb = dentry->d_sb;
18218+ bindex = au_dbstart(dentry);
18219+ h_path.dentry = au_h_dptr(dentry, bindex);
18220+ h_path.mnt = au_sbr_mnt(sb, bindex);
18221+ err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode);
18222+ if (unlikely(err))
027c5e7a 18223+ goto out_unpin;
4a4d8108
AM
18224+
18225+ /* make the dir opaque */
18226+ diropq = 0;
18227+ h_mtx = &h_path.dentry->d_inode->i_mutex;
18228+ if (wh_dentry
18229+ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) {
18230+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
18231+ opq_dentry = au_diropq_create(dentry, bindex);
18232+ mutex_unlock(h_mtx);
18233+ err = PTR_ERR(opq_dentry);
18234+ if (IS_ERR(opq_dentry))
18235+ goto out_dir;
18236+ dput(opq_dentry);
18237+ diropq = 1;
18238+ }
18239+
18240+ err = epilog(dir, bindex, wh_dentry, dentry);
18241+ if (!err) {
18242+ inc_nlink(dir);
027c5e7a 18243+ goto out_unpin; /* success */
4a4d8108
AM
18244+ }
18245+
18246+ /* revert */
18247+ if (diropq) {
18248+ AuLabel(revert opq);
18249+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
18250+ rerr = au_diropq_remove(dentry, bindex);
18251+ mutex_unlock(h_mtx);
18252+ if (rerr) {
523b37e3
AM
18253+ AuIOErr("%pd reverting diropq failed(%d, %d)\n",
18254+ dentry, err, rerr);
4a4d8108
AM
18255+ err = -EIO;
18256+ }
18257+ }
18258+
4f0767ce 18259+out_dir:
4a4d8108
AM
18260+ AuLabel(revert dir);
18261+ rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path);
18262+ if (rerr) {
523b37e3
AM
18263+ AuIOErr("%pd reverting dir failed(%d, %d)\n",
18264+ dentry, err, rerr);
4a4d8108
AM
18265+ err = -EIO;
18266+ }
4a4d8108 18267+ au_dtime_revert(&a->dt);
027c5e7a 18268+out_unpin:
4a4d8108
AM
18269+ au_unpin(&a->pin);
18270+ dput(wh_dentry);
027c5e7a
AM
18271+out_parent:
18272+ di_write_unlock(parent);
18273+out_unlock:
4a4d8108
AM
18274+ if (unlikely(err)) {
18275+ au_update_dbstart(dentry);
18276+ d_drop(dentry);
18277+ }
4a4d8108 18278+ aufs_read_unlock(dentry, AuLock_DW);
027c5e7a 18279+out_free:
4a4d8108 18280+ kfree(a);
4f0767ce 18281+out:
4a4d8108
AM
18282+ return err;
18283+}
7f207e10
AM
18284diff -urN /usr/share/empty/fs/aufs/i_op.c linux/fs/aufs/i_op.c
18285--- /usr/share/empty/fs/aufs/i_op.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
18286+++ linux/fs/aufs/i_op.c 2016-02-28 11:27:01.280579017 +0100
18287@@ -0,0 +1,1460 @@
4a4d8108 18288+/*
7f2ca4b1 18289+ * Copyright (C) 2005-2016 Junjiro R. Okajima
4a4d8108
AM
18290+ *
18291+ * This program, aufs is free software; you can redistribute it and/or modify
18292+ * it under the terms of the GNU General Public License as published by
18293+ * the Free Software Foundation; either version 2 of the License, or
18294+ * (at your option) any later version.
18295+ *
18296+ * This program is distributed in the hope that it will be useful,
18297+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18298+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18299+ * GNU General Public License for more details.
18300+ *
18301+ * You should have received a copy of the GNU General Public License
523b37e3 18302+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4a4d8108 18303+ */
1facf9fc 18304+
1308ab2a 18305+/*
4a4d8108 18306+ * inode operations (except add/del/rename)
1308ab2a 18307+ */
4a4d8108
AM
18308+
18309+#include <linux/device_cgroup.h>
18310+#include <linux/fs_stack.h>
92d182d2 18311+#include <linux/mm.h>
4a4d8108
AM
18312+#include <linux/namei.h>
18313+#include <linux/security.h>
4a4d8108
AM
18314+#include "aufs.h"
18315+
1e00d052 18316+static int h_permission(struct inode *h_inode, int mask,
4a4d8108 18317+ struct vfsmount *h_mnt, int brperm)
1facf9fc 18318+{
1308ab2a 18319+ int err;
4a4d8108 18320+ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
1facf9fc 18321+
4a4d8108
AM
18322+ err = -EACCES;
18323+ if ((write_mask && IS_IMMUTABLE(h_inode))
18324+ || ((mask & MAY_EXEC)
18325+ && S_ISREG(h_inode->i_mode)
18326+ && ((h_mnt->mnt_flags & MNT_NOEXEC)
18327+ || !(h_inode->i_mode & S_IXUGO))))
18328+ goto out;
18329+
18330+ /*
18331+ * - skip the lower fs test in the case of write to ro branch.
18332+ * - nfs dir permission write check is optimized, but a policy for
18333+ * link/rename requires a real check.
7f2ca4b1
JR
18334+ * - nfs always sets MS_POSIXACL regardless of its mount option 'noacl'.
18335+ * In this case, generic_permission() returns -EOPNOTSUPP.
4a4d8108
AM
18336+ */
18337+ if ((write_mask && !au_br_writable(brperm))
18338+ || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode)
18339+ && write_mask && !(mask & MAY_READ))
18340+ || !h_inode->i_op->permission) {
18341+ /* AuLabel(generic_permission); */
7f2ca4b1 18342+ /* AuDbg("get_acl %pf\n", h_inode->i_op->get_acl); */
1e00d052 18343+ err = generic_permission(h_inode, mask);
7f2ca4b1
JR
18344+ if (err == -EOPNOTSUPP && au_test_nfs_noacl(h_inode))
18345+ err = h_inode->i_op->permission(h_inode, mask);
18346+ AuTraceErr(err);
1308ab2a 18347+ } else {
4a4d8108 18348+ /* AuLabel(h_inode->permission); */
1e00d052 18349+ err = h_inode->i_op->permission(h_inode, mask);
4a4d8108
AM
18350+ AuTraceErr(err);
18351+ }
1facf9fc 18352+
4a4d8108
AM
18353+ if (!err)
18354+ err = devcgroup_inode_permission(h_inode, mask);
7f207e10 18355+ if (!err)
4a4d8108 18356+ err = security_inode_permission(h_inode, mask);
4a4d8108
AM
18357+
18358+#if 0
18359+ if (!err) {
18360+ /* todo: do we need to call ima_path_check()? */
18361+ struct path h_path = {
18362+ .dentry =
18363+ .mnt = h_mnt
18364+ };
18365+ err = ima_path_check(&h_path,
18366+ mask & (MAY_READ | MAY_WRITE | MAY_EXEC),
18367+ IMA_COUNT_LEAVE);
1308ab2a 18368+ }
4a4d8108 18369+#endif
dece6358 18370+
4f0767ce 18371+out:
1308ab2a 18372+ return err;
18373+}
dece6358 18374+
1e00d052 18375+static int aufs_permission(struct inode *inode, int mask)
1308ab2a 18376+{
18377+ int err;
4a4d8108
AM
18378+ aufs_bindex_t bindex, bend;
18379+ const unsigned char isdir = !!S_ISDIR(inode->i_mode),
18380+ write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
18381+ struct inode *h_inode;
18382+ struct super_block *sb;
18383+ struct au_branch *br;
1facf9fc 18384+
027c5e7a 18385+ /* todo: support rcu-walk? */
1e00d052 18386+ if (mask & MAY_NOT_BLOCK)
027c5e7a
AM
18387+ return -ECHILD;
18388+
4a4d8108
AM
18389+ sb = inode->i_sb;
18390+ si_read_lock(sb, AuLock_FLUSH);
18391+ ii_read_lock_child(inode);
027c5e7a
AM
18392+#if 0
18393+ err = au_iigen_test(inode, au_sigen(sb));
18394+ if (unlikely(err))
18395+ goto out;
18396+#endif
dece6358 18397+
076b876e
AM
18398+ if (!isdir
18399+ || write_mask
18400+ || au_opt_test(au_mntflags(sb), DIRPERM1)) {
4a4d8108
AM
18401+ err = au_busy_or_stale();
18402+ h_inode = au_h_iptr(inode, au_ibstart(inode));
18403+ if (unlikely(!h_inode
18404+ || (h_inode->i_mode & S_IFMT)
18405+ != (inode->i_mode & S_IFMT)))
18406+ goto out;
1facf9fc 18407+
4a4d8108
AM
18408+ err = 0;
18409+ bindex = au_ibstart(inode);
18410+ br = au_sbr(sb, bindex);
86dc4139 18411+ err = h_permission(h_inode, mask, au_br_mnt(br), br->br_perm);
4a4d8108
AM
18412+ if (write_mask
18413+ && !err
18414+ && !special_file(h_inode->i_mode)) {
18415+ /* test whether the upper writable branch exists */
18416+ err = -EROFS;
18417+ for (; bindex >= 0; bindex--)
18418+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
18419+ err = 0;
18420+ break;
18421+ }
18422+ }
18423+ goto out;
18424+ }
dece6358 18425+
4a4d8108 18426+ /* non-write to dir */
1308ab2a 18427+ err = 0;
4a4d8108
AM
18428+ bend = au_ibend(inode);
18429+ for (bindex = au_ibstart(inode); !err && bindex <= bend; bindex++) {
18430+ h_inode = au_h_iptr(inode, bindex);
18431+ if (h_inode) {
18432+ err = au_busy_or_stale();
18433+ if (unlikely(!S_ISDIR(h_inode->i_mode)))
18434+ break;
18435+
18436+ br = au_sbr(sb, bindex);
86dc4139 18437+ err = h_permission(h_inode, mask, au_br_mnt(br),
4a4d8108
AM
18438+ br->br_perm);
18439+ }
18440+ }
1308ab2a 18441+
4f0767ce 18442+out:
4a4d8108
AM
18443+ ii_read_unlock(inode);
18444+ si_read_unlock(sb);
1308ab2a 18445+ return err;
18446+}
18447+
4a4d8108 18448+/* ---------------------------------------------------------------------- */
1facf9fc 18449+
4a4d8108 18450+static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
b4510431 18451+ unsigned int flags)
4a4d8108
AM
18452+{
18453+ struct dentry *ret, *parent;
b752ccd1 18454+ struct inode *inode;
4a4d8108 18455+ struct super_block *sb;
1716fcea 18456+ int err, npositive;
dece6358 18457+
4a4d8108 18458+ IMustLock(dir);
1308ab2a 18459+
537831f9
AM
18460+ /* todo: support rcu-walk? */
18461+ ret = ERR_PTR(-ECHILD);
18462+ if (flags & LOOKUP_RCU)
18463+ goto out;
18464+
18465+ ret = ERR_PTR(-ENAMETOOLONG);
18466+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
18467+ goto out;
18468+
4a4d8108 18469+ sb = dir->i_sb;
7f207e10
AM
18470+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
18471+ ret = ERR_PTR(err);
18472+ if (unlikely(err))
18473+ goto out;
18474+
4a4d8108
AM
18475+ err = au_di_init(dentry);
18476+ ret = ERR_PTR(err);
18477+ if (unlikely(err))
7f207e10 18478+ goto out_si;
1308ab2a 18479+
9dbd164d 18480+ inode = NULL;
027c5e7a 18481+ npositive = 0; /* suppress a warning */
4a4d8108
AM
18482+ parent = dentry->d_parent; /* dir inode is locked */
18483+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a
AM
18484+ err = au_alive_dir(parent);
18485+ if (!err)
18486+ err = au_digen_test(parent, au_sigen(sb));
18487+ if (!err) {
18488+ npositive = au_lkup_dentry(dentry, au_dbstart(parent),
537831f9 18489+ /*type*/0);
027c5e7a
AM
18490+ err = npositive;
18491+ }
4a4d8108 18492+ di_read_unlock(parent, AuLock_IR);
4a4d8108
AM
18493+ ret = ERR_PTR(err);
18494+ if (unlikely(err < 0))
18495+ goto out_unlock;
1308ab2a 18496+
4a4d8108 18497+ if (npositive) {
b752ccd1 18498+ inode = au_new_inode(dentry, /*must_new*/0);
c1595e42
JR
18499+ if (IS_ERR(inode)) {
18500+ ret = (void *)inode;
18501+ inode = NULL;
18502+ goto out_unlock;
18503+ }
9dbd164d 18504+ }
4a4d8108 18505+
c1595e42
JR
18506+ if (inode)
18507+ atomic_inc(&inode->i_count);
4a4d8108 18508+ ret = d_splice_alias(inode, dentry);
c1595e42
JR
18509+ if (IS_ERR(ret)
18510+ && PTR_ERR(ret) == -EIO
18511+ && inode
18512+ && S_ISDIR(inode->i_mode)) {
18513+ atomic_inc(&inode->i_count);
18514+ ret = d_materialise_unique(dentry, inode);
18515+ if (!IS_ERR(ret))
18516+ ii_write_unlock(inode);
18517+ }
537831f9
AM
18518+#if 0
18519+ if (unlikely(d_need_lookup(dentry))) {
18520+ spin_lock(&dentry->d_lock);
18521+ dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
18522+ spin_unlock(&dentry->d_lock);
18523+ } else
18524+#endif
c1595e42
JR
18525+ if (inode) {
18526+ if (!IS_ERR(ret))
18527+ iput(inode);
18528+ else {
18529+ ii_write_unlock(inode);
18530+ iput(inode);
18531+ inode = NULL;
18532+ }
7f207e10 18533+ }
1facf9fc 18534+
4f0767ce 18535+out_unlock:
4a4d8108 18536+ di_write_unlock(dentry);
2dfbb274 18537+ if (inode) {
1716fcea
AM
18538+ /* verbose coding for lock class name */
18539+ if (unlikely(S_ISLNK(inode->i_mode)))
18540+ au_rw_class(&au_di(dentry)->di_rwsem,
18541+ au_lc_key + AuLcSymlink_DIINFO);
18542+ else if (unlikely(S_ISDIR(inode->i_mode)))
18543+ au_rw_class(&au_di(dentry)->di_rwsem,
18544+ au_lc_key + AuLcDir_DIINFO);
18545+ else /* likely */
18546+ au_rw_class(&au_di(dentry)->di_rwsem,
18547+ au_lc_key + AuLcNonDir_DIINFO);
9dbd164d 18548+ }
7f207e10 18549+out_si:
4a4d8108 18550+ si_read_unlock(sb);
7f207e10 18551+out:
4a4d8108
AM
18552+ return ret;
18553+}
1facf9fc 18554+
4a4d8108 18555+/* ---------------------------------------------------------------------- */
1facf9fc 18556+
7f2ca4b1
JR
18557+struct aopen_node {
18558+ struct hlist_node hlist;
18559+ struct file *file, *h_file;
18560+};
18561+
18562+static int au_do_aopen(struct inode *inode, struct file *file)
18563+{
18564+ struct au_sphlhead *aopen;
18565+ struct aopen_node *node;
18566+ struct au_do_open_args args = {
18567+ .no_lock = 1,
18568+ .open = au_do_open_nondir
18569+ };
18570+
18571+ aopen = &au_sbi(inode->i_sb)->si_aopen;
18572+ spin_lock(&aopen->spin);
18573+ hlist_for_each_entry(node, &aopen->head, hlist)
18574+ if (node->file == file) {
18575+ args.h_file = node->h_file;
18576+ break;
18577+ }
18578+ spin_unlock(&aopen->spin);
18579+ /* AuDebugOn(!args.h_file); */
18580+
18581+ return au_do_open(file, &args);
18582+}
18583+
18584+static int aufs_atomic_open(struct inode *dir, struct dentry *dentry,
18585+ struct file *file, unsigned int open_flag,
18586+ umode_t create_mode, int *opened)
18587+{
18588+ int err, h_opened = *opened;
18589+ struct dentry *parent;
18590+ struct dentry *d;
18591+ struct au_sphlhead *aopen;
18592+ struct vfsub_aopen_args args = {
18593+ .open_flag = open_flag,
18594+ .create_mode = create_mode,
18595+ .opened = &h_opened
18596+ };
18597+ struct aopen_node aopen_node = {
18598+ .file = file
18599+ };
18600+
18601+ IMustLock(dir);
18602+ AuDbg("open_flag 0x%x\n", open_flag);
18603+ AuDbgDentry(dentry);
18604+
18605+ err = 0;
18606+ if (!au_di(dentry)) {
18607+ d = aufs_lookup(dir, dentry, /*flags*/0);
18608+ if (IS_ERR(d)) {
18609+ err = PTR_ERR(d);
18610+ goto out;
18611+ } else if (d) {
18612+ /*
18613+ * obsoleted dentry found.
18614+ * another error will be returned later.
18615+ */
18616+ d_drop(d);
18617+ dput(d);
18618+ AuDbgDentry(d);
18619+ }
18620+ AuDbgDentry(dentry);
18621+ }
18622+
18623+ if (d_is_positive(dentry)
18624+ || d_unhashed(dentry)
18625+ || d_unlinked(dentry)
18626+ || !(open_flag & O_CREAT))
18627+ goto out_no_open;
18628+
18629+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
18630+ if (unlikely(err))
18631+ goto out;
18632+
18633+ parent = dentry->d_parent; /* dir is locked */
18634+ di_write_lock_parent(parent);
18635+ err = au_lkup_dentry(dentry, /*bstart*/0, /*type*/0);
18636+ if (unlikely(err))
18637+ goto out_unlock;
18638+
18639+ AuDbgDentry(dentry);
18640+ if (d_is_positive(dentry))
18641+ goto out_unlock;
18642+
18643+ args.file = get_empty_filp();
18644+ err = PTR_ERR(args.file);
18645+ if (IS_ERR(args.file))
18646+ goto out_unlock;
18647+
18648+ args.file->f_flags = file->f_flags;
18649+ err = au_aopen_or_create(dir, dentry, &args);
18650+ AuTraceErr(err);
18651+ AuDbgFile(args.file);
18652+ if (unlikely(err < 0)) {
18653+ if (h_opened & FILE_OPENED)
18654+ fput(args.file);
18655+ else
18656+ put_filp(args.file);
18657+ goto out_unlock;
18658+ }
18659+
18660+ /* some filesystems don't set FILE_CREATED while succeeded? */
18661+ *opened |= FILE_CREATED;
18662+ if (h_opened & FILE_OPENED)
18663+ aopen_node.h_file = args.file;
18664+ else {
18665+ put_filp(args.file);
18666+ args.file = NULL;
18667+ }
18668+ aopen = &au_sbi(dir->i_sb)->si_aopen;
18669+ au_sphl_add(&aopen_node.hlist, aopen);
18670+ err = finish_open(file, dentry, au_do_aopen, opened);
18671+ au_sphl_del(&aopen_node.hlist, aopen);
18672+ AuTraceErr(err);
18673+ AuDbgFile(file);
18674+ if (aopen_node.h_file)
18675+ fput(aopen_node.h_file);
18676+
18677+out_unlock:
18678+ di_write_unlock(parent);
18679+ aufs_read_unlock(dentry, AuLock_DW);
18680+ AuDbgDentry(dentry);
18681+ if (unlikely(err))
18682+ goto out;
18683+out_no_open:
18684+ if (!err && !(*opened & FILE_CREATED)) {
18685+ AuLabel(out_no_open);
18686+ dget(dentry);
18687+ err = finish_no_open(file, dentry);
18688+ }
18689+out:
18690+ AuDbg("%pd%s%s\n", dentry,
18691+ (*opened & FILE_CREATED) ? " created" : "",
18692+ (*opened & FILE_OPENED) ? " opened" : "");
18693+ AuTraceErr(err);
18694+ return err;
18695+}
18696+
18697+
18698+/* ---------------------------------------------------------------------- */
18699+
4a4d8108
AM
18700+static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
18701+ const unsigned char add_entry, aufs_bindex_t bcpup,
18702+ aufs_bindex_t bstart)
18703+{
18704+ int err;
18705+ struct dentry *h_parent;
18706+ struct inode *h_dir;
1facf9fc 18707+
027c5e7a 18708+ if (add_entry)
4a4d8108 18709+ IMustLock(parent->d_inode);
027c5e7a 18710+ else
4a4d8108
AM
18711+ di_write_lock_parent(parent);
18712+
18713+ err = 0;
18714+ if (!au_h_dptr(parent, bcpup)) {
c2b27bf2
AM
18715+ if (bstart > bcpup)
18716+ err = au_cpup_dirs(dentry, bcpup);
18717+ else if (bstart < bcpup)
4a4d8108
AM
18718+ err = au_cpdown_dirs(dentry, bcpup);
18719+ else
c2b27bf2 18720+ BUG();
4a4d8108 18721+ }
38d290e6 18722+ if (!err && add_entry && !au_ftest_wrdir(add_entry, TMPFILE)) {
4a4d8108
AM
18723+ h_parent = au_h_dptr(parent, bcpup);
18724+ h_dir = h_parent->d_inode;
18725+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
7f2ca4b1 18726+ err = au_lkup_neg(dentry, bcpup, /*wh*/0);
4a4d8108
AM
18727+ /* todo: no unlock here */
18728+ mutex_unlock(&h_dir->i_mutex);
027c5e7a
AM
18729+
18730+ AuDbg("bcpup %d\n", bcpup);
18731+ if (!err) {
18732+ if (!dentry->d_inode)
18733+ au_set_h_dptr(dentry, bstart, NULL);
4a4d8108
AM
18734+ au_update_dbrange(dentry, /*do_put_zero*/0);
18735+ }
1308ab2a 18736+ }
1facf9fc 18737+
4a4d8108
AM
18738+ if (!add_entry)
18739+ di_write_unlock(parent);
18740+ if (!err)
18741+ err = bcpup; /* success */
1308ab2a 18742+
027c5e7a 18743+ AuTraceErr(err);
4a4d8108
AM
18744+ return err;
18745+}
1facf9fc 18746+
4a4d8108
AM
18747+/*
18748+ * decide the branch and the parent dir where we will create a new entry.
18749+ * returns new bindex or an error.
18750+ * copyup the parent dir if needed.
18751+ */
18752+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
18753+ struct au_wr_dir_args *args)
18754+{
18755+ int err;
392086de 18756+ unsigned int flags;
4a4d8108 18757+ aufs_bindex_t bcpup, bstart, src_bstart;
86dc4139
AM
18758+ const unsigned char add_entry
18759+ = au_ftest_wrdir(args->flags, ADD_ENTRY)
38d290e6 18760+ | au_ftest_wrdir(args->flags, TMPFILE);
4a4d8108
AM
18761+ struct super_block *sb;
18762+ struct dentry *parent;
18763+ struct au_sbinfo *sbinfo;
1facf9fc 18764+
4a4d8108
AM
18765+ sb = dentry->d_sb;
18766+ sbinfo = au_sbi(sb);
18767+ parent = dget_parent(dentry);
18768+ bstart = au_dbstart(dentry);
18769+ bcpup = bstart;
18770+ if (args->force_btgt < 0) {
18771+ if (src_dentry) {
18772+ src_bstart = au_dbstart(src_dentry);
18773+ if (src_bstart < bstart)
18774+ bcpup = src_bstart;
18775+ } else if (add_entry) {
392086de
AM
18776+ flags = 0;
18777+ if (au_ftest_wrdir(args->flags, ISDIR))
18778+ au_fset_wbr(flags, DIR);
18779+ err = AuWbrCreate(sbinfo, dentry, flags);
4a4d8108
AM
18780+ bcpup = err;
18781+ }
1facf9fc 18782+
4a4d8108
AM
18783+ if (bcpup < 0 || au_test_ro(sb, bcpup, dentry->d_inode)) {
18784+ if (add_entry)
18785+ err = AuWbrCopyup(sbinfo, dentry);
18786+ else {
18787+ if (!IS_ROOT(dentry)) {
18788+ di_read_lock_parent(parent, !AuLock_IR);
18789+ err = AuWbrCopyup(sbinfo, dentry);
18790+ di_read_unlock(parent, !AuLock_IR);
18791+ } else
18792+ err = AuWbrCopyup(sbinfo, dentry);
18793+ }
18794+ bcpup = err;
18795+ if (unlikely(err < 0))
18796+ goto out;
18797+ }
18798+ } else {
18799+ bcpup = args->force_btgt;
18800+ AuDebugOn(au_test_ro(sb, bcpup, dentry->d_inode));
1308ab2a 18801+ }
027c5e7a 18802+
4a4d8108
AM
18803+ AuDbg("bstart %d, bcpup %d\n", bstart, bcpup);
18804+ err = bcpup;
18805+ if (bcpup == bstart)
18806+ goto out; /* success */
4a4d8108
AM
18807+
18808+ /* copyup the new parent into the branch we process */
18809+ err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, bstart);
027c5e7a
AM
18810+ if (err >= 0) {
18811+ if (!dentry->d_inode) {
18812+ au_set_h_dptr(dentry, bstart, NULL);
18813+ au_set_dbstart(dentry, bcpup);
18814+ au_set_dbend(dentry, bcpup);
18815+ }
38d290e6
JR
18816+ AuDebugOn(add_entry
18817+ && !au_ftest_wrdir(args->flags, TMPFILE)
18818+ && !au_h_dptr(dentry, bcpup));
027c5e7a 18819+ }
86dc4139
AM
18820+
18821+out:
18822+ dput(parent);
18823+ return err;
18824+}
18825+
18826+/* ---------------------------------------------------------------------- */
18827+
18828+void au_pin_hdir_unlock(struct au_pin *p)
18829+{
18830+ if (p->hdir)
18831+ au_hn_imtx_unlock(p->hdir);
18832+}
18833+
c1595e42 18834+int au_pin_hdir_lock(struct au_pin *p)
86dc4139
AM
18835+{
18836+ int err;
18837+
18838+ err = 0;
18839+ if (!p->hdir)
18840+ goto out;
18841+
18842+ /* even if an error happens later, keep this lock */
18843+ au_hn_imtx_lock_nested(p->hdir, p->lsc_hi);
18844+
18845+ err = -EBUSY;
18846+ if (unlikely(p->hdir->hi_inode != p->h_parent->d_inode))
18847+ goto out;
18848+
18849+ err = 0;
18850+ if (p->h_dentry)
18851+ err = au_h_verify(p->h_dentry, p->udba, p->hdir->hi_inode,
18852+ p->h_parent, p->br);
18853+
18854+out:
18855+ return err;
18856+}
18857+
18858+int au_pin_hdir_relock(struct au_pin *p)
18859+{
18860+ int err, i;
18861+ struct inode *h_i;
18862+ struct dentry *h_d[] = {
18863+ p->h_dentry,
18864+ p->h_parent
18865+ };
18866+
18867+ err = au_pin_hdir_lock(p);
18868+ if (unlikely(err))
18869+ goto out;
18870+
18871+ for (i = 0; !err && i < sizeof(h_d)/sizeof(*h_d); i++) {
18872+ if (!h_d[i])
18873+ continue;
18874+ h_i = h_d[i]->d_inode;
18875+ if (h_i)
18876+ err = !h_i->i_nlink;
18877+ }
18878+
18879+out:
18880+ return err;
18881+}
18882+
/*
 * overwrite the owner of the i_mutex of the pinned branch dir by @task.
 * compiled to nothing unless CONFIG_DEBUG_MUTEXES or CONFIG_SMP is defined.
 * the caller has to make sure that ->hdir is set.
 */
void au_pin_hdir_set_owner(struct au_pin *p, struct task_struct *task)
{
#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP)
	struct mutex *mtx = &p->hdir->hi_inode->i_mutex;

	mtx->owner = task;
#endif
}
18889+
18890+void au_pin_hdir_acquire_nest(struct au_pin *p)
18891+{
18892+ if (p->hdir) {
18893+ mutex_acquire_nest(&p->hdir->hi_inode->i_mutex.dep_map,
18894+ p->lsc_hi, 0, NULL, _RET_IP_);
18895+ au_pin_hdir_set_owner(p, current);
18896+ }
dece6358 18897+}
1facf9fc 18898+
86dc4139
AM
18899+void au_pin_hdir_release(struct au_pin *p)
18900+{
18901+ if (p->hdir) {
18902+ au_pin_hdir_set_owner(p, p->task);
18903+ mutex_release(&p->hdir->hi_inode->i_mutex.dep_map, 1, _RET_IP_);
18904+ }
18905+}
1308ab2a 18906+
4a4d8108 18907+struct dentry *au_pinned_h_parent(struct au_pin *pin)
1308ab2a 18908+{
4a4d8108
AM
18909+ if (pin && pin->parent)
18910+ return au_h_dptr(pin->parent, pin->bindex);
18911+ return NULL;
dece6358 18912+}
1facf9fc 18913+
4a4d8108 18914+void au_unpin(struct au_pin *p)
dece6358 18915+{
86dc4139
AM
18916+ if (p->hdir)
18917+ au_pin_hdir_unlock(p);
e49829fe 18918+ if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE))
b4510431 18919+ vfsub_mnt_drop_write(p->h_mnt);
4a4d8108
AM
18920+ if (!p->hdir)
18921+ return;
1facf9fc 18922+
4a4d8108
AM
18923+ if (!au_ftest_pin(p->flags, DI_LOCKED))
18924+ di_read_unlock(p->parent, AuLock_IR);
18925+ iput(p->hdir->hi_inode);
18926+ dput(p->parent);
18927+ p->parent = NULL;
18928+ p->hdir = NULL;
18929+ p->h_mnt = NULL;
86dc4139 18930+ /* do not clear p->task */
4a4d8108 18931+}
1308ab2a 18932+
4a4d8108
AM
18933+int au_do_pin(struct au_pin *p)
18934+{
18935+ int err;
18936+ struct super_block *sb;
4a4d8108
AM
18937+ struct inode *h_dir;
18938+
18939+ err = 0;
18940+ sb = p->dentry->d_sb;
86dc4139 18941+ p->br = au_sbr(sb, p->bindex);
4a4d8108
AM
18942+ if (IS_ROOT(p->dentry)) {
18943+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
86dc4139 18944+ p->h_mnt = au_br_mnt(p->br);
b4510431 18945+ err = vfsub_mnt_want_write(p->h_mnt);
4a4d8108
AM
18946+ if (unlikely(err)) {
18947+ au_fclr_pin(p->flags, MNT_WRITE);
18948+ goto out_err;
18949+ }
18950+ }
dece6358 18951+ goto out;
1facf9fc 18952+ }
18953+
86dc4139 18954+ p->h_dentry = NULL;
4a4d8108 18955+ if (p->bindex <= au_dbend(p->dentry))
86dc4139 18956+ p->h_dentry = au_h_dptr(p->dentry, p->bindex);
dece6358 18957+
4a4d8108
AM
18958+ p->parent = dget_parent(p->dentry);
18959+ if (!au_ftest_pin(p->flags, DI_LOCKED))
18960+ di_read_lock(p->parent, AuLock_IR, p->lsc_di);
dece6358 18961+
4a4d8108 18962+ h_dir = NULL;
86dc4139 18963+ p->h_parent = au_h_dptr(p->parent, p->bindex);
4a4d8108
AM
18964+ p->hdir = au_hi(p->parent->d_inode, p->bindex);
18965+ if (p->hdir)
18966+ h_dir = p->hdir->hi_inode;
dece6358 18967+
b752ccd1
AM
18968+ /*
18969+ * udba case, or
18970+ * if DI_LOCKED is not set, then p->parent may be different
18971+ * and h_parent can be NULL.
18972+ */
86dc4139 18973+ if (unlikely(!p->hdir || !h_dir || !p->h_parent)) {
e49829fe 18974+ err = -EBUSY;
4a4d8108
AM
18975+ if (!au_ftest_pin(p->flags, DI_LOCKED))
18976+ di_read_unlock(p->parent, AuLock_IR);
18977+ dput(p->parent);
18978+ p->parent = NULL;
18979+ goto out_err;
18980+ }
1308ab2a 18981+
4a4d8108 18982+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
86dc4139 18983+ p->h_mnt = au_br_mnt(p->br);
b4510431 18984+ err = vfsub_mnt_want_write(p->h_mnt);
dece6358 18985+ if (unlikely(err)) {
4a4d8108 18986+ au_fclr_pin(p->flags, MNT_WRITE);
86dc4139
AM
18987+ if (!au_ftest_pin(p->flags, DI_LOCKED))
18988+ di_read_unlock(p->parent, AuLock_IR);
18989+ dput(p->parent);
18990+ p->parent = NULL;
18991+ goto out_err;
dece6358
AM
18992+ }
18993+ }
4a4d8108 18994+
86dc4139
AM
18995+ au_igrab(h_dir);
18996+ err = au_pin_hdir_lock(p);
18997+ if (!err)
18998+ goto out; /* success */
18999+
076b876e
AM
19000+ au_unpin(p);
19001+
4f0767ce 19002+out_err:
4a4d8108
AM
19003+ pr_err("err %d\n", err);
19004+ err = au_busy_or_stale();
4f0767ce 19005+out:
1facf9fc 19006+ return err;
19007+}
19008+
4a4d8108
AM
19009+void au_pin_init(struct au_pin *p, struct dentry *dentry,
19010+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
19011+ unsigned int udba, unsigned char flags)
19012+{
19013+ p->dentry = dentry;
19014+ p->udba = udba;
19015+ p->lsc_di = lsc_di;
19016+ p->lsc_hi = lsc_hi;
19017+ p->flags = flags;
19018+ p->bindex = bindex;
19019+
19020+ p->parent = NULL;
19021+ p->hdir = NULL;
19022+ p->h_mnt = NULL;
86dc4139
AM
19023+
19024+ p->h_dentry = NULL;
19025+ p->h_parent = NULL;
19026+ p->br = NULL;
19027+ p->task = current;
4a4d8108
AM
19028+}
19029+
19030+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
19031+ unsigned int udba, unsigned char flags)
19032+{
19033+ au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2,
19034+ udba, flags);
19035+ return au_do_pin(pin);
19036+}
19037+
dece6358
AM
19038+/* ---------------------------------------------------------------------- */
19039+
1308ab2a 19040+/*
4a4d8108
AM
19041+ * ->setattr() and ->getattr() are called in various cases.
19042+ * chmod, stat: dentry is revalidated.
19043+ * fchmod, fstat: file and dentry are not revalidated, additionally they may be
19044+ * unhashed.
19045+ * for ->setattr(), ia->ia_file is passed from ftruncate only.
1308ab2a 19046+ */
027c5e7a 19047+/* todo: consolidate with do_refresh() and simple_reval_dpath() */
c1595e42 19048+int au_reval_for_attr(struct dentry *dentry, unsigned int sigen)
1facf9fc 19049+{
4a4d8108
AM
19050+ int err;
19051+ struct inode *inode;
19052+ struct dentry *parent;
1facf9fc 19053+
1308ab2a 19054+ err = 0;
4a4d8108 19055+ inode = dentry->d_inode;
027c5e7a 19056+ if (au_digen_test(dentry, sigen)) {
4a4d8108
AM
19057+ parent = dget_parent(dentry);
19058+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a 19059+ err = au_refresh_dentry(dentry, parent);
4a4d8108
AM
19060+ di_read_unlock(parent, AuLock_IR);
19061+ dput(parent);
dece6358 19062+ }
1facf9fc 19063+
4a4d8108 19064+ AuTraceErr(err);
1308ab2a 19065+ return err;
19066+}
dece6358 19067+
c1595e42
JR
19068+int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
19069+ struct au_icpup_args *a)
1308ab2a 19070+{
19071+ int err;
4a4d8108 19072+ loff_t sz;
e49829fe 19073+ aufs_bindex_t bstart, ibstart;
4a4d8108
AM
19074+ struct dentry *hi_wh, *parent;
19075+ struct inode *inode;
4a4d8108
AM
19076+ struct au_wr_dir_args wr_dir_args = {
19077+ .force_btgt = -1,
19078+ .flags = 0
19079+ };
19080+
7f2ca4b1 19081+ if (d_is_dir(dentry))
4a4d8108
AM
19082+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
19083+ /* plink or hi_wh() case */
7f2ca4b1
JR
19084+ bstart = au_dbstart(dentry);
19085+ inode = dentry->d_inode;
e49829fe 19086+ ibstart = au_ibstart(inode);
027c5e7a 19087+ if (bstart != ibstart && !au_test_ro(inode->i_sb, ibstart, inode))
e49829fe 19088+ wr_dir_args.force_btgt = ibstart;
4a4d8108
AM
19089+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
19090+ if (unlikely(err < 0))
19091+ goto out;
19092+ a->btgt = err;
19093+ if (err != bstart)
19094+ au_fset_icpup(a->flags, DID_CPUP);
19095+
19096+ err = 0;
19097+ a->pin_flags = AuPin_MNT_WRITE;
19098+ parent = NULL;
19099+ if (!IS_ROOT(dentry)) {
19100+ au_fset_pin(a->pin_flags, DI_LOCKED);
19101+ parent = dget_parent(dentry);
19102+ di_write_lock_parent(parent);
19103+ }
19104+
19105+ err = au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags);
19106+ if (unlikely(err))
19107+ goto out_parent;
19108+
19109+ a->h_path.dentry = au_h_dptr(dentry, bstart);
19110+ a->h_inode = a->h_path.dentry->d_inode;
4a4d8108 19111+ sz = -1;
c1595e42
JR
19112+ if (ia && (ia->ia_valid & ATTR_SIZE)) {
19113+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
19114+ if (ia->ia_size < i_size_read(a->h_inode))
19115+ sz = ia->ia_size;
19116+ mutex_unlock(&a->h_inode->i_mutex);
19117+ }
4a4d8108 19118+
4a4d8108 19119+ hi_wh = NULL;
027c5e7a 19120+ if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) {
4a4d8108
AM
19121+ hi_wh = au_hi_wh(inode, a->btgt);
19122+ if (!hi_wh) {
c2b27bf2
AM
19123+ struct au_cp_generic cpg = {
19124+ .dentry = dentry,
19125+ .bdst = a->btgt,
19126+ .bsrc = -1,
19127+ .len = sz,
19128+ .pin = &a->pin
19129+ };
19130+ err = au_sio_cpup_wh(&cpg, /*file*/NULL);
4a4d8108
AM
19131+ if (unlikely(err))
19132+ goto out_unlock;
19133+ hi_wh = au_hi_wh(inode, a->btgt);
19134+ /* todo: revalidate hi_wh? */
19135+ }
19136+ }
19137+
19138+ if (parent) {
19139+ au_pin_set_parent_lflag(&a->pin, /*lflag*/0);
19140+ di_downgrade_lock(parent, AuLock_IR);
19141+ dput(parent);
19142+ parent = NULL;
19143+ }
19144+ if (!au_ftest_icpup(a->flags, DID_CPUP))
19145+ goto out; /* success */
19146+
19147+ if (!d_unhashed(dentry)) {
c2b27bf2
AM
19148+ struct au_cp_generic cpg = {
19149+ .dentry = dentry,
19150+ .bdst = a->btgt,
19151+ .bsrc = bstart,
19152+ .len = sz,
19153+ .pin = &a->pin,
19154+ .flags = AuCpup_DTIME | AuCpup_HOPEN
19155+ };
19156+ err = au_sio_cpup_simple(&cpg);
4a4d8108
AM
19157+ if (!err)
19158+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
19159+ } else if (!hi_wh)
19160+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
19161+ else
19162+ a->h_path.dentry = hi_wh; /* do not dget here */
1308ab2a 19163+
4f0767ce 19164+out_unlock:
4a4d8108 19165+ a->h_inode = a->h_path.dentry->d_inode;
86dc4139 19166+ if (!err)
dece6358 19167+ goto out; /* success */
4a4d8108 19168+ au_unpin(&a->pin);
4f0767ce 19169+out_parent:
4a4d8108
AM
19170+ if (parent) {
19171+ di_write_unlock(parent);
19172+ dput(parent);
19173+ }
4f0767ce 19174+out:
86dc4139
AM
19175+ if (!err)
19176+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
1facf9fc 19177+ return err;
19178+}
19179+
4a4d8108 19180+static int aufs_setattr(struct dentry *dentry, struct iattr *ia)
1facf9fc 19181+{
4a4d8108 19182+ int err;
523b37e3 19183+ struct inode *inode, *delegated;
4a4d8108
AM
19184+ struct super_block *sb;
19185+ struct file *file;
19186+ struct au_icpup_args *a;
1facf9fc 19187+
4a4d8108
AM
19188+ inode = dentry->d_inode;
19189+ IMustLock(inode);
dece6358 19190+
4a4d8108
AM
19191+ err = -ENOMEM;
19192+ a = kzalloc(sizeof(*a), GFP_NOFS);
19193+ if (unlikely(!a))
19194+ goto out;
1facf9fc 19195+
4a4d8108
AM
19196+ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
19197+ ia->ia_valid &= ~ATTR_MODE;
dece6358 19198+
4a4d8108
AM
19199+ file = NULL;
19200+ sb = dentry->d_sb;
e49829fe
JR
19201+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
19202+ if (unlikely(err))
19203+ goto out_kfree;
19204+
4a4d8108
AM
19205+ if (ia->ia_valid & ATTR_FILE) {
19206+ /* currently ftruncate(2) only */
19207+ AuDebugOn(!S_ISREG(inode->i_mode));
19208+ file = ia->ia_file;
19209+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1);
19210+ if (unlikely(err))
19211+ goto out_si;
19212+ ia->ia_file = au_hf_top(file);
19213+ a->udba = AuOpt_UDBA_NONE;
19214+ } else {
19215+ /* fchmod() doesn't pass ia_file */
19216+ a->udba = au_opt_udba(sb);
027c5e7a
AM
19217+ di_write_lock_child(dentry);
19218+ /* no d_unlinked(), to set UDBA_NONE for root */
4a4d8108
AM
19219+ if (d_unhashed(dentry))
19220+ a->udba = AuOpt_UDBA_NONE;
4a4d8108
AM
19221+ if (a->udba != AuOpt_UDBA_NONE) {
19222+ AuDebugOn(IS_ROOT(dentry));
19223+ err = au_reval_for_attr(dentry, au_sigen(sb));
19224+ if (unlikely(err))
19225+ goto out_dentry;
19226+ }
dece6358 19227+ }
dece6358 19228+
4a4d8108
AM
19229+ err = au_pin_and_icpup(dentry, ia, a);
19230+ if (unlikely(err < 0))
19231+ goto out_dentry;
19232+ if (au_ftest_icpup(a->flags, DID_CPUP)) {
19233+ ia->ia_file = NULL;
19234+ ia->ia_valid &= ~ATTR_FILE;
1308ab2a 19235+ }
dece6358 19236+
4a4d8108
AM
19237+ a->h_path.mnt = au_sbr_mnt(sb, a->btgt);
19238+ if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME))
19239+ == (ATTR_MODE | ATTR_CTIME)) {
7eafdf33 19240+ err = security_path_chmod(&a->h_path, ia->ia_mode);
4a4d8108
AM
19241+ if (unlikely(err))
19242+ goto out_unlock;
19243+ } else if ((ia->ia_valid & (ATTR_UID | ATTR_GID))
19244+ && (ia->ia_valid & ATTR_CTIME)) {
86dc4139 19245+ err = security_path_chown(&a->h_path, ia->ia_uid, ia->ia_gid);
4a4d8108
AM
19246+ if (unlikely(err))
19247+ goto out_unlock;
19248+ }
dece6358 19249+
4a4d8108
AM
19250+ if (ia->ia_valid & ATTR_SIZE) {
19251+ struct file *f;
1308ab2a 19252+
953406b4 19253+ if (ia->ia_size < i_size_read(inode))
4a4d8108 19254+ /* unmap only */
953406b4 19255+ truncate_setsize(inode, ia->ia_size);
1308ab2a 19256+
4a4d8108
AM
19257+ f = NULL;
19258+ if (ia->ia_valid & ATTR_FILE)
19259+ f = ia->ia_file;
19260+ mutex_unlock(&a->h_inode->i_mutex);
19261+ err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
19262+ mutex_lock_nested(&a->h_inode->i_mutex, AuLsc_I_CHILD);
523b37e3
AM
19263+ } else {
19264+ delegated = NULL;
19265+ while (1) {
19266+ err = vfsub_notify_change(&a->h_path, ia, &delegated);
19267+ if (delegated) {
19268+ err = break_deleg_wait(&delegated);
19269+ if (!err)
19270+ continue;
19271+ }
19272+ break;
19273+ }
19274+ }
7f2ca4b1
JR
19275+ /*
19276+ * regardless aufs 'acl' option setting.
19277+ * why don't all acl-aware fs call this func from their ->setattr()?
19278+ */
19279+ if (!err && (ia->ia_valid & ATTR_MODE))
19280+ err = vfsub_acl_chmod(a->h_inode, ia->ia_mode);
4a4d8108
AM
19281+ if (!err)
19282+ au_cpup_attr_changeable(inode);
1308ab2a 19283+
4f0767ce 19284+out_unlock:
4a4d8108
AM
19285+ mutex_unlock(&a->h_inode->i_mutex);
19286+ au_unpin(&a->pin);
027c5e7a
AM
19287+ if (unlikely(err))
19288+ au_update_dbstart(dentry);
4f0767ce 19289+out_dentry:
4a4d8108
AM
19290+ di_write_unlock(dentry);
19291+ if (file) {
19292+ fi_write_unlock(file);
19293+ ia->ia_file = file;
19294+ ia->ia_valid |= ATTR_FILE;
19295+ }
4f0767ce 19296+out_si:
4a4d8108 19297+ si_read_unlock(sb);
e49829fe 19298+out_kfree:
4a4d8108 19299+ kfree(a);
4f0767ce 19300+out:
4a4d8108
AM
19301+ AuTraceErr(err);
19302+ return err;
1facf9fc 19303+}
19304+
c1595e42
JR
19305+#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
19306+static int au_h_path_to_set_attr(struct dentry *dentry,
19307+ struct au_icpup_args *a, struct path *h_path)
19308+{
19309+ int err;
19310+ struct super_block *sb;
19311+
19312+ sb = dentry->d_sb;
19313+ a->udba = au_opt_udba(sb);
19314+ /* no d_unlinked(), to set UDBA_NONE for root */
19315+ if (d_unhashed(dentry))
19316+ a->udba = AuOpt_UDBA_NONE;
19317+ if (a->udba != AuOpt_UDBA_NONE) {
19318+ AuDebugOn(IS_ROOT(dentry));
19319+ err = au_reval_for_attr(dentry, au_sigen(sb));
19320+ if (unlikely(err))
19321+ goto out;
19322+ }
19323+ err = au_pin_and_icpup(dentry, /*ia*/NULL, a);
19324+ if (unlikely(err < 0))
19325+ goto out;
19326+
19327+ h_path->dentry = a->h_path.dentry;
19328+ h_path->mnt = au_sbr_mnt(sb, a->btgt);
19329+
19330+out:
19331+ return err;
19332+}
19333+
19334+ssize_t au_srxattr(struct dentry *dentry, struct au_srxattr *arg)
19335+{
19336+ int err;
19337+ struct path h_path;
19338+ struct super_block *sb;
19339+ struct au_icpup_args *a;
19340+ struct inode *inode, *h_inode;
19341+
19342+ inode = dentry->d_inode;
19343+ IMustLock(inode);
19344+
19345+ err = -ENOMEM;
19346+ a = kzalloc(sizeof(*a), GFP_NOFS);
19347+ if (unlikely(!a))
19348+ goto out;
19349+
19350+ sb = dentry->d_sb;
19351+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
19352+ if (unlikely(err))
19353+ goto out_kfree;
19354+
19355+ h_path.dentry = NULL; /* silence gcc */
19356+ di_write_lock_child(dentry);
19357+ err = au_h_path_to_set_attr(dentry, a, &h_path);
19358+ if (unlikely(err))
19359+ goto out_di;
19360+
19361+ mutex_unlock(&a->h_inode->i_mutex);
19362+ switch (arg->type) {
19363+ case AU_XATTR_SET:
19364+ err = vfsub_setxattr(h_path.dentry,
19365+ arg->u.set.name, arg->u.set.value,
19366+ arg->u.set.size, arg->u.set.flags);
19367+ break;
19368+ case AU_XATTR_REMOVE:
19369+ err = vfsub_removexattr(h_path.dentry, arg->u.remove.name);
19370+ break;
19371+ case AU_ACL_SET:
19372+ err = -EOPNOTSUPP;
19373+ h_inode = h_path.dentry->d_inode;
19374+ if (h_inode->i_op->set_acl)
19375+ err = h_inode->i_op->set_acl(h_inode,
19376+ arg->u.acl_set.acl,
19377+ arg->u.acl_set.type);
19378+ break;
19379+ }
19380+ if (!err)
19381+ au_cpup_attr_timesizes(inode);
19382+
19383+ au_unpin(&a->pin);
19384+ if (unlikely(err))
19385+ au_update_dbstart(dentry);
19386+
19387+out_di:
19388+ di_write_unlock(dentry);
19389+ si_read_unlock(sb);
19390+out_kfree:
19391+ kfree(a);
19392+out:
19393+ AuTraceErr(err);
19394+ return err;
19395+}
19396+#endif
19397+
4a4d8108
AM
19398+static void au_refresh_iattr(struct inode *inode, struct kstat *st,
19399+ unsigned int nlink)
1facf9fc 19400+{
9dbd164d
AM
19401+ unsigned int n;
19402+
4a4d8108 19403+ inode->i_mode = st->mode;
86dc4139
AM
19404+ /* don't i_[ug]id_write() here */
19405+ inode->i_uid = st->uid;
19406+ inode->i_gid = st->gid;
4a4d8108
AM
19407+ inode->i_atime = st->atime;
19408+ inode->i_mtime = st->mtime;
19409+ inode->i_ctime = st->ctime;
1facf9fc 19410+
4a4d8108
AM
19411+ au_cpup_attr_nlink(inode, /*force*/0);
19412+ if (S_ISDIR(inode->i_mode)) {
9dbd164d
AM
19413+ n = inode->i_nlink;
19414+ n -= nlink;
19415+ n += st->nlink;
f6b6e03d 19416+ smp_mb(); /* for i_nlink */
7eafdf33 19417+ /* 0 can happen */
92d182d2 19418+ set_nlink(inode, n);
4a4d8108 19419+ }
1facf9fc 19420+
4a4d8108
AM
19421+ spin_lock(&inode->i_lock);
19422+ inode->i_blocks = st->blocks;
19423+ i_size_write(inode, st->size);
19424+ spin_unlock(&inode->i_lock);
1facf9fc 19425+}
19426+
c1595e42
JR
19427+/*
19428+ * common routine for aufs_getattr() and aufs_getxattr().
19429+ * returns zero or negative (an error).
19430+ * @dentry will be read-locked in success.
19431+ */
19432+int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path)
1facf9fc 19433+{
4a4d8108 19434+ int err;
076b876e 19435+ unsigned int mnt_flags, sigen;
c1595e42 19436+ unsigned char udba_none;
4a4d8108 19437+ aufs_bindex_t bindex;
4a4d8108
AM
19438+ struct super_block *sb, *h_sb;
19439+ struct inode *inode;
1facf9fc 19440+
c1595e42
JR
19441+ h_path->mnt = NULL;
19442+ h_path->dentry = NULL;
19443+
19444+ err = 0;
4a4d8108 19445+ sb = dentry->d_sb;
4a4d8108
AM
19446+ mnt_flags = au_mntflags(sb);
19447+ udba_none = !!au_opt_test(mnt_flags, UDBA_NONE);
1facf9fc 19448+
4a4d8108 19449+ /* support fstat(2) */
027c5e7a 19450+ if (!d_unlinked(dentry) && !udba_none) {
076b876e 19451+ sigen = au_sigen(sb);
027c5e7a
AM
19452+ err = au_digen_test(dentry, sigen);
19453+ if (!err) {
4a4d8108 19454+ di_read_lock_child(dentry, AuLock_IR);
027c5e7a 19455+ err = au_dbrange_test(dentry);
c1595e42
JR
19456+ if (unlikely(err)) {
19457+ di_read_unlock(dentry, AuLock_IR);
19458+ goto out;
19459+ }
027c5e7a 19460+ } else {
4a4d8108
AM
19461+ AuDebugOn(IS_ROOT(dentry));
19462+ di_write_lock_child(dentry);
027c5e7a
AM
19463+ err = au_dbrange_test(dentry);
19464+ if (!err)
19465+ err = au_reval_for_attr(dentry, sigen);
c1595e42
JR
19466+ if (!err)
19467+ di_downgrade_lock(dentry, AuLock_IR);
19468+ else {
19469+ di_write_unlock(dentry);
19470+ goto out;
19471+ }
4a4d8108
AM
19472+ }
19473+ } else
19474+ di_read_lock_child(dentry, AuLock_IR);
1facf9fc 19475+
c1595e42 19476+ inode = dentry->d_inode;
4a4d8108 19477+ bindex = au_ibstart(inode);
c1595e42
JR
19478+ h_path->mnt = au_sbr_mnt(sb, bindex);
19479+ h_sb = h_path->mnt->mnt_sb;
19480+ if (!force
19481+ && !au_test_fs_bad_iattr(h_sb)
19482+ && udba_none)
19483+ goto out; /* success */
1facf9fc 19484+
4a4d8108 19485+ if (au_dbstart(dentry) == bindex)
c1595e42 19486+ h_path->dentry = au_h_dptr(dentry, bindex);
4a4d8108 19487+ else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
c1595e42
JR
19488+ h_path->dentry = au_plink_lkup(inode, bindex);
19489+ if (IS_ERR(h_path->dentry))
19490+ /* pretending success */
19491+ h_path->dentry = NULL;
19492+ else
19493+ dput(h_path->dentry);
4a4d8108 19494+ }
c1595e42
JR
19495+
19496+out:
19497+ return err;
19498+}
19499+
19500+static int aufs_getattr(struct vfsmount *mnt __maybe_unused,
19501+ struct dentry *dentry, struct kstat *st)
19502+{
19503+ int err;
19504+ unsigned char positive;
19505+ struct path h_path;
19506+ struct inode *inode;
19507+ struct super_block *sb;
19508+
19509+ inode = dentry->d_inode;
19510+ sb = dentry->d_sb;
19511+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
19512+ if (unlikely(err))
19513+ goto out;
19514+ err = au_h_path_getattr(dentry, /*force*/0, &h_path);
19515+ if (unlikely(err))
19516+ goto out_si;
c06a8ce3 19517+ if (unlikely(!h_path.dentry))
c1595e42 19518+ /* illegally overlapped or something */
4a4d8108
AM
19519+ goto out_fill; /* pretending success */
19520+
c06a8ce3 19521+ positive = !!h_path.dentry->d_inode;
4a4d8108 19522+ if (positive)
c06a8ce3 19523+ err = vfs_getattr(&h_path, st);
4a4d8108
AM
19524+ if (!err) {
19525+ if (positive)
c06a8ce3
AM
19526+ au_refresh_iattr(inode, st,
19527+ h_path.dentry->d_inode->i_nlink);
4a4d8108 19528+ goto out_fill; /* success */
1facf9fc 19529+ }
7f207e10 19530+ AuTraceErr(err);
c1595e42 19531+ goto out_di;
4a4d8108 19532+
4f0767ce 19533+out_fill:
4a4d8108 19534+ generic_fillattr(inode, st);
c1595e42 19535+out_di:
4a4d8108 19536+ di_read_unlock(dentry, AuLock_IR);
c1595e42 19537+out_si:
4a4d8108 19538+ si_read_unlock(sb);
7f207e10
AM
19539+out:
19540+ AuTraceErr(err);
4a4d8108 19541+ return err;
1facf9fc 19542+}
19543+
19544+/* ---------------------------------------------------------------------- */
19545+
4a4d8108
AM
19546+static int h_readlink(struct dentry *dentry, int bindex, char __user *buf,
19547+ int bufsiz)
1facf9fc 19548+{
19549+ int err;
4a4d8108
AM
19550+ struct super_block *sb;
19551+ struct dentry *h_dentry;
1facf9fc 19552+
4a4d8108
AM
19553+ err = -EINVAL;
19554+ h_dentry = au_h_dptr(dentry, bindex);
19555+ if (unlikely(!h_dentry->d_inode->i_op->readlink))
19556+ goto out;
1facf9fc 19557+
4a4d8108
AM
19558+ err = security_inode_readlink(h_dentry);
19559+ if (unlikely(err))
dece6358 19560+ goto out;
1facf9fc 19561+
4a4d8108
AM
19562+ sb = dentry->d_sb;
19563+ if (!au_test_ro(sb, bindex, dentry->d_inode)) {
19564+ vfsub_touch_atime(au_sbr_mnt(sb, bindex), h_dentry);
19565+ fsstack_copy_attr_atime(dentry->d_inode, h_dentry->d_inode);
1facf9fc 19566+ }
4a4d8108 19567+ err = h_dentry->d_inode->i_op->readlink(h_dentry, buf, bufsiz);
1facf9fc 19568+
4f0767ce 19569+out:
4a4d8108
AM
19570+ return err;
19571+}
1facf9fc 19572+
4a4d8108
AM
19573+static int aufs_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
19574+{
19575+ int err;
1facf9fc 19576+
027c5e7a
AM
19577+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
19578+ if (unlikely(err))
19579+ goto out;
19580+ err = au_d_hashed_positive(dentry);
19581+ if (!err)
19582+ err = h_readlink(dentry, au_dbstart(dentry), buf, bufsiz);
4a4d8108 19583+ aufs_read_unlock(dentry, AuLock_IR);
1facf9fc 19584+
027c5e7a 19585+out:
4a4d8108
AM
19586+ return err;
19587+}
1facf9fc 19588+
4a4d8108
AM
19589+static void *aufs_follow_link(struct dentry *dentry, struct nameidata *nd)
19590+{
19591+ int err;
4a4d8108 19592+ mm_segment_t old_fs;
b752ccd1
AM
19593+ union {
19594+ char *k;
19595+ char __user *u;
19596+ } buf;
1facf9fc 19597+
4a4d8108 19598+ err = -ENOMEM;
537831f9 19599+ buf.k = (void *)__get_free_page(GFP_NOFS);
b752ccd1 19600+ if (unlikely(!buf.k))
4a4d8108 19601+ goto out;
1facf9fc 19602+
027c5e7a
AM
19603+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
19604+ if (unlikely(err))
19605+ goto out_name;
19606+
19607+ err = au_d_hashed_positive(dentry);
19608+ if (!err) {
19609+ old_fs = get_fs();
19610+ set_fs(KERNEL_DS);
19611+ err = h_readlink(dentry, au_dbstart(dentry), buf.u, PATH_MAX);
19612+ set_fs(old_fs);
19613+ }
4a4d8108 19614+ aufs_read_unlock(dentry, AuLock_IR);
1facf9fc 19615+
4a4d8108 19616+ if (err >= 0) {
b752ccd1 19617+ buf.k[err] = 0;
4a4d8108 19618+ /* will be freed by put_link */
b752ccd1 19619+ nd_set_link(nd, buf.k);
4a4d8108 19620+ return NULL; /* success */
1308ab2a 19621+ }
1facf9fc 19622+
027c5e7a 19623+out_name:
537831f9 19624+ free_page((unsigned long)buf.k);
4f0767ce 19625+out:
4a4d8108
AM
19626+ AuTraceErr(err);
19627+ return ERR_PTR(err);
19628+}
1facf9fc 19629+
4a4d8108
AM
19630+static void aufs_put_link(struct dentry *dentry __maybe_unused,
19631+ struct nameidata *nd, void *cookie __maybe_unused)
19632+{
537831f9
AM
19633+ char *p;
19634+
19635+ p = nd_get_link(nd);
19636+ if (!IS_ERR_OR_NULL(p))
19637+ free_page((unsigned long)p);
4a4d8108 19638+}
1facf9fc 19639+
4a4d8108 19640+/* ---------------------------------------------------------------------- */
1facf9fc 19641+
0c3ec466 19642+static int aufs_update_time(struct inode *inode, struct timespec *ts, int flags)
4a4d8108 19643+{
0c3ec466
AM
19644+ int err;
19645+ struct super_block *sb;
19646+ struct inode *h_inode;
19647+
19648+ sb = inode->i_sb;
19649+ /* mmap_sem might be acquired already, cf. aufs_mmap() */
19650+ lockdep_off();
19651+ si_read_lock(sb, AuLock_FLUSH);
19652+ ii_write_lock_child(inode);
19653+ lockdep_on();
19654+ h_inode = au_h_iptr(inode, au_ibstart(inode));
19655+ err = vfsub_update_time(h_inode, ts, flags);
19656+ lockdep_off();
38d290e6
JR
19657+ if (!err)
19658+ au_cpup_attr_timesizes(inode);
0c3ec466
AM
19659+ ii_write_unlock(inode);
19660+ si_read_unlock(sb);
19661+ lockdep_on();
38d290e6
JR
19662+
19663+ if (!err && (flags & S_VERSION))
19664+ inode_inc_iversion(inode);
19665+
0c3ec466 19666+ return err;
4a4d8108 19667+}
1facf9fc 19668+
4a4d8108 19669+/* ---------------------------------------------------------------------- */
1308ab2a 19670+
7f2ca4b1
JR
19671+/* no getattr version will be set by module.c:aufs_init() */
19672+struct inode_operations aufs_iop_nogetattr[AuIop_Last],
19673+ aufs_iop[] = {
19674+ [AuIop_SYMLINK] = {
19675+ .permission = aufs_permission,
c1595e42 19676+#ifdef CONFIG_FS_POSIX_ACL
7f2ca4b1
JR
19677+ .get_acl = aufs_get_acl,
19678+ .set_acl = aufs_set_acl, /* unsupport for symlink? */
c1595e42
JR
19679+#endif
19680+
7f2ca4b1
JR
19681+ .setattr = aufs_setattr,
19682+ .getattr = aufs_getattr,
0c3ec466 19683+
c1595e42 19684+#ifdef CONFIG_AUFS_XATTR
7f2ca4b1
JR
19685+ .setxattr = aufs_setxattr,
19686+ .getxattr = aufs_getxattr,
19687+ .listxattr = aufs_listxattr,
19688+ .removexattr = aufs_removexattr,
c1595e42
JR
19689+#endif
19690+
7f2ca4b1
JR
19691+ .readlink = aufs_readlink,
19692+ .follow_link = aufs_follow_link,
19693+ .put_link = aufs_put_link,
4a4d8108 19694+
7f2ca4b1
JR
19695+ /* .update_time = aufs_update_time */
19696+ },
19697+ [AuIop_DIR] = {
19698+ .create = aufs_create,
19699+ .lookup = aufs_lookup,
19700+ .link = aufs_link,
19701+ .unlink = aufs_unlink,
19702+ .symlink = aufs_symlink,
19703+ .mkdir = aufs_mkdir,
19704+ .rmdir = aufs_rmdir,
19705+ .mknod = aufs_mknod,
19706+ .rename = aufs_rename,
19707+
19708+ .permission = aufs_permission,
c1595e42 19709+#ifdef CONFIG_FS_POSIX_ACL
7f2ca4b1
JR
19710+ .get_acl = aufs_get_acl,
19711+ .set_acl = aufs_set_acl,
c1595e42
JR
19712+#endif
19713+
7f2ca4b1
JR
19714+ .setattr = aufs_setattr,
19715+ .getattr = aufs_getattr,
0c3ec466 19716+
c1595e42 19717+#ifdef CONFIG_AUFS_XATTR
7f2ca4b1
JR
19718+ .setxattr = aufs_setxattr,
19719+ .getxattr = aufs_getxattr,
19720+ .listxattr = aufs_listxattr,
19721+ .removexattr = aufs_removexattr,
c1595e42
JR
19722+#endif
19723+
7f2ca4b1
JR
19724+ .update_time = aufs_update_time,
19725+ .atomic_open = aufs_atomic_open,
19726+ .tmpfile = aufs_tmpfile
19727+ },
19728+ [AuIop_OTHER] = {
19729+ .permission = aufs_permission,
c1595e42 19730+#ifdef CONFIG_FS_POSIX_ACL
7f2ca4b1
JR
19731+ .get_acl = aufs_get_acl,
19732+ .set_acl = aufs_set_acl,
c1595e42
JR
19733+#endif
19734+
7f2ca4b1
JR
19735+ .setattr = aufs_setattr,
19736+ .getattr = aufs_getattr,
0c3ec466 19737+
c1595e42 19738+#ifdef CONFIG_AUFS_XATTR
7f2ca4b1
JR
19739+ .setxattr = aufs_setxattr,
19740+ .getxattr = aufs_getxattr,
19741+ .listxattr = aufs_listxattr,
19742+ .removexattr = aufs_removexattr,
c1595e42
JR
19743+#endif
19744+
7f2ca4b1
JR
19745+ .update_time = aufs_update_time
19746+ }
4a4d8108 19747+};
7f207e10
AM
19748diff -urN /usr/share/empty/fs/aufs/i_op_del.c linux/fs/aufs/i_op_del.c
19749--- /usr/share/empty/fs/aufs/i_op_del.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
19750+++ linux/fs/aufs/i_op_del.c 2016-02-28 11:27:01.280579017 +0100
19751@@ -0,0 +1,506 @@
1facf9fc 19752+/*
7f2ca4b1 19753+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 19754+ *
19755+ * This program, aufs is free software; you can redistribute it and/or modify
19756+ * it under the terms of the GNU General Public License as published by
19757+ * the Free Software Foundation; either version 2 of the License, or
19758+ * (at your option) any later version.
dece6358
AM
19759+ *
19760+ * This program is distributed in the hope that it will be useful,
19761+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
19762+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19763+ * GNU General Public License for more details.
19764+ *
19765+ * You should have received a copy of the GNU General Public License
523b37e3 19766+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 19767+ */
19768+
19769+/*
4a4d8108 19770+ * inode operations (del entry)
1308ab2a 19771+ */
dece6358 19772+
1308ab2a 19773+#include "aufs.h"
dece6358 19774+
4a4d8108
AM
19775+/*
19776+ * decide if a new whiteout for @dentry is necessary or not.
19777+ * when it is necessary, prepare the parent dir for the upper branch whose
19778+ * branch index is @bcpup for creation. the actual creation of the whiteout will
19779+ * be done by caller.
19780+ * return value:
19781+ * 0: wh is unnecessary
19782+ * plus: wh is necessary
19783+ * minus: error
19784+ */
19785+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
1308ab2a 19786+{
4a4d8108
AM
19787+ int need_wh, err;
19788+ aufs_bindex_t bstart;
19789+ struct super_block *sb;
dece6358 19790+
4a4d8108
AM
19791+ sb = dentry->d_sb;
19792+ bstart = au_dbstart(dentry);
19793+ if (*bcpup < 0) {
19794+ *bcpup = bstart;
19795+ if (au_test_ro(sb, bstart, dentry->d_inode)) {
19796+ err = AuWbrCopyup(au_sbi(sb), dentry);
19797+ *bcpup = err;
19798+ if (unlikely(err < 0))
19799+ goto out;
19800+ }
19801+ } else
19802+ AuDebugOn(bstart < *bcpup
19803+ || au_test_ro(sb, *bcpup, dentry->d_inode));
19804+ AuDbg("bcpup %d, bstart %d\n", *bcpup, bstart);
1308ab2a 19805+
4a4d8108
AM
19806+ if (*bcpup != bstart) {
19807+ err = au_cpup_dirs(dentry, *bcpup);
19808+ if (unlikely(err))
19809+ goto out;
19810+ need_wh = 1;
19811+ } else {
027c5e7a 19812+ struct au_dinfo *dinfo, *tmp;
4a4d8108 19813+
027c5e7a
AM
19814+ need_wh = -ENOMEM;
19815+ dinfo = au_di(dentry);
19816+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
19817+ if (tmp) {
19818+ au_di_cp(tmp, dinfo);
19819+ au_di_swap(tmp, dinfo);
19820+ /* returns the number of positive dentries */
537831f9 19821+ need_wh = au_lkup_dentry(dentry, bstart + 1, /*type*/0);
027c5e7a
AM
19822+ au_di_swap(tmp, dinfo);
19823+ au_rw_write_unlock(&tmp->di_rwsem);
19824+ au_di_free(tmp);
4a4d8108
AM
19825+ }
19826+ }
19827+ AuDbg("need_wh %d\n", need_wh);
19828+ err = need_wh;
19829+
4f0767ce 19830+out:
4a4d8108 19831+ return err;
1facf9fc 19832+}
19833+
4a4d8108
AM
19834+/*
19835+ * simple tests for the del-entry operations.
19836+ * following the checks in vfs, plus the parent-child relationship.
19837+ */
19838+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
19839+ struct dentry *h_parent, int isdir)
1facf9fc 19840+{
4a4d8108
AM
19841+ int err;
19842+ umode_t h_mode;
19843+ struct dentry *h_dentry, *h_latest;
1308ab2a 19844+ struct inode *h_inode;
1facf9fc 19845+
4a4d8108
AM
19846+ h_dentry = au_h_dptr(dentry, bindex);
19847+ h_inode = h_dentry->d_inode;
19848+ if (dentry->d_inode) {
19849+ err = -ENOENT;
19850+ if (unlikely(!h_inode || !h_inode->i_nlink))
19851+ goto out;
1facf9fc 19852+
4a4d8108
AM
19853+ h_mode = h_inode->i_mode;
19854+ if (!isdir) {
19855+ err = -EISDIR;
19856+ if (unlikely(S_ISDIR(h_mode)))
19857+ goto out;
19858+ } else if (unlikely(!S_ISDIR(h_mode))) {
19859+ err = -ENOTDIR;
19860+ goto out;
19861+ }
19862+ } else {
19863+ /* rename(2) case */
19864+ err = -EIO;
19865+ if (unlikely(h_inode))
19866+ goto out;
19867+ }
1facf9fc 19868+
4a4d8108
AM
19869+ err = -ENOENT;
19870+ /* expected parent dir is locked */
19871+ if (unlikely(h_parent != h_dentry->d_parent))
19872+ goto out;
19873+ err = 0;
19874+
19875+ /*
19876+ * rmdir a dir may break the consistency on some filesystem.
19877+ * let's try heavy test.
19878+ */
19879+ err = -EACCES;
076b876e
AM
19880+ if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1)
19881+ && au_test_h_perm(h_parent->d_inode,
19882+ MAY_EXEC | MAY_WRITE)))
4a4d8108
AM
19883+ goto out;
19884+
076b876e 19885+ h_latest = au_sio_lkup_one(&dentry->d_name, h_parent);
4a4d8108
AM
19886+ err = -EIO;
19887+ if (IS_ERR(h_latest))
19888+ goto out;
19889+ if (h_latest == h_dentry)
19890+ err = 0;
19891+ dput(h_latest);
19892+
4f0767ce 19893+out:
4a4d8108 19894+ return err;
1308ab2a 19895+}
1facf9fc 19896+
4a4d8108
AM
19897+/*
19898+ * decide the branch where we operate for @dentry. the branch index will be set
19899+ * @rbcpup. after deciding it, 'pin' it and store the timestamps of the parent
19900+ * dir for reverting.
19901+ * when a new whiteout is necessary, create it.
19902+ */
19903+static struct dentry*
19904+lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup,
19905+ struct au_dtime *dt, struct au_pin *pin)
1308ab2a 19906+{
4a4d8108
AM
19907+ struct dentry *wh_dentry;
19908+ struct super_block *sb;
19909+ struct path h_path;
19910+ int err, need_wh;
19911+ unsigned int udba;
19912+ aufs_bindex_t bcpup;
dece6358 19913+
4a4d8108
AM
19914+ need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup);
19915+ wh_dentry = ERR_PTR(need_wh);
19916+ if (unlikely(need_wh < 0))
19917+ goto out;
19918+
19919+ sb = dentry->d_sb;
19920+ udba = au_opt_udba(sb);
19921+ bcpup = *rbcpup;
19922+ err = au_pin(pin, dentry, bcpup, udba,
19923+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
19924+ wh_dentry = ERR_PTR(err);
19925+ if (unlikely(err))
19926+ goto out;
19927+
19928+ h_path.dentry = au_pinned_h_parent(pin);
19929+ if (udba != AuOpt_UDBA_NONE
19930+ && au_dbstart(dentry) == bcpup) {
19931+ err = au_may_del(dentry, bcpup, h_path.dentry, isdir);
19932+ wh_dentry = ERR_PTR(err);
19933+ if (unlikely(err))
19934+ goto out_unpin;
19935+ }
19936+
19937+ h_path.mnt = au_sbr_mnt(sb, bcpup);
19938+ au_dtime_store(dt, au_pinned_parent(pin), &h_path);
19939+ wh_dentry = NULL;
19940+ if (!need_wh)
19941+ goto out; /* success, no need to create whiteout */
19942+
19943+ wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry);
19944+ if (IS_ERR(wh_dentry))
19945+ goto out_unpin;
19946+
19947+ /* returns with the parent is locked and wh_dentry is dget-ed */
19948+ goto out; /* success */
19949+
4f0767ce 19950+out_unpin:
4a4d8108 19951+ au_unpin(pin);
4f0767ce 19952+out:
4a4d8108 19953+ return wh_dentry;
1facf9fc 19954+}
19955+
4a4d8108
AM
19956+/*
19957+ * when removing a dir, rename it to a unique temporary whiteout-ed name first
19958+ * in order to be revertible and save time for removing many child whiteouts
19959+ * under the dir.
19960+ * returns 1 when there are too many child whiteouts and caller should remove
19961+ * them asynchronously. returns 0 when the number of children is small enough to
19962+ * remove now or the branch fs is a remote fs.
19963+ * otherwise return an error.
19964+ */
19965+static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex,
19966+ struct au_nhash *whlist, struct inode *dir)
1facf9fc 19967+{
4a4d8108
AM
19968+ int rmdir_later, err, dirwh;
19969+ struct dentry *h_dentry;
19970+ struct super_block *sb;
19971+
19972+ sb = dentry->d_sb;
19973+ SiMustAnyLock(sb);
19974+ h_dentry = au_h_dptr(dentry, bindex);
19975+ err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex));
19976+ if (unlikely(err))
19977+ goto out;
19978+
19979+ /* stop monitoring */
19980+ au_hn_free(au_hi(dentry->d_inode, bindex));
19981+
19982+ if (!au_test_fs_remote(h_dentry->d_sb)) {
19983+ dirwh = au_sbi(sb)->si_dirwh;
19984+ rmdir_later = (dirwh <= 1);
19985+ if (!rmdir_later)
19986+ rmdir_later = au_nhash_test_longer_wh(whlist, bindex,
19987+ dirwh);
19988+ if (rmdir_later)
19989+ return rmdir_later;
19990+ }
1facf9fc 19991+
4a4d8108
AM
19992+ err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist);
19993+ if (unlikely(err)) {
523b37e3
AM
19994+ AuIOErr("rmdir %pd, b%d failed, %d. ignored\n",
19995+ h_dentry, bindex, err);
4a4d8108
AM
19996+ err = 0;
19997+ }
dece6358 19998+
4f0767ce 19999+out:
4a4d8108
AM
20000+ AuTraceErr(err);
20001+ return err;
20002+}
1308ab2a 20003+
4a4d8108
AM
20004+/*
20005+ * final procedure for deleting an entry.
20006+ * maintain dentry and iattr.
20007+ */
20008+static void epilog(struct inode *dir, struct dentry *dentry,
20009+ aufs_bindex_t bindex)
20010+{
20011+ struct inode *inode;
1308ab2a 20012+
4a4d8108
AM
20013+ inode = dentry->d_inode;
20014+ d_drop(dentry);
20015+ inode->i_ctime = dir->i_ctime;
1308ab2a 20016+
7f2ca4b1 20017+ au_dir_ts(dir, bindex);
4a4d8108 20018+ dir->i_version++;
1facf9fc 20019+}
20020+
4a4d8108
AM
20021+/*
20022+ * when an error happened, remove the created whiteout and revert everything.
20023+ */
7f207e10
AM
20024+static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex,
20025+ aufs_bindex_t bwh, struct dentry *wh_dentry,
20026+ struct dentry *dentry, struct au_dtime *dt)
1facf9fc 20027+{
4a4d8108
AM
20028+ int rerr;
20029+ struct path h_path = {
20030+ .dentry = wh_dentry,
7f207e10 20031+ .mnt = au_sbr_mnt(dir->i_sb, bindex)
4a4d8108 20032+ };
dece6358 20033+
7f207e10 20034+ rerr = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, dentry);
4a4d8108
AM
20035+ if (!rerr) {
20036+ au_set_dbwh(dentry, bwh);
20037+ au_dtime_revert(dt);
20038+ return 0;
20039+ }
dece6358 20040+
523b37e3 20041+ AuIOErr("%pd reverting whiteout failed(%d, %d)\n", dentry, err, rerr);
4a4d8108 20042+ return -EIO;
1facf9fc 20043+}
20044+
4a4d8108 20045+/* ---------------------------------------------------------------------- */
1facf9fc 20046+
4a4d8108 20047+int aufs_unlink(struct inode *dir, struct dentry *dentry)
1308ab2a 20048+{
4a4d8108
AM
20049+ int err;
20050+ aufs_bindex_t bwh, bindex, bstart;
523b37e3 20051+ struct inode *inode, *h_dir, *delegated;
4a4d8108 20052+ struct dentry *parent, *wh_dentry;
c2b27bf2
AM
20053+ /* to reduce stack size */
20054+ struct {
20055+ struct au_dtime dt;
20056+ struct au_pin pin;
20057+ struct path h_path;
20058+ } *a;
1facf9fc 20059+
4a4d8108 20060+ IMustLock(dir);
027c5e7a 20061+
c2b27bf2
AM
20062+ err = -ENOMEM;
20063+ a = kmalloc(sizeof(*a), GFP_NOFS);
20064+ if (unlikely(!a))
20065+ goto out;
20066+
027c5e7a
AM
20067+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
20068+ if (unlikely(err))
c2b27bf2 20069+ goto out_free;
027c5e7a
AM
20070+ err = au_d_hashed_positive(dentry);
20071+ if (unlikely(err))
20072+ goto out_unlock;
4a4d8108 20073+ inode = dentry->d_inode;
4a4d8108 20074+ IMustLock(inode);
027c5e7a 20075+ err = -EISDIR;
7f2ca4b1 20076+ if (unlikely(d_is_dir(dentry)))
027c5e7a 20077+ goto out_unlock; /* possible? */
1facf9fc 20078+
4a4d8108
AM
20079+ bstart = au_dbstart(dentry);
20080+ bwh = au_dbwh(dentry);
20081+ bindex = -1;
027c5e7a
AM
20082+ parent = dentry->d_parent; /* dir inode is locked */
20083+ di_write_lock_parent(parent);
c2b27bf2
AM
20084+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &a->dt,
20085+ &a->pin);
4a4d8108
AM
20086+ err = PTR_ERR(wh_dentry);
20087+ if (IS_ERR(wh_dentry))
027c5e7a 20088+ goto out_parent;
1facf9fc 20089+
c2b27bf2
AM
20090+ a->h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
20091+ a->h_path.dentry = au_h_dptr(dentry, bstart);
20092+ dget(a->h_path.dentry);
4a4d8108 20093+ if (bindex == bstart) {
c2b27bf2 20094+ h_dir = au_pinned_h_dir(&a->pin);
523b37e3
AM
20095+ delegated = NULL;
20096+ err = vfsub_unlink(h_dir, &a->h_path, &delegated, /*force*/0);
20097+ if (unlikely(err == -EWOULDBLOCK)) {
20098+ pr_warn("cannot retry for NFSv4 delegation"
20099+ " for an internal unlink\n");
20100+ iput(delegated);
20101+ }
4a4d8108
AM
20102+ } else {
20103+ /* dir inode is locked */
20104+ h_dir = wh_dentry->d_parent->d_inode;
20105+ IMustLock(h_dir);
20106+ err = 0;
20107+ }
dece6358 20108+
4a4d8108 20109+ if (!err) {
7f207e10 20110+ vfsub_drop_nlink(inode);
4a4d8108
AM
20111+ epilog(dir, dentry, bindex);
20112+
20113+ /* update target timestamps */
20114+ if (bindex == bstart) {
c2b27bf2
AM
20115+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL);
20116+ /*ignore*/
20117+ inode->i_ctime = a->h_path.dentry->d_inode->i_ctime;
4a4d8108
AM
20118+ } else
20119+ /* todo: this timestamp may be reverted later */
20120+ inode->i_ctime = h_dir->i_ctime;
027c5e7a 20121+ goto out_unpin; /* success */
1facf9fc 20122+ }
20123+
4a4d8108
AM
20124+ /* revert */
20125+ if (wh_dentry) {
20126+ int rerr;
20127+
c2b27bf2
AM
20128+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
20129+ &a->dt);
4a4d8108
AM
20130+ if (rerr)
20131+ err = rerr;
dece6358 20132+ }
1facf9fc 20133+
027c5e7a 20134+out_unpin:
c2b27bf2 20135+ au_unpin(&a->pin);
4a4d8108 20136+ dput(wh_dentry);
c2b27bf2 20137+ dput(a->h_path.dentry);
027c5e7a 20138+out_parent:
4a4d8108 20139+ di_write_unlock(parent);
027c5e7a 20140+out_unlock:
4a4d8108 20141+ aufs_read_unlock(dentry, AuLock_DW);
c2b27bf2
AM
20142+out_free:
20143+ kfree(a);
027c5e7a 20144+out:
4a4d8108 20145+ return err;
dece6358
AM
20146+}
20147+
4a4d8108 20148+int aufs_rmdir(struct inode *dir, struct dentry *dentry)
1308ab2a 20149+{
4a4d8108
AM
20150+ int err, rmdir_later;
20151+ aufs_bindex_t bwh, bindex, bstart;
4a4d8108
AM
20152+ struct inode *inode;
20153+ struct dentry *parent, *wh_dentry, *h_dentry;
20154+ struct au_whtmp_rmdir *args;
c2b27bf2
AM
20155+ /* to reduce stack size */
20156+ struct {
20157+ struct au_dtime dt;
20158+ struct au_pin pin;
20159+ } *a;
1facf9fc 20160+
4a4d8108 20161+ IMustLock(dir);
027c5e7a 20162+
c2b27bf2
AM
20163+ err = -ENOMEM;
20164+ a = kmalloc(sizeof(*a), GFP_NOFS);
20165+ if (unlikely(!a))
20166+ goto out;
20167+
027c5e7a
AM
20168+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
20169+ if (unlikely(err))
c2b27bf2 20170+ goto out_free;
53392da6
AM
20171+ err = au_alive_dir(dentry);
20172+ if (unlikely(err))
027c5e7a 20173+ goto out_unlock;
53392da6 20174+ inode = dentry->d_inode;
4a4d8108 20175+ IMustLock(inode);
027c5e7a 20176+ err = -ENOTDIR;
7f2ca4b1 20177+ if (unlikely(!d_is_dir(dentry)))
027c5e7a 20178+ goto out_unlock; /* possible? */
dece6358 20179+
4a4d8108
AM
20180+ err = -ENOMEM;
20181+ args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS);
20182+ if (unlikely(!args))
20183+ goto out_unlock;
dece6358 20184+
4a4d8108
AM
20185+ parent = dentry->d_parent; /* dir inode is locked */
20186+ di_write_lock_parent(parent);
20187+ err = au_test_empty(dentry, &args->whlist);
20188+ if (unlikely(err))
027c5e7a 20189+ goto out_parent;
1facf9fc 20190+
4a4d8108
AM
20191+ bstart = au_dbstart(dentry);
20192+ bwh = au_dbwh(dentry);
20193+ bindex = -1;
c2b27bf2
AM
20194+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &a->dt,
20195+ &a->pin);
4a4d8108
AM
20196+ err = PTR_ERR(wh_dentry);
20197+ if (IS_ERR(wh_dentry))
027c5e7a 20198+ goto out_parent;
1facf9fc 20199+
4a4d8108
AM
20200+ h_dentry = au_h_dptr(dentry, bstart);
20201+ dget(h_dentry);
20202+ rmdir_later = 0;
20203+ if (bindex == bstart) {
20204+ err = renwh_and_rmdir(dentry, bstart, &args->whlist, dir);
20205+ if (err > 0) {
20206+ rmdir_later = err;
20207+ err = 0;
20208+ }
20209+ } else {
20210+ /* stop monitoring */
20211+ au_hn_free(au_hi(inode, bstart));
20212+
20213+ /* dir inode is locked */
20214+ IMustLock(wh_dentry->d_parent->d_inode);
1facf9fc 20215+ err = 0;
20216+ }
20217+
4a4d8108 20218+ if (!err) {
027c5e7a 20219+ vfsub_dead_dir(inode);
4a4d8108
AM
20220+ au_set_dbdiropq(dentry, -1);
20221+ epilog(dir, dentry, bindex);
1308ab2a 20222+
4a4d8108
AM
20223+ if (rmdir_later) {
20224+ au_whtmp_kick_rmdir(dir, bstart, h_dentry, args);
20225+ args = NULL;
20226+ }
1308ab2a 20227+
4a4d8108 20228+ goto out_unpin; /* success */
1facf9fc 20229+ }
20230+
4a4d8108
AM
20231+ /* revert */
20232+ AuLabel(revert);
20233+ if (wh_dentry) {
20234+ int rerr;
1308ab2a 20235+
c2b27bf2
AM
20236+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
20237+ &a->dt);
4a4d8108
AM
20238+ if (rerr)
20239+ err = rerr;
1facf9fc 20240+ }
20241+
4f0767ce 20242+out_unpin:
c2b27bf2 20243+ au_unpin(&a->pin);
4a4d8108
AM
20244+ dput(wh_dentry);
20245+ dput(h_dentry);
027c5e7a 20246+out_parent:
4a4d8108
AM
20247+ di_write_unlock(parent);
20248+ if (args)
20249+ au_whtmp_rmdir_free(args);
4f0767ce 20250+out_unlock:
4a4d8108 20251+ aufs_read_unlock(dentry, AuLock_DW);
c2b27bf2
AM
20252+out_free:
20253+ kfree(a);
4f0767ce 20254+out:
4a4d8108
AM
20255+ AuTraceErr(err);
20256+ return err;
dece6358 20257+}
7f207e10
AM
20258diff -urN /usr/share/empty/fs/aufs/i_op_ren.c linux/fs/aufs/i_op_ren.c
20259--- /usr/share/empty/fs/aufs/i_op_ren.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
20260+++ linux/fs/aufs/i_op_ren.c 2016-02-28 11:27:01.280579017 +0100
20261@@ -0,0 +1,1013 @@
1facf9fc 20262+/*
7f2ca4b1 20263+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 20264+ *
20265+ * This program, aufs is free software; you can redistribute it and/or modify
20266+ * it under the terms of the GNU General Public License as published by
20267+ * the Free Software Foundation; either version 2 of the License, or
20268+ * (at your option) any later version.
dece6358
AM
20269+ *
20270+ * This program is distributed in the hope that it will be useful,
20271+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20272+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20273+ * GNU General Public License for more details.
20274+ *
20275+ * You should have received a copy of the GNU General Public License
523b37e3 20276+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 20277+ */
20278+
20279+/*
4a4d8108
AM
20280+ * inode operation (rename entry)
20281+ * todo: this is crazy monster
1facf9fc 20282+ */
20283+
20284+#include "aufs.h"
20285+
4a4d8108
AM
20286+enum { AuSRC, AuDST, AuSrcDst };
20287+enum { AuPARENT, AuCHILD, AuParentChild };
1facf9fc 20288+
4a4d8108
AM
20289+#define AuRen_ISDIR 1
20290+#define AuRen_ISSAMEDIR (1 << 1)
20291+#define AuRen_WHSRC (1 << 2)
20292+#define AuRen_WHDST (1 << 3)
20293+#define AuRen_MNT_WRITE (1 << 4)
20294+#define AuRen_DT_DSTDIR (1 << 5)
20295+#define AuRen_DIROPQ (1 << 6)
4a4d8108 20296+#define au_ftest_ren(flags, name) ((flags) & AuRen_##name)
7f207e10
AM
20297+#define au_fset_ren(flags, name) \
20298+ do { (flags) |= AuRen_##name; } while (0)
20299+#define au_fclr_ren(flags, name) \
20300+ do { (flags) &= ~AuRen_##name; } while (0)
1facf9fc 20301+
4a4d8108
AM
20302+struct au_ren_args {
20303+ struct {
20304+ struct dentry *dentry, *h_dentry, *parent, *h_parent,
20305+ *wh_dentry;
20306+ struct inode *dir, *inode;
20307+ struct au_hinode *hdir;
20308+ struct au_dtime dt[AuParentChild];
20309+ aufs_bindex_t bstart;
20310+ } sd[AuSrcDst];
1facf9fc 20311+
4a4d8108
AM
20312+#define src_dentry sd[AuSRC].dentry
20313+#define src_dir sd[AuSRC].dir
20314+#define src_inode sd[AuSRC].inode
20315+#define src_h_dentry sd[AuSRC].h_dentry
20316+#define src_parent sd[AuSRC].parent
20317+#define src_h_parent sd[AuSRC].h_parent
20318+#define src_wh_dentry sd[AuSRC].wh_dentry
20319+#define src_hdir sd[AuSRC].hdir
20320+#define src_h_dir sd[AuSRC].hdir->hi_inode
20321+#define src_dt sd[AuSRC].dt
20322+#define src_bstart sd[AuSRC].bstart
1facf9fc 20323+
4a4d8108
AM
20324+#define dst_dentry sd[AuDST].dentry
20325+#define dst_dir sd[AuDST].dir
20326+#define dst_inode sd[AuDST].inode
20327+#define dst_h_dentry sd[AuDST].h_dentry
20328+#define dst_parent sd[AuDST].parent
20329+#define dst_h_parent sd[AuDST].h_parent
20330+#define dst_wh_dentry sd[AuDST].wh_dentry
20331+#define dst_hdir sd[AuDST].hdir
20332+#define dst_h_dir sd[AuDST].hdir->hi_inode
20333+#define dst_dt sd[AuDST].dt
20334+#define dst_bstart sd[AuDST].bstart
20335+
20336+ struct dentry *h_trap;
20337+ struct au_branch *br;
20338+ struct au_hinode *src_hinode;
20339+ struct path h_path;
20340+ struct au_nhash whlist;
027c5e7a 20341+ aufs_bindex_t btgt, src_bwh, src_bdiropq;
1facf9fc 20342+
1308ab2a 20343+ unsigned int flags;
1facf9fc 20344+
4a4d8108
AM
20345+ struct au_whtmp_rmdir *thargs;
20346+ struct dentry *h_dst;
20347+};
1308ab2a 20348+
4a4d8108 20349+/* ---------------------------------------------------------------------- */
1308ab2a 20350+
4a4d8108
AM
20351+/*
20352+ * functions for reverting.
20353+ * when an error happened in a single rename systemcall, we should revert
7f2ca4b1 20354+ * everything as if nothing happened.
4a4d8108
AM
20355+ * we don't need to revert the copied-up/down parent dirs since they are
20356+ * harmless.
20357+ */
1facf9fc 20358+
4a4d8108
AM
20359+#define RevertFailure(fmt, ...) do { \
20360+ AuIOErr("revert failure: " fmt " (%d, %d)\n", \
20361+ ##__VA_ARGS__, err, rerr); \
20362+ err = -EIO; \
20363+} while (0)
1facf9fc 20364+
4a4d8108 20365+static void au_ren_rev_diropq(int err, struct au_ren_args *a)
1facf9fc 20366+{
4a4d8108 20367+ int rerr;
1facf9fc 20368+
4a4d8108
AM
20369+ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
20370+ rerr = au_diropq_remove(a->src_dentry, a->btgt);
20371+ au_hn_imtx_unlock(a->src_hinode);
027c5e7a 20372+ au_set_dbdiropq(a->src_dentry, a->src_bdiropq);
4a4d8108 20373+ if (rerr)
523b37e3 20374+ RevertFailure("remove diropq %pd", a->src_dentry);
4a4d8108 20375+}
1facf9fc 20376+
4a4d8108
AM
20377+static void au_ren_rev_rename(int err, struct au_ren_args *a)
20378+{
20379+ int rerr;
523b37e3 20380+ struct inode *delegated;
1facf9fc 20381+
b4510431
AM
20382+ a->h_path.dentry = vfsub_lkup_one(&a->src_dentry->d_name,
20383+ a->src_h_parent);
4a4d8108
AM
20384+ rerr = PTR_ERR(a->h_path.dentry);
20385+ if (IS_ERR(a->h_path.dentry)) {
523b37e3 20386+ RevertFailure("lkup one %pd", a->src_dentry);
4a4d8108 20387+ return;
1facf9fc 20388+ }
20389+
523b37e3 20390+ delegated = NULL;
4a4d8108
AM
20391+ rerr = vfsub_rename(a->dst_h_dir,
20392+ au_h_dptr(a->src_dentry, a->btgt),
523b37e3
AM
20393+ a->src_h_dir, &a->h_path, &delegated);
20394+ if (unlikely(rerr == -EWOULDBLOCK)) {
20395+ pr_warn("cannot retry for NFSv4 delegation"
20396+ " for an internal rename\n");
20397+ iput(delegated);
20398+ }
4a4d8108
AM
20399+ d_drop(a->h_path.dentry);
20400+ dput(a->h_path.dentry);
20401+ /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */
20402+ if (rerr)
523b37e3 20403+ RevertFailure("rename %pd", a->src_dentry);
1facf9fc 20404+}
20405+
4a4d8108 20406+static void au_ren_rev_whtmp(int err, struct au_ren_args *a)
1facf9fc 20407+{
4a4d8108 20408+ int rerr;
523b37e3 20409+ struct inode *delegated;
dece6358 20410+
b4510431
AM
20411+ a->h_path.dentry = vfsub_lkup_one(&a->dst_dentry->d_name,
20412+ a->dst_h_parent);
4a4d8108
AM
20413+ rerr = PTR_ERR(a->h_path.dentry);
20414+ if (IS_ERR(a->h_path.dentry)) {
523b37e3 20415+ RevertFailure("lkup one %pd", a->dst_dentry);
4a4d8108
AM
20416+ return;
20417+ }
20418+ if (a->h_path.dentry->d_inode) {
20419+ d_drop(a->h_path.dentry);
20420+ dput(a->h_path.dentry);
20421+ return;
dece6358
AM
20422+ }
20423+
523b37e3
AM
20424+ delegated = NULL;
20425+ rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path,
20426+ &delegated);
20427+ if (unlikely(rerr == -EWOULDBLOCK)) {
20428+ pr_warn("cannot retry for NFSv4 delegation"
20429+ " for an internal rename\n");
20430+ iput(delegated);
20431+ }
4a4d8108
AM
20432+ d_drop(a->h_path.dentry);
20433+ dput(a->h_path.dentry);
20434+ if (!rerr)
20435+ au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst));
20436+ else
523b37e3 20437+ RevertFailure("rename %pd", a->h_dst);
4a4d8108 20438+}
1308ab2a 20439+
4a4d8108
AM
20440+static void au_ren_rev_whsrc(int err, struct au_ren_args *a)
20441+{
20442+ int rerr;
1308ab2a 20443+
4a4d8108
AM
20444+ a->h_path.dentry = a->src_wh_dentry;
20445+ rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry);
027c5e7a 20446+ au_set_dbwh(a->src_dentry, a->src_bwh);
4a4d8108 20447+ if (rerr)
523b37e3 20448+ RevertFailure("unlink %pd", a->src_wh_dentry);
4a4d8108 20449+}
4a4d8108 20450+#undef RevertFailure
1facf9fc 20451+
1308ab2a 20452+/* ---------------------------------------------------------------------- */
20453+
4a4d8108
AM
20454+/*
20455+ * when we have to copyup the renaming entry, do it with the rename-target name
20456+ * in order to minimize the cost (the later actual rename is unnecessary).
20457+ * otherwise rename it on the target branch.
20458+ */
20459+static int au_ren_or_cpup(struct au_ren_args *a)
1facf9fc 20460+{
dece6358 20461+ int err;
4a4d8108 20462+ struct dentry *d;
523b37e3 20463+ struct inode *delegated;
1facf9fc 20464+
4a4d8108
AM
20465+ d = a->src_dentry;
20466+ if (au_dbstart(d) == a->btgt) {
20467+ a->h_path.dentry = a->dst_h_dentry;
20468+ if (au_ftest_ren(a->flags, DIROPQ)
20469+ && au_dbdiropq(d) == a->btgt)
20470+ au_fclr_ren(a->flags, DIROPQ);
20471+ AuDebugOn(au_dbstart(d) != a->btgt);
523b37e3 20472+ delegated = NULL;
4a4d8108 20473+ err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt),
523b37e3
AM
20474+ a->dst_h_dir, &a->h_path, &delegated);
20475+ if (unlikely(err == -EWOULDBLOCK)) {
20476+ pr_warn("cannot retry for NFSv4 delegation"
20477+ " for an internal rename\n");
20478+ iput(delegated);
20479+ }
c2b27bf2 20480+ } else
86dc4139 20481+ BUG();
1308ab2a 20482+
027c5e7a
AM
20483+ if (!err && a->h_dst)
20484+ /* it will be set to dinfo later */
20485+ dget(a->h_dst);
1facf9fc 20486+
dece6358
AM
20487+ return err;
20488+}
1facf9fc 20489+
4a4d8108
AM
20490+/* cf. aufs_rmdir() */
20491+static int au_ren_del_whtmp(struct au_ren_args *a)
dece6358 20492+{
4a4d8108
AM
20493+ int err;
20494+ struct inode *dir;
1facf9fc 20495+
4a4d8108
AM
20496+ dir = a->dst_dir;
20497+ SiMustAnyLock(dir->i_sb);
20498+ if (!au_nhash_test_longer_wh(&a->whlist, a->btgt,
20499+ au_sbi(dir->i_sb)->si_dirwh)
20500+ || au_test_fs_remote(a->h_dst->d_sb)) {
20501+ err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist);
20502+ if (unlikely(err))
523b37e3
AM
20503+ pr_warn("failed removing whtmp dir %pd (%d), "
20504+ "ignored.\n", a->h_dst, err);
4a4d8108
AM
20505+ } else {
20506+ au_nhash_wh_free(&a->thargs->whlist);
20507+ a->thargs->whlist = a->whlist;
20508+ a->whlist.nh_num = 0;
20509+ au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs);
20510+ dput(a->h_dst);
20511+ a->thargs = NULL;
20512+ }
20513+
20514+ return 0;
1308ab2a 20515+}
1facf9fc 20516+
4a4d8108
AM
20517+/* make it 'opaque' dir. */
20518+static int au_ren_diropq(struct au_ren_args *a)
20519+{
20520+ int err;
20521+ struct dentry *diropq;
1facf9fc 20522+
4a4d8108 20523+ err = 0;
027c5e7a 20524+ a->src_bdiropq = au_dbdiropq(a->src_dentry);
4a4d8108
AM
20525+ a->src_hinode = au_hi(a->src_inode, a->btgt);
20526+ au_hn_imtx_lock_nested(a->src_hinode, AuLsc_I_CHILD);
20527+ diropq = au_diropq_create(a->src_dentry, a->btgt);
20528+ au_hn_imtx_unlock(a->src_hinode);
20529+ if (IS_ERR(diropq))
20530+ err = PTR_ERR(diropq);
076b876e
AM
20531+ else
20532+ dput(diropq);
1facf9fc 20533+
4a4d8108
AM
20534+ return err;
20535+}
1facf9fc 20536+
4a4d8108
AM
20537+static int do_rename(struct au_ren_args *a)
20538+{
20539+ int err;
20540+ struct dentry *d, *h_d;
1facf9fc 20541+
4a4d8108
AM
20542+ /* prepare workqueue args for asynchronous rmdir */
20543+ h_d = a->dst_h_dentry;
20544+ if (au_ftest_ren(a->flags, ISDIR) && h_d->d_inode) {
20545+ err = -ENOMEM;
20546+ a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb, GFP_NOFS);
20547+ if (unlikely(!a->thargs))
20548+ goto out;
20549+ a->h_dst = dget(h_d);
20550+ }
1facf9fc 20551+
4a4d8108
AM
20552+ /* create whiteout for src_dentry */
20553+ if (au_ftest_ren(a->flags, WHSRC)) {
027c5e7a
AM
20554+ a->src_bwh = au_dbwh(a->src_dentry);
20555+ AuDebugOn(a->src_bwh >= 0);
4a4d8108
AM
20556+ a->src_wh_dentry
20557+ = au_wh_create(a->src_dentry, a->btgt, a->src_h_parent);
20558+ err = PTR_ERR(a->src_wh_dentry);
20559+ if (IS_ERR(a->src_wh_dentry))
20560+ goto out_thargs;
20561+ }
1facf9fc 20562+
4a4d8108
AM
20563+ /* lookup whiteout for dentry */
20564+ if (au_ftest_ren(a->flags, WHDST)) {
20565+ h_d = au_wh_lkup(a->dst_h_parent, &a->dst_dentry->d_name,
20566+ a->br);
20567+ err = PTR_ERR(h_d);
20568+ if (IS_ERR(h_d))
20569+ goto out_whsrc;
20570+ if (!h_d->d_inode)
20571+ dput(h_d);
20572+ else
20573+ a->dst_wh_dentry = h_d;
20574+ }
1facf9fc 20575+
4a4d8108
AM
20576+ /* rename dentry to tmpwh */
20577+ if (a->thargs) {
20578+ err = au_whtmp_ren(a->dst_h_dentry, a->br);
20579+ if (unlikely(err))
20580+ goto out_whdst;
dece6358 20581+
4a4d8108
AM
20582+ d = a->dst_dentry;
20583+ au_set_h_dptr(d, a->btgt, NULL);
86dc4139 20584+ err = au_lkup_neg(d, a->btgt, /*wh*/0);
4a4d8108
AM
20585+ if (unlikely(err))
20586+ goto out_whtmp;
20587+ a->dst_h_dentry = au_h_dptr(d, a->btgt);
20588+ }
1facf9fc 20589+
c2b27bf2 20590+ BUG_ON(a->dst_h_dentry->d_inode && a->src_bstart != a->btgt);
1facf9fc 20591+
4a4d8108
AM
20592+ /* rename by vfs_rename or cpup */
20593+ d = a->dst_dentry;
20594+ if (au_ftest_ren(a->flags, ISDIR)
20595+ && (a->dst_wh_dentry
20596+ || au_dbdiropq(d) == a->btgt
20597+ /* hide the lower to keep xino */
20598+ || a->btgt < au_dbend(d)
20599+ || au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ)))
20600+ au_fset_ren(a->flags, DIROPQ);
20601+ err = au_ren_or_cpup(a);
20602+ if (unlikely(err))
20603+ /* leave the copied-up one */
20604+ goto out_whtmp;
1308ab2a 20605+
4a4d8108
AM
20606+ /* make dir opaque */
20607+ if (au_ftest_ren(a->flags, DIROPQ)) {
20608+ err = au_ren_diropq(a);
20609+ if (unlikely(err))
20610+ goto out_rename;
20611+ }
1308ab2a 20612+
4a4d8108
AM
20613+ /* update target timestamps */
20614+ AuDebugOn(au_dbstart(a->src_dentry) != a->btgt);
20615+ a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt);
20616+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
20617+ a->src_inode->i_ctime = a->h_path.dentry->d_inode->i_ctime;
1facf9fc 20618+
4a4d8108
AM
20619+ /* remove whiteout for dentry */
20620+ if (a->dst_wh_dentry) {
20621+ a->h_path.dentry = a->dst_wh_dentry;
20622+ err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path,
20623+ a->dst_dentry);
20624+ if (unlikely(err))
20625+ goto out_diropq;
20626+ }
1facf9fc 20627+
4a4d8108
AM
20628+ /* remove whtmp */
20629+ if (a->thargs)
20630+ au_ren_del_whtmp(a); /* ignore this error */
1308ab2a 20631+
076b876e 20632+ au_fhsm_wrote(a->src_dentry->d_sb, a->btgt, /*force*/0);
4a4d8108
AM
20633+ err = 0;
20634+ goto out_success;
20635+
4f0767ce 20636+out_diropq:
4a4d8108
AM
20637+ if (au_ftest_ren(a->flags, DIROPQ))
20638+ au_ren_rev_diropq(err, a);
4f0767ce 20639+out_rename:
7f2ca4b1 20640+ au_ren_rev_rename(err, a);
027c5e7a 20641+ dput(a->h_dst);
4f0767ce 20642+out_whtmp:
4a4d8108
AM
20643+ if (a->thargs)
20644+ au_ren_rev_whtmp(err, a);
4f0767ce 20645+out_whdst:
4a4d8108
AM
20646+ dput(a->dst_wh_dentry);
20647+ a->dst_wh_dentry = NULL;
4f0767ce 20648+out_whsrc:
4a4d8108
AM
20649+ if (a->src_wh_dentry)
20650+ au_ren_rev_whsrc(err, a);
4f0767ce 20651+out_success:
4a4d8108
AM
20652+ dput(a->src_wh_dentry);
20653+ dput(a->dst_wh_dentry);
4f0767ce 20654+out_thargs:
4a4d8108
AM
20655+ if (a->thargs) {
20656+ dput(a->h_dst);
20657+ au_whtmp_rmdir_free(a->thargs);
20658+ a->thargs = NULL;
20659+ }
4f0767ce 20660+out:
4a4d8108 20661+ return err;
dece6358 20662+}
1facf9fc 20663+
1308ab2a 20664+/* ---------------------------------------------------------------------- */
1facf9fc 20665+
4a4d8108
AM
20666+/*
20667+ * test if @dentry dir can be rename destination or not.
20668+ * success means, it is a logically empty dir.
20669+ */
20670+static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist)
1308ab2a 20671+{
4a4d8108 20672+ return au_test_empty(dentry, whlist);
1308ab2a 20673+}
1facf9fc 20674+
4a4d8108
AM
20675+/*
20676+ * test if @dentry dir can be rename source or not.
20677+ * if it can, return 0 and @children is filled.
20678+ * success means,
20679+ * - it is a logically empty dir.
20680+ * - or, it exists on writable branch and has no children including whiteouts
20681+ * on the lower branch.
20682+ */
20683+static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt)
20684+{
20685+ int err;
20686+ unsigned int rdhash;
20687+ aufs_bindex_t bstart;
1facf9fc 20688+
4a4d8108
AM
20689+ bstart = au_dbstart(dentry);
20690+ if (bstart != btgt) {
20691+ struct au_nhash whlist;
dece6358 20692+
4a4d8108
AM
20693+ SiMustAnyLock(dentry->d_sb);
20694+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
20695+ if (!rdhash)
20696+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL,
20697+ dentry));
20698+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
20699+ if (unlikely(err))
20700+ goto out;
20701+ err = au_test_empty(dentry, &whlist);
20702+ au_nhash_wh_free(&whlist);
20703+ goto out;
20704+ }
dece6358 20705+
4a4d8108
AM
20706+ if (bstart == au_dbtaildir(dentry))
20707+ return 0; /* success */
dece6358 20708+
4a4d8108 20709+ err = au_test_empty_lower(dentry);
1facf9fc 20710+
4f0767ce 20711+out:
4a4d8108
AM
20712+ if (err == -ENOTEMPTY) {
20713+ AuWarn1("renaming dir who has child(ren) on multiple branches,"
20714+ " is not supported\n");
20715+ err = -EXDEV;
20716+ }
20717+ return err;
20718+}
1308ab2a 20719+
4a4d8108
AM
20720+/* side effect: sets whlist and h_dentry */
20721+static int au_ren_may_dir(struct au_ren_args *a)
1308ab2a 20722+{
4a4d8108
AM
20723+ int err;
20724+ unsigned int rdhash;
20725+ struct dentry *d;
1facf9fc 20726+
4a4d8108
AM
20727+ d = a->dst_dentry;
20728+ SiMustAnyLock(d->d_sb);
1facf9fc 20729+
4a4d8108
AM
20730+ err = 0;
20731+ if (au_ftest_ren(a->flags, ISDIR) && a->dst_inode) {
20732+ rdhash = au_sbi(d->d_sb)->si_rdhash;
20733+ if (!rdhash)
20734+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d));
20735+ err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS);
20736+ if (unlikely(err))
20737+ goto out;
1308ab2a 20738+
4a4d8108
AM
20739+ au_set_dbstart(d, a->dst_bstart);
20740+ err = may_rename_dstdir(d, &a->whlist);
20741+ au_set_dbstart(d, a->btgt);
20742+ }
20743+ a->dst_h_dentry = au_h_dptr(d, au_dbstart(d));
20744+ if (unlikely(err))
20745+ goto out;
20746+
20747+ d = a->src_dentry;
20748+ a->src_h_dentry = au_h_dptr(d, au_dbstart(d));
20749+ if (au_ftest_ren(a->flags, ISDIR)) {
20750+ err = may_rename_srcdir(d, a->btgt);
20751+ if (unlikely(err)) {
20752+ au_nhash_wh_free(&a->whlist);
20753+ a->whlist.nh_num = 0;
20754+ }
20755+ }
4f0767ce 20756+out:
4a4d8108 20757+ return err;
1facf9fc 20758+}
20759+
4a4d8108 20760+/* ---------------------------------------------------------------------- */
1facf9fc 20761+
4a4d8108
AM
20762+/*
20763+ * simple tests for rename.
20764+ * following the checks in vfs, plus the parent-child relationship.
20765+ */
20766+static int au_may_ren(struct au_ren_args *a)
20767+{
20768+ int err, isdir;
20769+ struct inode *h_inode;
1facf9fc 20770+
4a4d8108
AM
20771+ if (a->src_bstart == a->btgt) {
20772+ err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent,
20773+ au_ftest_ren(a->flags, ISDIR));
20774+ if (unlikely(err))
20775+ goto out;
20776+ err = -EINVAL;
20777+ if (unlikely(a->src_h_dentry == a->h_trap))
20778+ goto out;
20779+ }
1facf9fc 20780+
4a4d8108
AM
20781+ err = 0;
20782+ if (a->dst_bstart != a->btgt)
20783+ goto out;
1facf9fc 20784+
027c5e7a
AM
20785+ err = -ENOTEMPTY;
20786+ if (unlikely(a->dst_h_dentry == a->h_trap))
20787+ goto out;
20788+
4a4d8108
AM
20789+ err = -EIO;
20790+ h_inode = a->dst_h_dentry->d_inode;
20791+ isdir = !!au_ftest_ren(a->flags, ISDIR);
20792+ if (!a->dst_dentry->d_inode) {
20793+ if (unlikely(h_inode))
20794+ goto out;
20795+ err = au_may_add(a->dst_dentry, a->btgt, a->dst_h_parent,
20796+ isdir);
20797+ } else {
20798+ if (unlikely(!h_inode || !h_inode->i_nlink))
20799+ goto out;
20800+ err = au_may_del(a->dst_dentry, a->btgt, a->dst_h_parent,
20801+ isdir);
20802+ if (unlikely(err))
20803+ goto out;
4a4d8108 20804+ }
1facf9fc 20805+
4f0767ce 20806+out:
4a4d8108
AM
20807+ if (unlikely(err == -ENOENT || err == -EEXIST))
20808+ err = -EIO;
20809+ AuTraceErr(err);
20810+ return err;
20811+}
1facf9fc 20812+
1308ab2a 20813+/* ---------------------------------------------------------------------- */
1facf9fc 20814+
4a4d8108
AM
20815+/*
20816+ * locking order
20817+ * (VFS)
20818+ * - src_dir and dir by lock_rename()
20819+ * - inode if exists
20820+ * (aufs)
20821+ * - lock all
20822+ * + src_dentry and dentry by aufs_read_and_write_lock2() which calls,
20823+ * + si_read_lock
20824+ * + di_write_lock2_child()
20825+ * + di_write_lock_child()
20826+ * + ii_write_lock_child()
20827+ * + di_write_lock_child2()
20828+ * + ii_write_lock_child2()
20829+ * + src_parent and parent
20830+ * + di_write_lock_parent()
20831+ * + ii_write_lock_parent()
20832+ * + di_write_lock_parent2()
20833+ * + ii_write_lock_parent2()
20834+ * + lower src_dir and dir by vfsub_lock_rename()
20835+ * + verify every relationship between child and parent. if any
20836+ * of them failed, unlock all and return -EBUSY.
20837+ */
20838+static void au_ren_unlock(struct au_ren_args *a)
1308ab2a 20839+{
4a4d8108
AM
20840+ vfsub_unlock_rename(a->src_h_parent, a->src_hdir,
20841+ a->dst_h_parent, a->dst_hdir);
86dc4139
AM
20842+ if (au_ftest_ren(a->flags, MNT_WRITE))
20843+ vfsub_mnt_drop_write(au_br_mnt(a->br));
1308ab2a 20844+}
20845+
4a4d8108 20846+static int au_ren_lock(struct au_ren_args *a)
1308ab2a 20847+{
4a4d8108
AM
20848+ int err;
20849+ unsigned int udba;
1308ab2a 20850+
4a4d8108
AM
20851+ err = 0;
20852+ a->src_h_parent = au_h_dptr(a->src_parent, a->btgt);
20853+ a->src_hdir = au_hi(a->src_dir, a->btgt);
20854+ a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt);
20855+ a->dst_hdir = au_hi(a->dst_dir, a->btgt);
86dc4139
AM
20856+
20857+ err = vfsub_mnt_want_write(au_br_mnt(a->br));
20858+ if (unlikely(err))
20859+ goto out;
20860+ au_fset_ren(a->flags, MNT_WRITE);
4a4d8108
AM
20861+ a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir,
20862+ a->dst_h_parent, a->dst_hdir);
20863+ udba = au_opt_udba(a->src_dentry->d_sb);
20864+ if (unlikely(a->src_hdir->hi_inode != a->src_h_parent->d_inode
20865+ || a->dst_hdir->hi_inode != a->dst_h_parent->d_inode))
20866+ err = au_busy_or_stale();
20867+ if (!err && au_dbstart(a->src_dentry) == a->btgt)
20868+ err = au_h_verify(a->src_h_dentry, udba,
20869+ a->src_h_parent->d_inode, a->src_h_parent,
20870+ a->br);
20871+ if (!err && au_dbstart(a->dst_dentry) == a->btgt)
20872+ err = au_h_verify(a->dst_h_dentry, udba,
20873+ a->dst_h_parent->d_inode, a->dst_h_parent,
20874+ a->br);
86dc4139 20875+ if (!err)
4a4d8108 20876+ goto out; /* success */
4a4d8108
AM
20877+
20878+ err = au_busy_or_stale();
4a4d8108 20879+ au_ren_unlock(a);
86dc4139 20880+
4f0767ce 20881+out:
4a4d8108 20882+ return err;
1facf9fc 20883+}
20884+
20885+/* ---------------------------------------------------------------------- */
20886+
4a4d8108 20887+static void au_ren_refresh_dir(struct au_ren_args *a)
1facf9fc 20888+{
4a4d8108 20889+ struct inode *dir;
dece6358 20890+
4a4d8108
AM
20891+ dir = a->dst_dir;
20892+ dir->i_version++;
20893+ if (au_ftest_ren(a->flags, ISDIR)) {
20894+ /* is this updating defined in POSIX? */
20895+ au_cpup_attr_timesizes(a->src_inode);
20896+ au_cpup_attr_nlink(dir, /*force*/1);
4a4d8108 20897+ }
027c5e7a 20898+
7f2ca4b1 20899+ au_dir_ts(dir, a->btgt);
dece6358 20900+
4a4d8108
AM
20901+ if (au_ftest_ren(a->flags, ISSAMEDIR))
20902+ return;
dece6358 20903+
4a4d8108
AM
20904+ dir = a->src_dir;
20905+ dir->i_version++;
20906+ if (au_ftest_ren(a->flags, ISDIR))
20907+ au_cpup_attr_nlink(dir, /*force*/1);
7f2ca4b1 20908+ au_dir_ts(dir, a->btgt);
1facf9fc 20909+}
20910+
4a4d8108 20911+static void au_ren_refresh(struct au_ren_args *a)
1facf9fc 20912+{
4a4d8108
AM
20913+ aufs_bindex_t bend, bindex;
20914+ struct dentry *d, *h_d;
20915+ struct inode *i, *h_i;
20916+ struct super_block *sb;
dece6358 20917+
027c5e7a
AM
20918+ d = a->dst_dentry;
20919+ d_drop(d);
20920+ if (a->h_dst)
20921+ /* already dget-ed by au_ren_or_cpup() */
20922+ au_set_h_dptr(d, a->btgt, a->h_dst);
20923+
20924+ i = a->dst_inode;
20925+ if (i) {
20926+ if (!au_ftest_ren(a->flags, ISDIR))
20927+ vfsub_drop_nlink(i);
20928+ else {
20929+ vfsub_dead_dir(i);
20930+ au_cpup_attr_timesizes(i);
20931+ }
20932+ au_update_dbrange(d, /*do_put_zero*/1);
20933+ } else {
20934+ bend = a->btgt;
20935+ for (bindex = au_dbstart(d); bindex < bend; bindex++)
20936+ au_set_h_dptr(d, bindex, NULL);
20937+ bend = au_dbend(d);
20938+ for (bindex = a->btgt + 1; bindex <= bend; bindex++)
20939+ au_set_h_dptr(d, bindex, NULL);
20940+ au_update_dbrange(d, /*do_put_zero*/0);
20941+ }
20942+
4a4d8108
AM
20943+ d = a->src_dentry;
20944+ au_set_dbwh(d, -1);
20945+ bend = au_dbend(d);
20946+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
20947+ h_d = au_h_dptr(d, bindex);
20948+ if (h_d)
20949+ au_set_h_dptr(d, bindex, NULL);
20950+ }
20951+ au_set_dbend(d, a->btgt);
20952+
20953+ sb = d->d_sb;
20954+ i = a->src_inode;
20955+ if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i))
20956+ return; /* success */
20957+
20958+ bend = au_ibend(i);
20959+ for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
20960+ h_i = au_h_iptr(i, bindex);
20961+ if (h_i) {
20962+ au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0);
20963+ /* ignore this error */
20964+ au_set_h_iptr(i, bindex, NULL, 0);
20965+ }
20966+ }
20967+ au_set_ibend(i, a->btgt);
1308ab2a 20968+}
dece6358 20969+
4a4d8108
AM
20970+/* ---------------------------------------------------------------------- */
20971+
20972+/* mainly for link(2) and rename(2) */
20973+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt)
1308ab2a 20974+{
4a4d8108
AM
20975+ aufs_bindex_t bdiropq, bwh;
20976+ struct dentry *parent;
20977+ struct au_branch *br;
20978+
20979+ parent = dentry->d_parent;
20980+ IMustLock(parent->d_inode); /* dir is locked */
20981+
20982+ bdiropq = au_dbdiropq(parent);
20983+ bwh = au_dbwh(dentry);
20984+ br = au_sbr(dentry->d_sb, btgt);
20985+ if (au_br_rdonly(br)
20986+ || (0 <= bdiropq && bdiropq < btgt)
20987+ || (0 <= bwh && bwh < btgt))
20988+ btgt = -1;
20989+
20990+ AuDbg("btgt %d\n", btgt);
20991+ return btgt;
1facf9fc 20992+}
20993+
4a4d8108
AM
20994+/* sets src_bstart, dst_bstart and btgt */
20995+static int au_ren_wbr(struct au_ren_args *a)
1facf9fc 20996+{
4a4d8108
AM
20997+ int err;
20998+ struct au_wr_dir_args wr_dir_args = {
20999+ /* .force_btgt = -1, */
21000+ .flags = AuWrDir_ADD_ENTRY
21001+ };
dece6358 21002+
4a4d8108
AM
21003+ a->src_bstart = au_dbstart(a->src_dentry);
21004+ a->dst_bstart = au_dbstart(a->dst_dentry);
21005+ if (au_ftest_ren(a->flags, ISDIR))
21006+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
21007+ wr_dir_args.force_btgt = a->src_bstart;
21008+ if (a->dst_inode && a->dst_bstart < a->src_bstart)
21009+ wr_dir_args.force_btgt = a->dst_bstart;
21010+ wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt);
21011+ err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args);
21012+ a->btgt = err;
dece6358 21013+
4a4d8108 21014+ return err;
1facf9fc 21015+}
21016+
4a4d8108 21017+static void au_ren_dt(struct au_ren_args *a)
1facf9fc 21018+{
4a4d8108
AM
21019+ a->h_path.dentry = a->src_h_parent;
21020+ au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path);
21021+ if (!au_ftest_ren(a->flags, ISSAMEDIR)) {
21022+ a->h_path.dentry = a->dst_h_parent;
21023+ au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path);
21024+ }
1facf9fc 21025+
4a4d8108
AM
21026+ au_fclr_ren(a->flags, DT_DSTDIR);
21027+ if (!au_ftest_ren(a->flags, ISDIR))
21028+ return;
dece6358 21029+
4a4d8108
AM
21030+ a->h_path.dentry = a->src_h_dentry;
21031+ au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path);
21032+ if (a->dst_h_dentry->d_inode) {
21033+ au_fset_ren(a->flags, DT_DSTDIR);
21034+ a->h_path.dentry = a->dst_h_dentry;
21035+ au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path);
21036+ }
1308ab2a 21037+}
dece6358 21038+
4a4d8108 21039+static void au_ren_rev_dt(int err, struct au_ren_args *a)
1308ab2a 21040+{
4a4d8108
AM
21041+ struct dentry *h_d;
21042+ struct mutex *h_mtx;
21043+
21044+ au_dtime_revert(a->src_dt + AuPARENT);
21045+ if (!au_ftest_ren(a->flags, ISSAMEDIR))
21046+ au_dtime_revert(a->dst_dt + AuPARENT);
21047+
21048+ if (au_ftest_ren(a->flags, ISDIR) && err != -EIO) {
21049+ h_d = a->src_dt[AuCHILD].dt_h_path.dentry;
21050+ h_mtx = &h_d->d_inode->i_mutex;
21051+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
21052+ au_dtime_revert(a->src_dt + AuCHILD);
21053+ mutex_unlock(h_mtx);
21054+
21055+ if (au_ftest_ren(a->flags, DT_DSTDIR)) {
21056+ h_d = a->dst_dt[AuCHILD].dt_h_path.dentry;
21057+ h_mtx = &h_d->d_inode->i_mutex;
21058+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD);
21059+ au_dtime_revert(a->dst_dt + AuCHILD);
21060+ mutex_unlock(h_mtx);
1facf9fc 21061+ }
21062+ }
21063+}
21064+
4a4d8108
AM
21065+/* ---------------------------------------------------------------------- */
21066+
21067+int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry,
21068+ struct inode *_dst_dir, struct dentry *_dst_dentry)
1facf9fc 21069+{
e49829fe 21070+ int err, flags;
4a4d8108
AM
21071+ /* reduce stack space */
21072+ struct au_ren_args *a;
21073+
523b37e3 21074+ AuDbg("%pd, %pd\n", _src_dentry, _dst_dentry);
4a4d8108
AM
21075+ IMustLock(_src_dir);
21076+ IMustLock(_dst_dir);
21077+
21078+ err = -ENOMEM;
21079+ BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE);
21080+ a = kzalloc(sizeof(*a), GFP_NOFS);
21081+ if (unlikely(!a))
21082+ goto out;
21083+
21084+ a->src_dir = _src_dir;
21085+ a->src_dentry = _src_dentry;
21086+ a->src_inode = a->src_dentry->d_inode;
21087+ a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */
21088+ a->dst_dir = _dst_dir;
21089+ a->dst_dentry = _dst_dentry;
21090+ a->dst_inode = a->dst_dentry->d_inode;
21091+ a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */
21092+ if (a->dst_inode) {
21093+ IMustLock(a->dst_inode);
21094+ au_igrab(a->dst_inode);
1facf9fc 21095+ }
1facf9fc 21096+
4a4d8108 21097+ err = -ENOTDIR;
027c5e7a 21098+ flags = AuLock_FLUSH | AuLock_NOPLM | AuLock_GEN;
7f2ca4b1 21099+ if (d_is_dir(a->src_dentry)) {
4a4d8108 21100+ au_fset_ren(a->flags, ISDIR);
7f2ca4b1
JR
21101+ if (unlikely(d_is_positive(a->dst_dentry)
21102+ && !d_is_dir(a->dst_dentry)))
4a4d8108 21103+ goto out_free;
7f2ca4b1
JR
21104+ flags |= AuLock_DIRS;
21105+ }
21106+ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry, flags);
e49829fe
JR
21107+ if (unlikely(err))
21108+ goto out_free;
1facf9fc 21109+
027c5e7a
AM
21110+ err = au_d_hashed_positive(a->src_dentry);
21111+ if (unlikely(err))
21112+ goto out_unlock;
21113+ err = -ENOENT;
21114+ if (a->dst_inode) {
21115+ /*
21116+ * If it is a dir, VFS unhash dst_dentry before this
21117+ * function. It means we cannot rely upon d_unhashed().
21118+ */
21119+ if (unlikely(!a->dst_inode->i_nlink))
21120+ goto out_unlock;
21121+ if (!S_ISDIR(a->dst_inode->i_mode)) {
21122+ err = au_d_hashed_positive(a->dst_dentry);
21123+ if (unlikely(err))
21124+ goto out_unlock;
21125+ } else if (unlikely(IS_DEADDIR(a->dst_inode)))
21126+ goto out_unlock;
21127+ } else if (unlikely(d_unhashed(a->dst_dentry)))
21128+ goto out_unlock;
21129+
7eafdf33
AM
21130+ /*
21131+ * is it possible?
7f2ca4b1 21132+ * yes, it happened (in linux-3.3-rcN) but I don't know why.
7eafdf33
AM
21133+ * there may exist a problem somewhere else.
21134+ */
21135+ err = -EINVAL;
21136+ if (unlikely(a->dst_parent->d_inode == a->src_dentry->d_inode))
21137+ goto out_unlock;
21138+
4a4d8108
AM
21139+ au_fset_ren(a->flags, ISSAMEDIR); /* temporary */
21140+ di_write_lock_parent(a->dst_parent);
1facf9fc 21141+
4a4d8108
AM
21142+ /* which branch we process */
21143+ err = au_ren_wbr(a);
21144+ if (unlikely(err < 0))
027c5e7a 21145+ goto out_parent;
4a4d8108 21146+ a->br = au_sbr(a->dst_dentry->d_sb, a->btgt);
86dc4139 21147+ a->h_path.mnt = au_br_mnt(a->br);
1facf9fc 21148+
4a4d8108
AM
21149+ /* are they available to be renamed */
21150+ err = au_ren_may_dir(a);
21151+ if (unlikely(err))
21152+ goto out_children;
1facf9fc 21153+
4a4d8108
AM
21154+ /* prepare the writable parent dir on the same branch */
21155+ if (a->dst_bstart == a->btgt) {
21156+ au_fset_ren(a->flags, WHDST);
21157+ } else {
21158+ err = au_cpup_dirs(a->dst_dentry, a->btgt);
21159+ if (unlikely(err))
21160+ goto out_children;
21161+ }
1facf9fc 21162+
4a4d8108
AM
21163+ if (a->src_dir != a->dst_dir) {
21164+ /*
21165+ * this temporary unlock is safe,
21166+ * because both dir->i_mutex are locked.
21167+ */
21168+ di_write_unlock(a->dst_parent);
21169+ di_write_lock_parent(a->src_parent);
21170+ err = au_wr_dir_need_wh(a->src_dentry,
21171+ au_ftest_ren(a->flags, ISDIR),
21172+ &a->btgt);
21173+ di_write_unlock(a->src_parent);
21174+ di_write_lock2_parent(a->src_parent, a->dst_parent, /*isdir*/1);
21175+ au_fclr_ren(a->flags, ISSAMEDIR);
21176+ } else
21177+ err = au_wr_dir_need_wh(a->src_dentry,
21178+ au_ftest_ren(a->flags, ISDIR),
21179+ &a->btgt);
21180+ if (unlikely(err < 0))
21181+ goto out_children;
21182+ if (err)
21183+ au_fset_ren(a->flags, WHSRC);
1facf9fc 21184+
86dc4139
AM
21185+ /* cpup src */
21186+ if (a->src_bstart != a->btgt) {
86dc4139
AM
21187+ struct au_pin pin;
21188+
21189+ err = au_pin(&pin, a->src_dentry, a->btgt,
21190+ au_opt_udba(a->src_dentry->d_sb),
21191+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
367653fa 21192+ if (!err) {
c2b27bf2
AM
21193+ struct au_cp_generic cpg = {
21194+ .dentry = a->src_dentry,
21195+ .bdst = a->btgt,
21196+ .bsrc = a->src_bstart,
21197+ .len = -1,
21198+ .pin = &pin,
21199+ .flags = AuCpup_DTIME | AuCpup_HOPEN
21200+ };
367653fa 21201+ AuDebugOn(au_dbstart(a->src_dentry) != a->src_bstart);
c2b27bf2 21202+ err = au_sio_cpup_simple(&cpg);
367653fa 21203+ au_unpin(&pin);
86dc4139 21204+ }
86dc4139
AM
21205+ if (unlikely(err))
21206+ goto out_children;
21207+ a->src_bstart = a->btgt;
21208+ a->src_h_dentry = au_h_dptr(a->src_dentry, a->btgt);
21209+ au_fset_ren(a->flags, WHSRC);
21210+ }
21211+
4a4d8108
AM
21212+ /* lock them all */
21213+ err = au_ren_lock(a);
21214+ if (unlikely(err))
86dc4139 21215+ /* leave the copied-up one */
4a4d8108 21216+ goto out_children;
1facf9fc 21217+
4a4d8108
AM
21218+ if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE))
21219+ err = au_may_ren(a);
21220+ else if (unlikely(a->dst_dentry->d_name.len > AUFS_MAX_NAMELEN))
21221+ err = -ENAMETOOLONG;
21222+ if (unlikely(err))
21223+ goto out_hdir;
1facf9fc 21224+
4a4d8108
AM
21225+ /* store timestamps to be revertible */
21226+ au_ren_dt(a);
1facf9fc 21227+
4a4d8108
AM
21228+ /* here we go */
21229+ err = do_rename(a);
21230+ if (unlikely(err))
21231+ goto out_dt;
21232+
21233+ /* update dir attributes */
21234+ au_ren_refresh_dir(a);
21235+
21236+ /* dput/iput all lower dentries */
21237+ au_ren_refresh(a);
21238+
21239+ goto out_hdir; /* success */
21240+
4f0767ce 21241+out_dt:
4a4d8108 21242+ au_ren_rev_dt(err, a);
4f0767ce 21243+out_hdir:
4a4d8108 21244+ au_ren_unlock(a);
4f0767ce 21245+out_children:
4a4d8108 21246+ au_nhash_wh_free(&a->whlist);
027c5e7a
AM
21247+ if (err && a->dst_inode && a->dst_bstart != a->btgt) {
21248+ AuDbg("bstart %d, btgt %d\n", a->dst_bstart, a->btgt);
21249+ au_set_h_dptr(a->dst_dentry, a->btgt, NULL);
21250+ au_set_dbstart(a->dst_dentry, a->dst_bstart);
4a4d8108 21251+ }
027c5e7a 21252+out_parent:
4a4d8108
AM
21253+ if (!err)
21254+ d_move(a->src_dentry, a->dst_dentry);
027c5e7a
AM
21255+ else {
21256+ au_update_dbstart(a->dst_dentry);
21257+ if (!a->dst_inode)
21258+ d_drop(a->dst_dentry);
21259+ }
4a4d8108
AM
21260+ if (au_ftest_ren(a->flags, ISSAMEDIR))
21261+ di_write_unlock(a->dst_parent);
21262+ else
21263+ di_write_unlock2(a->src_parent, a->dst_parent);
027c5e7a 21264+out_unlock:
4a4d8108 21265+ aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry);
4f0767ce 21266+out_free:
4a4d8108
AM
21267+ iput(a->dst_inode);
21268+ if (a->thargs)
21269+ au_whtmp_rmdir_free(a->thargs);
21270+ kfree(a);
4f0767ce 21271+out:
4a4d8108
AM
21272+ AuTraceErr(err);
21273+ return err;
1308ab2a 21274+}
7f207e10
AM
21275diff -urN /usr/share/empty/fs/aufs/Kconfig linux/fs/aufs/Kconfig
21276--- /usr/share/empty/fs/aufs/Kconfig 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 21277+++ linux/fs/aufs/Kconfig 2016-02-28 11:27:01.277245613 +0100
c1595e42 21278@@ -0,0 +1,185 @@
4a4d8108
AM
21279+config AUFS_FS
21280+ tristate "Aufs (Advanced multi layered unification filesystem) support"
4a4d8108
AM
21281+ help
21282+ Aufs is a stackable unification filesystem such as Unionfs,
21283+ which unifies several directories and provides a merged single
21284+ directory.
21285+ In the early days, aufs was entirely re-designed and
21286+ re-implemented Unionfs Version 1.x series. Introducing many
21287+ original ideas, approaches and improvements, it becomes totally
21288+ different from Unionfs while keeping the basic features.
1facf9fc 21289+
4a4d8108
AM
21290+if AUFS_FS
21291+choice
21292+ prompt "Maximum number of branches"
21293+ default AUFS_BRANCH_MAX_127
21294+ help
21295+ Specifies the maximum number of branches (or member directories)
21296+ in a single aufs. The larger value consumes more system
21297+ resources and has a minor impact to performance.
21298+config AUFS_BRANCH_MAX_127
21299+ bool "127"
21300+ help
21301+ Specifies the maximum number of branches (or member directories)
21302+ in a single aufs. The larger value consumes more system
21303+ resources and has a minor impact to performance.
21304+config AUFS_BRANCH_MAX_511
21305+ bool "511"
21306+ help
21307+ Specifies the maximum number of branches (or member directories)
21308+ in a single aufs. The larger value consumes more system
21309+ resources and has a minor impact to performance.
21310+config AUFS_BRANCH_MAX_1023
21311+ bool "1023"
21312+ help
21313+ Specifies the maximum number of branches (or member directories)
21314+ in a single aufs. The larger value consumes more system
21315+ resources and has a minor impact to performance.
21316+config AUFS_BRANCH_MAX_32767
21317+ bool "32767"
21318+ help
21319+ Specifies the maximum number of branches (or member directories)
21320+ in a single aufs. The larger value consumes more system
21321+ resources and has a minor impact to performance.
21322+endchoice
1facf9fc 21323+
e49829fe
JR
21324+config AUFS_SBILIST
21325+ bool
21326+ depends on AUFS_MAGIC_SYSRQ || PROC_FS
21327+ default y
21328+ help
21329+ Automatic configuration for internal use.
21330+ When aufs supports Magic SysRq or /proc, enabled automatically.
21331+
4a4d8108
AM
21332+config AUFS_HNOTIFY
21333+ bool "Detect direct branch access (bypassing aufs)"
21334+ help
21335+ If you want to modify files on branches directly, eg. bypassing aufs,
21336+ and want aufs to detect the changes of them fully, then enable this
21337+ option and use 'udba=notify' mount option.
7f207e10 21338+ Currently there is only one available configuration, "fsnotify".
4a4d8108
AM
21339+ It will have a negative impact to the performance.
21340+ See detail in aufs.5.
dece6358 21341+
4a4d8108
AM
21342+choice
21343+ prompt "method" if AUFS_HNOTIFY
21344+ default AUFS_HFSNOTIFY
21345+config AUFS_HFSNOTIFY
21346+ bool "fsnotify"
21347+ select FSNOTIFY
4a4d8108 21348+endchoice
1facf9fc 21349+
4a4d8108
AM
21350+config AUFS_EXPORT
21351+ bool "NFS-exportable aufs"
2cbb1c4b 21352+ depends on EXPORTFS
4a4d8108
AM
21353+ help
21354+ If you want to export your mounted aufs via NFS, then enable this
21355+ option. There are several requirements for this configuration.
21356+ See detail in aufs.5.
1facf9fc 21357+
4a4d8108
AM
21358+config AUFS_INO_T_64
21359+ bool
21360+ depends on AUFS_EXPORT
21361+ depends on 64BIT && !(ALPHA || S390)
21362+ default y
21363+ help
21364+ Automatic configuration for internal use.
21365+ /* typedef unsigned long/int __kernel_ino_t */
21366+ /* alpha and s390x are int */
1facf9fc 21367+
c1595e42
JR
21368+config AUFS_XATTR
21369+ bool "support for XATTR/EA (including Security Labels)"
21370+ help
21371+ If your branch fs supports XATTR/EA and you want to make them
21372+ available in aufs too, then enable this option and specify the
21373+ branch attributes for EA.
21374+ See detail in aufs.5.
21375+
076b876e
AM
21376+config AUFS_FHSM
21377+ bool "File-based Hierarchical Storage Management"
21378+ help
21379+ Hierarchical Storage Management (or HSM) is a well-known feature
21380+ in the storage world. Aufs provides this feature as file-based
21381+ with multiple branches.
21382+ These multiple branches are prioritized, ie. the topmost one
21383+ should be the fastest drive and be used heavily.
21384+
4a4d8108
AM
21385+config AUFS_RDU
21386+ bool "Readdir in userspace"
21387+ help
21388+ Aufs has two methods to provide a merged view for a directory,
21389+ by a user-space library and by kernel-space natively. The latter
21390+ is always enabled but sometimes large and slow.
21391+ If you enable this option, install the library in aufs2-util
21392+ package, and set some environment variables for your readdir(3),
21393+ then the work will be handled in user-space which generally
21394+ shows better performance in most cases.
21395+ See detail in aufs.5.
1facf9fc 21396+
4a4d8108
AM
21397+config AUFS_SHWH
21398+ bool "Show whiteouts"
21399+ help
21400+ If you want to make the whiteouts in aufs visible, then enable
21401+ this option and specify 'shwh' mount option. Although it may
21402+ sound like philosophy or something, technically it
21403+ simply shows the names of whiteouts while keeping their behaviour.
1facf9fc 21404+
4a4d8108
AM
21405+config AUFS_BR_RAMFS
21406+ bool "Ramfs (initramfs/rootfs) as an aufs branch"
21407+ help
21408+ If you want to use ramfs as an aufs branch fs, then enable this
21409+ option. Generally tmpfs is recommended.
21410+ Aufs prohibited them to be a branch fs by default, because
21411+ initramfs becomes unusable after switch_root or something
21412+ generally. If you set initramfs as an aufs branch and boot your
21413+ system by switch_root, you will meet a problem easily since the
21414+ files in initramfs may be inaccessible.
21415+ Unless you are going to use ramfs as an aufs branch fs without
21416+ switch_root or something, leave it N.
1facf9fc 21417+
4a4d8108
AM
21418+config AUFS_BR_FUSE
21419+ bool "Fuse fs as an aufs branch"
21420+ depends on FUSE_FS
21421+ select AUFS_POLL
21422+ help
21423+ If you want to use fuse-based userspace filesystem as an aufs
21424+ branch fs, then enable this option.
21425+ It implements the internal poll(2) operation which is
21426+ implemented by fuse only (currently).
1facf9fc 21427+
4a4d8108
AM
21428+config AUFS_POLL
21429+ bool
21430+ help
21431+ Automatic configuration for internal use.
1facf9fc 21432+
4a4d8108
AM
21433+config AUFS_BR_HFSPLUS
21434+ bool "Hfsplus as an aufs branch"
21435+ depends on HFSPLUS_FS
21436+ default y
21437+ help
21438+ If you want to use hfsplus fs as an aufs branch fs, then enable
21439+ this option. This option introduces a small overhead at
21440+ copying-up a file on hfsplus.
1facf9fc 21441+
4a4d8108
AM
21442+config AUFS_BDEV_LOOP
21443+ bool
21444+ depends on BLK_DEV_LOOP
21445+ default y
21446+ help
21447+ Automatic configuration for internal use.
21448+ Convert =[ym] into =y.
1308ab2a 21449+
4a4d8108
AM
21450+config AUFS_DEBUG
21451+ bool "Debug aufs"
21452+ help
21453+ Enable this to compile aufs internal debug code.
21454+ It will have a negative impact to the performance.
21455+
21456+config AUFS_MAGIC_SYSRQ
21457+ bool
21458+ depends on AUFS_DEBUG && MAGIC_SYSRQ
21459+ default y
21460+ help
21461+ Automatic configuration for internal use.
21462+ When aufs supports Magic SysRq, enabled automatically.
21463+endif
7f207e10
AM
21464diff -urN /usr/share/empty/fs/aufs/loop.c linux/fs/aufs/loop.c
21465--- /usr/share/empty/fs/aufs/loop.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
21466+++ linux/fs/aufs/loop.c 2016-02-28 11:27:01.280579017 +0100
21467@@ -0,0 +1,146 @@
1facf9fc 21468+/*
7f2ca4b1 21469+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 21470+ *
21471+ * This program, aufs is free software; you can redistribute it and/or modify
21472+ * it under the terms of the GNU General Public License as published by
21473+ * the Free Software Foundation; either version 2 of the License, or
21474+ * (at your option) any later version.
dece6358
AM
21475+ *
21476+ * This program is distributed in the hope that it will be useful,
21477+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21478+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21479+ * GNU General Public License for more details.
21480+ *
21481+ * You should have received a copy of the GNU General Public License
523b37e3 21482+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 21483+ */
21484+
21485+/*
21486+ * support for loopback block device as a branch
21487+ */
21488+
1facf9fc 21489+#include "aufs.h"
21490+
392086de
AM
21491+/* added into drivers/block/loop.c */
21492+static struct file *(*backing_file_func)(struct super_block *sb);
21493+
1facf9fc 21494+/*
21495+ * test if two lower dentries have overlapping branches.
21496+ */
b752ccd1 21497+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding)
1facf9fc 21498+{
b752ccd1 21499+ struct super_block *h_sb;
392086de
AM
21500+ struct file *backing_file;
21501+
21502+ if (unlikely(!backing_file_func)) {
21503+ /* don't load "loop" module here */
21504+ backing_file_func = symbol_get(loop_backing_file);
21505+ if (unlikely(!backing_file_func))
21506+ /* "loop" module is not loaded */
21507+ return 0;
21508+ }
1facf9fc 21509+
b752ccd1 21510+ h_sb = h_adding->d_sb;
392086de
AM
21511+ backing_file = backing_file_func(h_sb);
21512+ if (!backing_file)
1facf9fc 21513+ return 0;
21514+
392086de 21515+ h_adding = backing_file->f_dentry;
b752ccd1
AM
21516+ /*
21517+ * h_adding can be local NFS.
21518+ * in this case aufs cannot detect the loop.
21519+ */
21520+ if (unlikely(h_adding->d_sb == sb))
1facf9fc 21521+ return 1;
b752ccd1 21522+ return !!au_test_subdir(h_adding, sb->s_root);
1facf9fc 21523+}
21524+
21525+/* true if a kernel thread named 'loop[0-9].*' accesses a file */
21526+int au_test_loopback_kthread(void)
21527+{
b752ccd1
AM
21528+ int ret;
21529+ struct task_struct *tsk = current;
a2a7ad62 21530+ char c, comm[sizeof(tsk->comm)];
b752ccd1
AM
21531+
21532+ ret = 0;
21533+ if (tsk->flags & PF_KTHREAD) {
a2a7ad62
AM
21534+ get_task_comm(comm, tsk);
21535+ c = comm[4];
b752ccd1 21536+ ret = ('0' <= c && c <= '9'
a2a7ad62 21537+ && !strncmp(comm, "loop", 4));
b752ccd1 21538+ }
1facf9fc 21539+
b752ccd1 21540+ return ret;
1facf9fc 21541+}
87a755f4
AM
21542+
21543+/* ---------------------------------------------------------------------- */
21544+
21545+#define au_warn_loopback_step 16
21546+static int au_warn_loopback_nelem = au_warn_loopback_step;
21547+static unsigned long *au_warn_loopback_array;
21548+
21549+void au_warn_loopback(struct super_block *h_sb)
21550+{
21551+ int i, new_nelem;
21552+ unsigned long *a, magic;
21553+ static DEFINE_SPINLOCK(spin);
21554+
21555+ magic = h_sb->s_magic;
21556+ spin_lock(&spin);
21557+ a = au_warn_loopback_array;
21558+ for (i = 0; i < au_warn_loopback_nelem && a[i]; i++)
21559+ if (a[i] == magic) {
21560+ spin_unlock(&spin);
21561+ return;
21562+ }
21563+
21564+ /* h_sb is new to us: record it in the first free slot and print it */
21565+ if (i < au_warn_loopback_nelem) {
21566+ a[i] = magic;
21567+ goto pr;
21568+ }
21569+
21570+ /* no free slot is left, expand the array */
21571+ new_nelem = au_warn_loopback_nelem + au_warn_loopback_step;
21572+ a = au_kzrealloc(au_warn_loopback_array,
21573+ au_warn_loopback_nelem * sizeof(unsigned long),
21574+ new_nelem * sizeof(unsigned long), GFP_ATOMIC);
21575+ if (a) {
21576+ au_warn_loopback_nelem = new_nelem;
21577+ au_warn_loopback_array = a;
21578+ a[i] = magic;
21579+ goto pr;
21580+ }
21581+
21582+ spin_unlock(&spin);
21583+ AuWarn1("realloc failed, ignored\n");
21584+ return;
21585+
21586+pr:
21587+ spin_unlock(&spin);
0c3ec466
AM
21588+ pr_warn("you may want to try another patch for loopback file "
21589+ "on %s(0x%lx) branch\n", au_sbtype(h_sb), magic);
87a755f4
AM
21590+}
21591+
21592+int au_loopback_init(void)
21593+{
21594+ int err;
21595+ struct super_block *sb __maybe_unused;
21596+
7f2ca4b1 21597+ BUILD_BUG_ON(sizeof(sb->s_magic) != sizeof(unsigned long));
87a755f4
AM
21598+
21599+ err = 0;
21600+ au_warn_loopback_array = kcalloc(au_warn_loopback_step,
21601+ sizeof(unsigned long), GFP_NOFS);
21602+ if (unlikely(!au_warn_loopback_array))
21603+ err = -ENOMEM;
21604+
21605+ return err;
21606+}
21607+
21608+void au_loopback_fin(void)
21609+{
7f2ca4b1
JR
21610+ if (backing_file_func)
21611+ symbol_put(loop_backing_file);
87a755f4
AM
21612+ kfree(au_warn_loopback_array);
21613+}
7f207e10
AM
21614diff -urN /usr/share/empty/fs/aufs/loop.h linux/fs/aufs/loop.h
21615--- /usr/share/empty/fs/aufs/loop.h 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 21616+++ linux/fs/aufs/loop.h 2016-02-28 11:27:01.280579017 +0100
523b37e3 21617@@ -0,0 +1,52 @@
1facf9fc 21618+/*
7f2ca4b1 21619+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 21620+ *
21621+ * This program, aufs is free software; you can redistribute it and/or modify
21622+ * it under the terms of the GNU General Public License as published by
21623+ * the Free Software Foundation; either version 2 of the License, or
21624+ * (at your option) any later version.
dece6358
AM
21625+ *
21626+ * This program is distributed in the hope that it will be useful,
21627+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21628+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21629+ * GNU General Public License for more details.
21630+ *
21631+ * You should have received a copy of the GNU General Public License
523b37e3 21632+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 21633+ */
21634+
21635+/*
21636+ * support for loopback mount as a branch
21637+ */
21638+
21639+#ifndef __AUFS_LOOP_H__
21640+#define __AUFS_LOOP_H__
21641+
21642+#ifdef __KERNEL__
21643+
dece6358
AM
21644+struct dentry;
21645+struct super_block;
1facf9fc 21646+
21647+#ifdef CONFIG_AUFS_BDEV_LOOP
392086de
AM
21648+/* drivers/block/loop.c */
21649+struct file *loop_backing_file(struct super_block *sb);
21650+
1facf9fc 21651+/* loop.c */
b752ccd1 21652+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding);
1facf9fc 21653+int au_test_loopback_kthread(void);
87a755f4
AM
21654+void au_warn_loopback(struct super_block *h_sb);
21655+
21656+int au_loopback_init(void);
21657+void au_loopback_fin(void);
1facf9fc 21658+#else
4a4d8108 21659+AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
b752ccd1 21660+ struct dentry *h_adding)
4a4d8108 21661+AuStubInt0(au_test_loopback_kthread, void)
87a755f4
AM
21662+AuStubVoid(au_warn_loopback, struct super_block *h_sb)
21663+
21664+AuStubInt0(au_loopback_init, void)
21665+AuStubVoid(au_loopback_fin, void)
1facf9fc 21666+#endif /* CONFIG_AUFS_BDEV_LOOP */
21667+
21668+#endif /* __KERNEL__ */
21669+#endif /* __AUFS_LOOP_H__ */
7f207e10
AM
21670diff -urN /usr/share/empty/fs/aufs/magic.mk linux/fs/aufs/magic.mk
21671--- /usr/share/empty/fs/aufs/magic.mk 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
21672+++ linux/fs/aufs/magic.mk 2016-02-28 11:27:01.280579017 +0100
21673@@ -0,0 +1,30 @@
1facf9fc 21674+
21675+# defined in ${srctree}/fs/fuse/inode.c
21676+# tristate
21677+ifdef CONFIG_FUSE_FS
21678+ccflags-y += -DFUSE_SUPER_MAGIC=0x65735546
21679+endif
21680+
1facf9fc 21681+# defined in ${srctree}/fs/xfs/xfs_sb.h
21682+# tristate
21683+ifdef CONFIG_XFS_FS
21684+ccflags-y += -DXFS_SB_MAGIC=0x58465342
21685+endif
21686+
21687+# defined in ${srctree}/fs/configfs/mount.c
21688+# tristate
21689+ifdef CONFIG_CONFIGFS_FS
21690+ccflags-y += -DCONFIGFS_MAGIC=0x62656570
21691+endif
21692+
1facf9fc 21693+# defined in ${srctree}/fs/ubifs/ubifs.h
21694+# tristate
21695+ifdef CONFIG_UBIFS_FS
21696+ccflags-y += -DUBIFS_SUPER_MAGIC=0x24051905
21697+endif
4a4d8108
AM
21698+
21699+# defined in ${srctree}/fs/hfsplus/hfsplus_raw.h
21700+# tristate
21701+ifdef CONFIG_HFSPLUS_FS
21702+ccflags-y += -DHFSPLUS_SUPER_MAGIC=0x482b
21703+endif
7f207e10
AM
21704diff -urN /usr/share/empty/fs/aufs/Makefile linux/fs/aufs/Makefile
21705--- /usr/share/empty/fs/aufs/Makefile 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 21706+++ linux/fs/aufs/Makefile 2016-02-28 11:27:01.277245613 +0100
c1595e42 21707@@ -0,0 +1,44 @@
4a4d8108
AM
21708+
21709+include ${src}/magic.mk
21710+ifeq (${CONFIG_AUFS_FS},m)
21711+include ${src}/conf.mk
21712+endif
21713+-include ${src}/priv_def.mk
21714+
21715+# cf. include/linux/kernel.h
21716+# enable pr_debug
21717+ccflags-y += -DDEBUG
f6c5ef8b
AM
21718+# sparse requires the full pathname
21719+ifdef M
523b37e3 21720+ccflags-y += -include ${M}/../../include/uapi/linux/aufs_type.h
f6c5ef8b 21721+else
523b37e3 21722+ccflags-y += -include ${srctree}/include/uapi/linux/aufs_type.h
f6c5ef8b 21723+endif
4a4d8108
AM
21724+
21725+obj-$(CONFIG_AUFS_FS) += aufs.o
21726+aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \
21727+ wkq.o vfsub.o dcsub.o \
e49829fe 21728+ cpup.o whout.o wbr_policy.o \
4a4d8108
AM
21729+ dinfo.o dentry.o \
21730+ dynop.o \
21731+ finfo.o file.o f_op.o \
21732+ dir.o vdir.o \
21733+ iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \
c2b27bf2 21734+ mvdown.o ioctl.o
4a4d8108
AM
21735+
21736+# all are boolean
e49829fe 21737+aufs-$(CONFIG_PROC_FS) += procfs.o plink.o
4a4d8108
AM
21738+aufs-$(CONFIG_SYSFS) += sysfs.o
21739+aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o
21740+aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o
21741+aufs-$(CONFIG_AUFS_HNOTIFY) += hnotify.o
21742+aufs-$(CONFIG_AUFS_HFSNOTIFY) += hfsnotify.o
4a4d8108 21743+aufs-$(CONFIG_AUFS_EXPORT) += export.o
c1595e42
JR
21744+aufs-$(CONFIG_AUFS_XATTR) += xattr.o
21745+aufs-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
076b876e 21746+aufs-$(CONFIG_AUFS_FHSM) += fhsm.o
4a4d8108
AM
21747+aufs-$(CONFIG_AUFS_POLL) += poll.o
21748+aufs-$(CONFIG_AUFS_RDU) += rdu.o
4a4d8108
AM
21749+aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o
21750+aufs-$(CONFIG_AUFS_DEBUG) += debug.o
21751+aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o
7f207e10
AM
21752diff -urN /usr/share/empty/fs/aufs/module.c linux/fs/aufs/module.c
21753--- /usr/share/empty/fs/aufs/module.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
21754+++ linux/fs/aufs/module.c 2016-02-28 11:27:01.280579017 +0100
21755@@ -0,0 +1,222 @@
1facf9fc 21756+/*
7f2ca4b1 21757+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 21758+ *
21759+ * This program, aufs is free software; you can redistribute it and/or modify
21760+ * it under the terms of the GNU General Public License as published by
21761+ * the Free Software Foundation; either version 2 of the License, or
21762+ * (at your option) any later version.
dece6358
AM
21763+ *
21764+ * This program is distributed in the hope that it will be useful,
21765+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21766+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21767+ * GNU General Public License for more details.
21768+ *
21769+ * You should have received a copy of the GNU General Public License
523b37e3 21770+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 21771+ */
21772+
21773+/*
21774+ * module global variables and operations
21775+ */
21776+
21777+#include <linux/module.h>
21778+#include <linux/seq_file.h>
21779+#include "aufs.h"
21780+
21781+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp)
21782+{
21783+ if (new_sz <= nused)
21784+ return p;
21785+
21786+ p = krealloc(p, new_sz, gfp);
21787+ if (p)
21788+ memset(p + nused, 0, new_sz - nused);
21789+ return p;
21790+}
21791+
21792+/* ---------------------------------------------------------------------- */
21793+
21794+/*
21795+ * aufs caches
21796+ */
21797+struct kmem_cache *au_cachep[AuCache_Last];
21798+static int __init au_cache_init(void)
21799+{
4a4d8108 21800+ au_cachep[AuCache_DINFO] = AuCacheCtor(au_dinfo, au_di_init_once);
1facf9fc 21801+ if (au_cachep[AuCache_DINFO])
027c5e7a 21802+ /* SLAB_DESTROY_BY_RCU */
4a4d8108
AM
21803+ au_cachep[AuCache_ICNTNR] = AuCacheCtor(au_icntnr,
21804+ au_icntnr_init_once);
1facf9fc 21805+ if (au_cachep[AuCache_ICNTNR])
4a4d8108
AM
21806+ au_cachep[AuCache_FINFO] = AuCacheCtor(au_finfo,
21807+ au_fi_init_once);
1facf9fc 21808+ if (au_cachep[AuCache_FINFO])
21809+ au_cachep[AuCache_VDIR] = AuCache(au_vdir);
21810+ if (au_cachep[AuCache_VDIR])
21811+ au_cachep[AuCache_DEHSTR] = AuCache(au_vdir_dehstr);
21812+ if (au_cachep[AuCache_DEHSTR])
21813+ return 0;
21814+
21815+ return -ENOMEM;
21816+}
21817+
21818+static void au_cache_fin(void)
21819+{
21820+ int i;
4a4d8108 21821+
537831f9
AM
21822+ /*
21823+ * Make sure all delayed rcu free inodes are flushed before we
21824+ * destroy cache.
21825+ */
21826+ rcu_barrier();
21827+
7eafdf33
AM
21828+ /* excluding AuCache_HNOTIFY */
21829+ BUILD_BUG_ON(AuCache_HNOTIFY + 1 != AuCache_Last);
21830+ for (i = 0; i < AuCache_HNOTIFY; i++)
1facf9fc 21831+ if (au_cachep[i]) {
21832+ kmem_cache_destroy(au_cachep[i]);
21833+ au_cachep[i] = NULL;
21834+ }
21835+}
21836+
21837+/* ---------------------------------------------------------------------- */
21838+
21839+int au_dir_roflags;
21840+
e49829fe 21841+#ifdef CONFIG_AUFS_SBILIST
1e00d052
AM
21842+/*
21843+ * iterate_supers_type() doesn't protect us from
21844+ * remounting (branch management)
21845+ */
e49829fe
JR
21846+struct au_splhead au_sbilist;
21847+#endif
21848+
9dbd164d
AM
21849+struct lock_class_key au_lc_key[AuLcKey_Last];
21850+
1facf9fc 21851+/*
21852+ * functions for module interface.
21853+ */
21854+MODULE_LICENSE("GPL");
21855+/* MODULE_LICENSE("GPL v2"); */
dece6358 21856+MODULE_AUTHOR("Junjiro R. Okajima <aufs-users@lists.sourceforge.net>");
1facf9fc 21857+MODULE_DESCRIPTION(AUFS_NAME
21858+ " -- Advanced multi layered unification filesystem");
21859+MODULE_VERSION(AUFS_VERSION);
c06a8ce3 21860+MODULE_ALIAS_FS(AUFS_NAME);
1facf9fc 21861+
1facf9fc 21862+/* this module parameter has no meaning when SYSFS is disabled */
21863+int sysaufs_brs = 1;
21864+MODULE_PARM_DESC(brs, "use <sysfs>/fs/aufs/si_*/brN");
21865+module_param_named(brs, sysaufs_brs, int, S_IRUGO);
21866+
076b876e 21867+/* this module parameter has no meaning when USER_NS is disabled */
7f2ca4b1 21868+bool au_userns;
076b876e
AM
21869+MODULE_PARM_DESC(allow_userns, "allow unprivileged to mount under userns");
21870+module_param_named(allow_userns, au_userns, bool, S_IRUGO);
21871+
1facf9fc 21872+/* ---------------------------------------------------------------------- */
21873+
21874+static char au_esc_chars[0x20 + 3]; /* 0x01-0x20, backslash, del, and NULL */
21875+
21876+int au_seq_path(struct seq_file *seq, struct path *path)
21877+{
7f2ca4b1
JR
21878+ int err;
21879+
21880+ err = seq_path(seq, path, au_esc_chars);
21881+ if (err > 0)
21882+ err = 0;
21883+ else if (err < 0)
21884+ err = -ENOMEM;
21885+
21886+ return err;
1facf9fc 21887+}
21888+
21889+/* ---------------------------------------------------------------------- */
21890+
21891+static int __init aufs_init(void)
21892+{
21893+ int err, i;
21894+ char *p;
21895+
21896+ p = au_esc_chars;
21897+ for (i = 1; i <= ' '; i++)
21898+ *p++ = i;
21899+ *p++ = '\\';
21900+ *p++ = '\x7f';
21901+ *p = 0;
21902+
21903+ au_dir_roflags = au_file_roflags(O_DIRECTORY | O_LARGEFILE);
21904+
7f2ca4b1
JR
21905+ memcpy(aufs_iop_nogetattr, aufs_iop, sizeof(aufs_iop));
21906+ for (i = 0; i < AuIop_Last; i++)
21907+ aufs_iop_nogetattr[i].getattr = NULL;
21908+
e49829fe 21909+ au_sbilist_init();
1facf9fc 21910+ sysaufs_brs_init();
21911+ au_debug_init();
4a4d8108 21912+ au_dy_init();
1facf9fc 21913+ err = sysaufs_init();
21914+ if (unlikely(err))
21915+ goto out;
e49829fe 21916+ err = au_procfs_init();
4f0767ce 21917+ if (unlikely(err))
953406b4 21918+ goto out_sysaufs;
e49829fe
JR
21919+ err = au_wkq_init();
21920+ if (unlikely(err))
21921+ goto out_procfs;
87a755f4 21922+ err = au_loopback_init();
1facf9fc 21923+ if (unlikely(err))
21924+ goto out_wkq;
87a755f4
AM
21925+ err = au_hnotify_init();
21926+ if (unlikely(err))
21927+ goto out_loopback;
1facf9fc 21928+ err = au_sysrq_init();
21929+ if (unlikely(err))
21930+ goto out_hin;
21931+ err = au_cache_init();
21932+ if (unlikely(err))
21933+ goto out_cache;
076b876e
AM
21934+
21935+ aufs_fs_type.fs_flags |= au_userns ? FS_USERNS_MOUNT : 0;
1facf9fc 21936+ err = register_filesystem(&aufs_fs_type);
21937+ if (unlikely(err))
21938+ goto out_cache;
076b876e 21939+
4a4d8108
AM
21940+ /* since we define pr_fmt, call printk directly */
21941+ printk(KERN_INFO AUFS_NAME " " AUFS_VERSION "\n");
1facf9fc 21942+ goto out; /* success */
21943+
4f0767ce 21944+out_cache:
1facf9fc 21945+ /* also unwinds a partially failed au_cache_init() */
4f0767ce 21946+ au_cache_fin();
1facf9fc 21947+ au_sysrq_fin();
4f0767ce 21948+out_hin:
4a4d8108 21949+ au_hnotify_fin();
87a755f4
AM
21950+out_loopback:
21951+ au_loopback_fin();
4f0767ce 21952+out_wkq:
1facf9fc 21953+ au_wkq_fin();
e49829fe
JR
21954+out_procfs:
21955+ au_procfs_fin();
4f0767ce 21956+out_sysaufs:
1facf9fc 21957+ sysaufs_fin();
4a4d8108 21958+ au_dy_fin();
4f0767ce 21959+out:
1facf9fc 21960+ return err;
21961+}
21962+
21963+static void __exit aufs_exit(void)
21964+{
21965+ unregister_filesystem(&aufs_fs_type);
21966+ au_cache_fin();
21967+ au_sysrq_fin();
4a4d8108 21968+ au_hnotify_fin();
87a755f4 21969+ au_loopback_fin();
1facf9fc 21970+ au_wkq_fin();
e49829fe 21971+ au_procfs_fin();
1facf9fc 21972+ sysaufs_fin();
4a4d8108 21973+ au_dy_fin();
1facf9fc 21974+}
21975+
21976+module_init(aufs_init);
21977+module_exit(aufs_exit);
7f207e10
AM
21978diff -urN /usr/share/empty/fs/aufs/module.h linux/fs/aufs/module.h
21979--- /usr/share/empty/fs/aufs/module.h 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
21980+++ linux/fs/aufs/module.h 2016-02-28 11:27:01.280579017 +0100
21981@@ -0,0 +1,105 @@
1facf9fc 21982+/*
7f2ca4b1 21983+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 21984+ *
21985+ * This program, aufs is free software; you can redistribute it and/or modify
21986+ * it under the terms of the GNU General Public License as published by
21987+ * the Free Software Foundation; either version 2 of the License, or
21988+ * (at your option) any later version.
dece6358
AM
21989+ *
21990+ * This program is distributed in the hope that it will be useful,
21991+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21992+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21993+ * GNU General Public License for more details.
21994+ *
21995+ * You should have received a copy of the GNU General Public License
523b37e3 21996+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 21997+ */
21998+
21999+/*
22000+ * module initialization and module-global
22001+ */
22002+
22003+#ifndef __AUFS_MODULE_H__
22004+#define __AUFS_MODULE_H__
22005+
22006+#ifdef __KERNEL__
22007+
22008+#include <linux/slab.h>
22009+
dece6358
AM
22010+struct path;
22011+struct seq_file;
22012+
1facf9fc 22013+/* module parameters */
1facf9fc 22014+extern int sysaufs_brs;
7f2ca4b1 22015+extern bool au_userns;
1facf9fc 22016+
22017+/* ---------------------------------------------------------------------- */
22018+
22019+extern int au_dir_roflags;
22020+
9dbd164d
AM
22021+enum {
22022+ AuLcNonDir_FIINFO,
22023+ AuLcNonDir_DIINFO,
22024+ AuLcNonDir_IIINFO,
22025+
22026+ AuLcDir_FIINFO,
22027+ AuLcDir_DIINFO,
22028+ AuLcDir_IIINFO,
22029+
22030+ AuLcSymlink_DIINFO,
22031+ AuLcSymlink_IIINFO,
22032+
22033+ AuLcKey_Last
22034+};
22035+extern struct lock_class_key au_lc_key[AuLcKey_Last];
22036+
1facf9fc 22037+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp);
22038+int au_seq_path(struct seq_file *seq, struct path *path);
22039+
e49829fe
JR
22040+#ifdef CONFIG_PROC_FS
22041+/* procfs.c */
22042+int __init au_procfs_init(void);
22043+void au_procfs_fin(void);
22044+#else
22045+AuStubInt0(au_procfs_init, void);
22046+AuStubVoid(au_procfs_fin, void);
22047+#endif
22048+
4f0767ce
JR
22049+/* ---------------------------------------------------------------------- */
22050+
22051+/* kmem cache */
1facf9fc 22052+enum {
22053+ AuCache_DINFO,
22054+ AuCache_ICNTNR,
22055+ AuCache_FINFO,
22056+ AuCache_VDIR,
22057+ AuCache_DEHSTR,
7eafdf33 22058+ AuCache_HNOTIFY, /* must be last */
1facf9fc 22059+ AuCache_Last
22060+};
22061+
4a4d8108
AM
22062+#define AuCacheFlags (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD)
22063+#define AuCache(type) KMEM_CACHE(type, AuCacheFlags)
22064+#define AuCacheCtor(type, ctor) \
22065+ kmem_cache_create(#type, sizeof(struct type), \
22066+ __alignof__(struct type), AuCacheFlags, ctor)
1facf9fc 22067+
22068+extern struct kmem_cache *au_cachep[];
22069+
22070+#define AuCacheFuncs(name, index) \
4a4d8108 22071+static inline struct au_##name *au_cache_alloc_##name(void) \
1facf9fc 22072+{ return kmem_cache_alloc(au_cachep[AuCache_##index], GFP_NOFS); } \
4a4d8108 22073+static inline void au_cache_free_##name(struct au_##name *p) \
1facf9fc 22074+{ kmem_cache_free(au_cachep[AuCache_##index], p); }
22075+
22076+AuCacheFuncs(dinfo, DINFO);
22077+AuCacheFuncs(icntnr, ICNTNR);
22078+AuCacheFuncs(finfo, FINFO);
22079+AuCacheFuncs(vdir, VDIR);
4a4d8108
AM
22080+AuCacheFuncs(vdir_dehstr, DEHSTR);
22081+#ifdef CONFIG_AUFS_HNOTIFY
22082+AuCacheFuncs(hnotify, HNOTIFY);
22083+#endif
1facf9fc 22084+
4a4d8108
AM
22085+#endif /* __KERNEL__ */
22086+#endif /* __AUFS_MODULE_H__ */
c2b27bf2
AM
22087diff -urN /usr/share/empty/fs/aufs/mvdown.c linux/fs/aufs/mvdown.c
22088--- /usr/share/empty/fs/aufs/mvdown.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
22089+++ linux/fs/aufs/mvdown.c 2016-02-28 11:27:01.280579017 +0100
22090@@ -0,0 +1,703 @@
c2b27bf2 22091+/*
7f2ca4b1 22092+ * Copyright (C) 2011-2016 Junjiro R. Okajima
c2b27bf2
AM
22093+ *
22094+ * This program, aufs is free software; you can redistribute it and/or modify
22095+ * it under the terms of the GNU General Public License as published by
22096+ * the Free Software Foundation; either version 2 of the License, or
22097+ * (at your option) any later version.
22098+ *
22099+ * This program is distributed in the hope that it will be useful,
22100+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22101+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22102+ * GNU General Public License for more details.
22103+ *
22104+ * You should have received a copy of the GNU General Public License
523b37e3
AM
22105+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
22106+ */
22107+
22108+/*
22109+ * move-down, opposite of copy-up
c2b27bf2
AM
22110+ */
22111+
22112+#include "aufs.h"
22113+
c2b27bf2
AM
22114+struct au_mvd_args {
22115+ struct {
c2b27bf2
AM
22116+ struct super_block *h_sb;
22117+ struct dentry *h_parent;
22118+ struct au_hinode *hdir;
392086de 22119+ struct inode *h_dir, *h_inode;
c1595e42 22120+ struct au_pin pin;
c2b27bf2
AM
22121+ } info[AUFS_MVDOWN_NARRAY];
22122+
22123+ struct aufs_mvdown mvdown;
22124+ struct dentry *dentry, *parent;
22125+ struct inode *inode, *dir;
22126+ struct super_block *sb;
22127+ aufs_bindex_t bopq, bwh, bfound;
22128+ unsigned char rename_lock;
c2b27bf2
AM
22129+};
22130+
392086de 22131+#define mvd_errno mvdown.au_errno
076b876e
AM
22132+#define mvd_bsrc mvdown.stbr[AUFS_MVDOWN_UPPER].bindex
22133+#define mvd_src_brid mvdown.stbr[AUFS_MVDOWN_UPPER].brid
22134+#define mvd_bdst mvdown.stbr[AUFS_MVDOWN_LOWER].bindex
22135+#define mvd_dst_brid mvdown.stbr[AUFS_MVDOWN_LOWER].brid
c2b27bf2 22136+
392086de
AM
22137+#define mvd_h_src_sb info[AUFS_MVDOWN_UPPER].h_sb
22138+#define mvd_h_src_parent info[AUFS_MVDOWN_UPPER].h_parent
22139+#define mvd_hdir_src info[AUFS_MVDOWN_UPPER].hdir
22140+#define mvd_h_src_dir info[AUFS_MVDOWN_UPPER].h_dir
22141+#define mvd_h_src_inode info[AUFS_MVDOWN_UPPER].h_inode
c1595e42 22142+#define mvd_pin_src info[AUFS_MVDOWN_UPPER].pin
392086de
AM
22143+
22144+#define mvd_h_dst_sb info[AUFS_MVDOWN_LOWER].h_sb
22145+#define mvd_h_dst_parent info[AUFS_MVDOWN_LOWER].h_parent
22146+#define mvd_hdir_dst info[AUFS_MVDOWN_LOWER].hdir
22147+#define mvd_h_dst_dir info[AUFS_MVDOWN_LOWER].h_dir
22148+#define mvd_h_dst_inode info[AUFS_MVDOWN_LOWER].h_inode
c1595e42 22149+#define mvd_pin_dst info[AUFS_MVDOWN_LOWER].pin
c2b27bf2
AM
22150+
22151+#define AU_MVD_PR(flag, ...) do { \
22152+ if (flag) \
22153+ pr_err(__VA_ARGS__); \
22154+ } while (0)
22155+
076b876e
AM
22156+static int find_lower_writable(struct au_mvd_args *a)
22157+{
22158+ struct super_block *sb;
22159+ aufs_bindex_t bindex, bend;
22160+ struct au_branch *br;
22161+
22162+ sb = a->sb;
22163+ bindex = a->mvd_bsrc;
22164+ bend = au_sbend(sb);
22165+ if (a->mvdown.flags & AUFS_MVDOWN_FHSM_LOWER)
22166+ for (bindex++; bindex <= bend; bindex++) {
22167+ br = au_sbr(sb, bindex);
22168+ if (au_br_fhsm(br->br_perm)
22169+ && (!(au_br_sb(br)->s_flags & MS_RDONLY)))
22170+ return bindex;
22171+ }
22172+ else if (!(a->mvdown.flags & AUFS_MVDOWN_ROLOWER))
22173+ for (bindex++; bindex <= bend; bindex++) {
22174+ br = au_sbr(sb, bindex);
22175+ if (!au_br_rdonly(br))
22176+ return bindex;
22177+ }
22178+ else
22179+ for (bindex++; bindex <= bend; bindex++) {
22180+ br = au_sbr(sb, bindex);
22181+ if (!(au_br_sb(br)->s_flags & MS_RDONLY)) {
22182+ if (au_br_rdonly(br))
22183+ a->mvdown.flags
22184+ |= AUFS_MVDOWN_ROLOWER_R;
22185+ return bindex;
22186+ }
22187+ }
22188+
22189+ return -1;
22190+}
22191+
c2b27bf2 22192+/* make the parent dir on bdst */
392086de 22193+static int au_do_mkdir(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22194+{
22195+ int err;
22196+
22197+ err = 0;
22198+ a->mvd_hdir_src = au_hi(a->dir, a->mvd_bsrc);
22199+ a->mvd_hdir_dst = au_hi(a->dir, a->mvd_bdst);
22200+ a->mvd_h_src_parent = au_h_dptr(a->parent, a->mvd_bsrc);
22201+ a->mvd_h_dst_parent = NULL;
22202+ if (au_dbend(a->parent) >= a->mvd_bdst)
22203+ a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);
22204+ if (!a->mvd_h_dst_parent) {
22205+ err = au_cpdown_dirs(a->dentry, a->mvd_bdst);
22206+ if (unlikely(err)) {
392086de 22207+ AU_MVD_PR(dmsg, "cpdown_dirs failed\n");
c2b27bf2
AM
22208+ goto out;
22209+ }
22210+ a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);
22211+ }
22212+
22213+out:
22214+ AuTraceErr(err);
22215+ return err;
22216+}
22217+
22218+/* lock them all */
392086de 22219+static int au_do_lock(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22220+{
22221+ int err;
22222+ struct dentry *h_trap;
22223+
22224+ a->mvd_h_src_sb = au_sbr_sb(a->sb, a->mvd_bsrc);
22225+ a->mvd_h_dst_sb = au_sbr_sb(a->sb, a->mvd_bdst);
c1595e42
JR
22226+ err = au_pin(&a->mvd_pin_dst, a->dentry, a->mvd_bdst,
22227+ au_opt_udba(a->sb),
22228+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
22229+ AuTraceErr(err);
22230+ if (unlikely(err)) {
22231+ AU_MVD_PR(dmsg, "pin_dst failed\n");
22232+ goto out;
22233+ }
22234+
c2b27bf2
AM
22235+ if (a->mvd_h_src_sb != a->mvd_h_dst_sb) {
22236+ a->rename_lock = 0;
c1595e42
JR
22237+ au_pin_init(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
22238+ AuLsc_DI_PARENT, AuLsc_I_PARENT3,
22239+ au_opt_udba(a->sb),
22240+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
22241+ err = au_do_pin(&a->mvd_pin_src);
22242+ AuTraceErr(err);
22243+ a->mvd_h_src_dir = a->mvd_h_src_parent->d_inode;
22244+ if (unlikely(err)) {
22245+ AU_MVD_PR(dmsg, "pin_src failed\n");
22246+ goto out_dst;
22247+ }
22248+ goto out; /* success */
c2b27bf2
AM
22249+ }
22250+
c2b27bf2 22251+ a->rename_lock = 1;
c1595e42
JR
22252+ au_pin_hdir_unlock(&a->mvd_pin_dst);
22253+ err = au_pin(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
22254+ au_opt_udba(a->sb),
22255+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
22256+ AuTraceErr(err);
22257+ a->mvd_h_src_dir = a->mvd_h_src_parent->d_inode;
22258+ if (unlikely(err)) {
22259+ AU_MVD_PR(dmsg, "pin_src failed\n");
22260+ au_pin_hdir_lock(&a->mvd_pin_dst);
22261+ goto out_dst;
22262+ }
22263+ au_pin_hdir_unlock(&a->mvd_pin_src);
c2b27bf2
AM
22264+ h_trap = vfsub_lock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
22265+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
22266+ if (h_trap) {
22267+ err = (h_trap != a->mvd_h_src_parent);
22268+ if (err)
22269+ err = (h_trap != a->mvd_h_dst_parent);
22270+ }
22271+ BUG_ON(err); /* it should never happen */
c1595e42
JR
22272+ if (unlikely(a->mvd_h_src_dir != au_pinned_h_dir(&a->mvd_pin_src))) {
22273+ err = -EBUSY;
22274+ AuTraceErr(err);
22275+ vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
22276+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
22277+ au_pin_hdir_lock(&a->mvd_pin_src);
22278+ au_unpin(&a->mvd_pin_src);
22279+ au_pin_hdir_lock(&a->mvd_pin_dst);
22280+ goto out_dst;
22281+ }
22282+ goto out; /* success */
c2b27bf2 22283+
c1595e42
JR
22284+out_dst:
22285+ au_unpin(&a->mvd_pin_dst);
c2b27bf2
AM
22286+out:
22287+ AuTraceErr(err);
22288+ return err;
22289+}
22290+
392086de 22291+static void au_do_unlock(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2 22292+{
c1595e42
JR
22293+ if (!a->rename_lock)
22294+ au_unpin(&a->mvd_pin_src);
22295+ else {
c2b27bf2
AM
22296+ vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
22297+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
c1595e42
JR
22298+ au_pin_hdir_lock(&a->mvd_pin_src);
22299+ au_unpin(&a->mvd_pin_src);
22300+ au_pin_hdir_lock(&a->mvd_pin_dst);
22301+ }
22302+ au_unpin(&a->mvd_pin_dst);
c2b27bf2
AM
22303+}
22304+
22305+/* copy-down the file */
392086de 22306+static int au_do_cpdown(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22307+{
22308+ int err;
22309+ struct au_cp_generic cpg = {
22310+ .dentry = a->dentry,
22311+ .bdst = a->mvd_bdst,
22312+ .bsrc = a->mvd_bsrc,
22313+ .len = -1,
c1595e42 22314+ .pin = &a->mvd_pin_dst,
c2b27bf2
AM
22315+ .flags = AuCpup_DTIME | AuCpup_HOPEN
22316+ };
22317+
22318+ AuDbg("b%d, b%d\n", cpg.bsrc, cpg.bdst);
392086de
AM
22319+ if (a->mvdown.flags & AUFS_MVDOWN_OWLOWER)
22320+ au_fset_cpup(cpg.flags, OVERWRITE);
22321+ if (a->mvdown.flags & AUFS_MVDOWN_ROLOWER)
22322+ au_fset_cpup(cpg.flags, RWDST);
c2b27bf2
AM
22323+ err = au_sio_cpdown_simple(&cpg);
22324+ if (unlikely(err))
392086de 22325+ AU_MVD_PR(dmsg, "cpdown failed\n");
c2b27bf2
AM
22326+
22327+ AuTraceErr(err);
22328+ return err;
22329+}
22330+
22331+/*
22332+ * unlink the whiteout on bdst, if one exists; UDBA may have created it while
22333+ * we were sleeping
22334+ */
392086de 22335+static int au_do_unlink_wh(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22336+{
22337+ int err;
22338+ struct path h_path;
22339+ struct au_branch *br;
523b37e3 22340+ struct inode *delegated;
c2b27bf2
AM
22341+
22342+ br = au_sbr(a->sb, a->mvd_bdst);
22343+ h_path.dentry = au_wh_lkup(a->mvd_h_dst_parent, &a->dentry->d_name, br);
22344+ err = PTR_ERR(h_path.dentry);
22345+ if (IS_ERR(h_path.dentry)) {
392086de 22346+ AU_MVD_PR(dmsg, "wh_lkup failed\n");
c2b27bf2
AM
22347+ goto out;
22348+ }
22349+
22350+ err = 0;
22351+ if (h_path.dentry->d_inode) {
22352+ h_path.mnt = au_br_mnt(br);
523b37e3 22353+ delegated = NULL;
c2b27bf2 22354+ err = vfsub_unlink(a->mvd_h_dst_parent->d_inode, &h_path,
523b37e3
AM
22355+ &delegated, /*force*/0);
22356+ if (unlikely(err == -EWOULDBLOCK)) {
22357+ pr_warn("cannot retry for NFSv4 delegation"
22358+ " for an internal unlink\n");
22359+ iput(delegated);
22360+ }
c2b27bf2 22361+ if (unlikely(err))
392086de 22362+ AU_MVD_PR(dmsg, "wh_unlink failed\n");
c2b27bf2
AM
22363+ }
22364+ dput(h_path.dentry);
22365+
22366+out:
22367+ AuTraceErr(err);
22368+ return err;
22369+}
22370+
22371+/*
22372+ * unlink the topmost h_dentry
c2b27bf2 22373+ */
392086de 22374+static int au_do_unlink(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22375+{
22376+ int err;
22377+ struct path h_path;
523b37e3 22378+ struct inode *delegated;
c2b27bf2
AM
22379+
22380+ h_path.mnt = au_sbr_mnt(a->sb, a->mvd_bsrc);
22381+ h_path.dentry = au_h_dptr(a->dentry, a->mvd_bsrc);
523b37e3
AM
22382+ delegated = NULL;
22383+ err = vfsub_unlink(a->mvd_h_src_dir, &h_path, &delegated, /*force*/0);
22384+ if (unlikely(err == -EWOULDBLOCK)) {
22385+ pr_warn("cannot retry for NFSv4 delegation"
22386+ " for an internal unlink\n");
22387+ iput(delegated);
22388+ }
c2b27bf2 22389+ if (unlikely(err))
392086de 22390+ AU_MVD_PR(dmsg, "unlink failed\n");
c2b27bf2
AM
22391+
22392+ AuTraceErr(err);
22393+ return err;
22394+}
22395+
076b876e
AM
22396+/* Since mvdown succeeded, we ignore an error of this function */
22397+static void au_do_stfs(const unsigned char dmsg, struct au_mvd_args *a)
22398+{
22399+ int err;
22400+ struct au_branch *br;
22401+
22402+ a->mvdown.flags |= AUFS_MVDOWN_STFS_FAILED;
22403+ br = au_sbr(a->sb, a->mvd_bsrc);
22404+ err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_UPPER].stfs);
22405+ if (!err) {
22406+ br = au_sbr(a->sb, a->mvd_bdst);
22407+ a->mvdown.stbr[AUFS_MVDOWN_LOWER].brid = br->br_id;
22408+ err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_LOWER].stfs);
22409+ }
22410+ if (!err)
22411+ a->mvdown.flags &= ~AUFS_MVDOWN_STFS_FAILED;
22412+ else
22413+ AU_MVD_PR(dmsg, "statfs failed (%d), ignored\n", err);
22414+}
22415+
c2b27bf2
AM
22416+/*
22417+ * copy-down the file and unlink the bsrc file.
22418+ * - unlink the bdst whiteout if it exists
22419+ * - copy-down the file (with whtmp name and rename)
22420+ * - unlink the bsrc file
22421+ */
392086de 22422+static int au_do_mvdown(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22423+{
22424+ int err;
22425+
392086de 22426+ err = au_do_mkdir(dmsg, a);
c2b27bf2 22427+ if (!err)
392086de 22428+ err = au_do_lock(dmsg, a);
c2b27bf2
AM
22429+ if (unlikely(err))
22430+ goto out;
22431+
22432+ /*
22433+ * do not revert the activities we made on bdst since they should be
22434+ * harmless in aufs.
22435+ */
22436+
392086de 22437+ err = au_do_cpdown(dmsg, a);
c2b27bf2 22438+ if (!err)
392086de
AM
22439+ err = au_do_unlink_wh(dmsg, a);
22440+ if (!err && !(a->mvdown.flags & AUFS_MVDOWN_KUPPER))
22441+ err = au_do_unlink(dmsg, a);
c2b27bf2
AM
22442+ if (unlikely(err))
22443+ goto out_unlock;
22444+
c1595e42
JR
22445+ AuDbg("%pd2, 0x%x, %d --> %d\n",
22446+ a->dentry, a->mvdown.flags, a->mvd_bsrc, a->mvd_bdst);
076b876e
AM
22447+ if (find_lower_writable(a) < 0)
22448+ a->mvdown.flags |= AUFS_MVDOWN_BOTTOM;
22449+
22450+ if (a->mvdown.flags & AUFS_MVDOWN_STFS)
22451+ au_do_stfs(dmsg, a);
22452+
c2b27bf2 22453+ /* maintain internal array */
392086de
AM
22454+ if (!(a->mvdown.flags & AUFS_MVDOWN_KUPPER)) {
22455+ au_set_h_dptr(a->dentry, a->mvd_bsrc, NULL);
22456+ au_set_dbstart(a->dentry, a->mvd_bdst);
22457+ au_set_h_iptr(a->inode, a->mvd_bsrc, NULL, /*flags*/0);
22458+ au_set_ibstart(a->inode, a->mvd_bdst);
7f2ca4b1
JR
22459+ } else {
22460+ /* hide the lower */
22461+ au_set_h_dptr(a->dentry, a->mvd_bdst, NULL);
22462+ au_set_dbend(a->dentry, a->mvd_bsrc);
22463+ au_set_h_iptr(a->inode, a->mvd_bdst, NULL, /*flags*/0);
22464+ au_set_ibend(a->inode, a->mvd_bsrc);
392086de 22465+ }
c2b27bf2
AM
22466+ if (au_dbend(a->dentry) < a->mvd_bdst)
22467+ au_set_dbend(a->dentry, a->mvd_bdst);
c2b27bf2
AM
22468+ if (au_ibend(a->inode) < a->mvd_bdst)
22469+ au_set_ibend(a->inode, a->mvd_bdst);
22470+
22471+out_unlock:
392086de 22472+ au_do_unlock(dmsg, a);
c2b27bf2
AM
22473+out:
22474+ AuTraceErr(err);
22475+ return err;
22476+}
22477+
22478+/* ---------------------------------------------------------------------- */
22479+
c2b27bf2 22480+/* make sure the file is idle */
392086de 22481+static int au_mvd_args_busy(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22482+{
22483+ int err, plinked;
c2b27bf2
AM
22484+
22485+ err = 0;
c2b27bf2
AM
22486+ plinked = !!au_opt_test(au_mntflags(a->sb), PLINK);
22487+ if (au_dbstart(a->dentry) == a->mvd_bsrc
c1595e42 22488+ && au_dcount(a->dentry) == 1
c2b27bf2 22489+ && atomic_read(&a->inode->i_count) == 1
392086de 22490+ /* && a->mvd_h_src_inode->i_nlink == 1 */
c2b27bf2
AM
22491+ && (!plinked || !au_plink_test(a->inode))
22492+ && a->inode->i_nlink == 1)
22493+ goto out;
22494+
22495+ err = -EBUSY;
392086de 22496+ AU_MVD_PR(dmsg,
c1595e42
JR
22497+ "b%d, d{b%d, c%d?}, i{c%d?, l%u}, hi{l%u}, p{%d, %d}\n",
22498+ a->mvd_bsrc, au_dbstart(a->dentry), au_dcount(a->dentry),
c2b27bf2 22499+ atomic_read(&a->inode->i_count), a->inode->i_nlink,
392086de 22500+ a->mvd_h_src_inode->i_nlink,
c2b27bf2
AM
22501+ plinked, plinked ? au_plink_test(a->inode) : 0);
22502+
22503+out:
22504+ AuTraceErr(err);
22505+ return err;
22506+}
22507+
22508+/* make sure the parent dir is fine */
392086de 22509+static int au_mvd_args_parent(const unsigned char dmsg,
c2b27bf2
AM
22510+ struct au_mvd_args *a)
22511+{
22512+ int err;
22513+ aufs_bindex_t bindex;
22514+
22515+ err = 0;
22516+ if (unlikely(au_alive_dir(a->parent))) {
22517+ err = -ENOENT;
392086de 22518+ AU_MVD_PR(dmsg, "parent dir is dead\n");
c2b27bf2
AM
22519+ goto out;
22520+ }
22521+
22522+ a->bopq = au_dbdiropq(a->parent);
22523+ bindex = au_wbr_nonopq(a->dentry, a->mvd_bdst);
22524+ AuDbg("b%d\n", bindex);
22525+ if (unlikely((bindex >= 0 && bindex < a->mvd_bdst)
22526+ || (a->bopq != -1 && a->bopq < a->mvd_bdst))) {
22527+ err = -EINVAL;
392086de
AM
22528+ a->mvd_errno = EAU_MVDOWN_OPAQUE;
22529+ AU_MVD_PR(dmsg, "ancestor is opaque b%d, b%d\n",
c2b27bf2
AM
22530+ a->bopq, a->mvd_bdst);
22531+ }
22532+
22533+out:
22534+ AuTraceErr(err);
22535+ return err;
22536+}
22537+
392086de 22538+static int au_mvd_args_intermediate(const unsigned char dmsg,
c2b27bf2
AM
22539+ struct au_mvd_args *a)
22540+{
22541+ int err;
22542+ struct au_dinfo *dinfo, *tmp;
22543+
22544+ /* lookup the next lower positive entry */
22545+ err = -ENOMEM;
22546+ tmp = au_di_alloc(a->sb, AuLsc_DI_TMP);
22547+ if (unlikely(!tmp))
22548+ goto out;
22549+
22550+ a->bfound = -1;
22551+ a->bwh = -1;
22552+ dinfo = au_di(a->dentry);
22553+ au_di_cp(tmp, dinfo);
22554+ au_di_swap(tmp, dinfo);
22555+
22556+ /* returns the number of positive dentries */
22557+ err = au_lkup_dentry(a->dentry, a->mvd_bsrc + 1, /*type*/0);
22558+ if (!err)
22559+ a->bwh = au_dbwh(a->dentry);
22560+ else if (err > 0)
22561+ a->bfound = au_dbstart(a->dentry);
22562+
22563+ au_di_swap(tmp, dinfo);
22564+ au_rw_write_unlock(&tmp->di_rwsem);
22565+ au_di_free(tmp);
22566+ if (unlikely(err < 0))
392086de 22567+ AU_MVD_PR(dmsg, "failed look-up lower\n");
c2b27bf2
AM
22568+
22569+ /*
22570+ * here, we have these cases.
22571+ * bfound == -1
22572+ * no positive dentry under bsrc. there are more sub-cases.
22573+ * bwh < 0
22574+ * there is no whiteout, so we can safely move down.
22575+ * bwh <= bsrc
22576+ * impossible
22577+ * bsrc < bwh && bwh < bdst
22578+ * there is a whiteout on RO branch. cannot proceed.
22579+ * bwh == bdst
22580+ * there is a whiteout on the RW target branch. it should
22581+ * be removed.
22582+ * bdst < bwh
22583+ * there is a whiteout on some unrelated branch.
22584+ * -1 < bfound && bfound <= bsrc
22585+ * impossible.
22586+ * bfound < bdst
22587+ * found, but it is on RO branch between bsrc and bdst. cannot
22588+ * proceed.
22589+ * bfound == bdst
22590+ * found, replace it if AUFS_MVDOWN_FORCE is set. otherwise return
22591+ * error.
22592+ * bdst < bfound
22593+ * found, after we create the file on bdst, it will be hidden.
22594+ */
22595+
22596+ AuDebugOn(a->bfound == -1
22597+ && a->bwh != -1
22598+ && a->bwh <= a->mvd_bsrc);
22599+ AuDebugOn(-1 < a->bfound
22600+ && a->bfound <= a->mvd_bsrc);
22601+
22602+ err = -EINVAL;
22603+ if (a->bfound == -1
22604+ && a->mvd_bsrc < a->bwh
22605+ && a->bwh != -1
22606+ && a->bwh < a->mvd_bdst) {
392086de
AM
22607+ a->mvd_errno = EAU_MVDOWN_WHITEOUT;
22608+ AU_MVD_PR(dmsg, "bsrc %d, bdst %d, bfound %d, bwh %d\n",
c2b27bf2
AM
22609+ a->mvd_bsrc, a->mvd_bdst, a->bfound, a->bwh);
22610+ goto out;
22611+ } else if (a->bfound != -1 && a->bfound < a->mvd_bdst) {
392086de
AM
22612+ a->mvd_errno = EAU_MVDOWN_UPPER;
22613+ AU_MVD_PR(dmsg, "bdst %d, bfound %d\n",
c2b27bf2
AM
22614+ a->mvd_bdst, a->bfound);
22615+ goto out;
22616+ }
22617+
22618+ err = 0; /* success */
22619+
22620+out:
22621+ AuTraceErr(err);
22622+ return err;
22623+}
22624+
392086de 22625+static int au_mvd_args_exist(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22626+{
22627+ int err;
22628+
392086de
AM
22629+ err = 0;
22630+ if (!(a->mvdown.flags & AUFS_MVDOWN_OWLOWER)
22631+ && a->bfound == a->mvd_bdst)
22632+ err = -EEXIST;
c2b27bf2
AM
22633+ AuTraceErr(err);
22634+ return err;
22635+}
22636+
392086de 22637+static int au_mvd_args(const unsigned char dmsg, struct au_mvd_args *a)
c2b27bf2
AM
22638+{
22639+ int err;
22640+ struct au_branch *br;
22641+
22642+ err = -EISDIR;
22643+ if (unlikely(S_ISDIR(a->inode->i_mode)))
22644+ goto out;
22645+
22646+ err = -EINVAL;
392086de
AM
22647+ if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_UPPER))
22648+ a->mvd_bsrc = au_ibstart(a->inode);
22649+ else {
22650+ a->mvd_bsrc = au_br_index(a->sb, a->mvd_src_brid);
22651+ if (unlikely(a->mvd_bsrc < 0
22652+ || (a->mvd_bsrc < au_dbstart(a->dentry)
22653+ || au_dbend(a->dentry) < a->mvd_bsrc
22654+ || !au_h_dptr(a->dentry, a->mvd_bsrc))
22655+ || (a->mvd_bsrc < au_ibstart(a->inode)
22656+ || au_ibend(a->inode) < a->mvd_bsrc
22657+ || !au_h_iptr(a->inode, a->mvd_bsrc)))) {
22658+ a->mvd_errno = EAU_MVDOWN_NOUPPER;
22659+ AU_MVD_PR(dmsg, "no upper\n");
22660+ goto out;
22661+ }
22662+ }
c2b27bf2 22663+ if (unlikely(a->mvd_bsrc == au_sbend(a->sb))) {
392086de
AM
22664+ a->mvd_errno = EAU_MVDOWN_BOTTOM;
22665+ AU_MVD_PR(dmsg, "on the bottom\n");
c2b27bf2
AM
22666+ goto out;
22667+ }
392086de 22668+ a->mvd_h_src_inode = au_h_iptr(a->inode, a->mvd_bsrc);
c2b27bf2
AM
22669+ br = au_sbr(a->sb, a->mvd_bsrc);
22670+ err = au_br_rdonly(br);
392086de
AM
22671+ if (!(a->mvdown.flags & AUFS_MVDOWN_ROUPPER)) {
22672+ if (unlikely(err))
22673+ goto out;
22674+ } else if (!(vfsub_native_ro(a->mvd_h_src_inode)
22675+ || IS_APPEND(a->mvd_h_src_inode))) {
22676+ if (err)
22677+ a->mvdown.flags |= AUFS_MVDOWN_ROUPPER_R;
22678+ /* go on */
22679+ } else
c2b27bf2
AM
22680+ goto out;
22681+
22682+ err = -EINVAL;
392086de
AM
22683+ if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_LOWER)) {
22684+ a->mvd_bdst = find_lower_writable(a);
22685+ if (unlikely(a->mvd_bdst < 0)) {
22686+ a->mvd_errno = EAU_MVDOWN_BOTTOM;
22687+ AU_MVD_PR(dmsg, "no writable lower branch\n");
22688+ goto out;
22689+ }
22690+ } else {
22691+ a->mvd_bdst = au_br_index(a->sb, a->mvd_dst_brid);
22692+ if (unlikely(a->mvd_bdst < 0
22693+ || au_sbend(a->sb) < a->mvd_bdst)) {
22694+ a->mvd_errno = EAU_MVDOWN_NOLOWERBR;
22695+ AU_MVD_PR(dmsg, "no lower brid\n");
22696+ goto out;
22697+ }
c2b27bf2
AM
22698+ }
22699+
392086de 22700+ err = au_mvd_args_busy(dmsg, a);
c2b27bf2 22701+ if (!err)
392086de 22702+ err = au_mvd_args_parent(dmsg, a);
c2b27bf2 22703+ if (!err)
392086de 22704+ err = au_mvd_args_intermediate(dmsg, a);
c2b27bf2 22705+ if (!err)
392086de 22706+ err = au_mvd_args_exist(dmsg, a);
c2b27bf2
AM
22707+ if (!err)
22708+ AuDbg("b%d, b%d\n", a->mvd_bsrc, a->mvd_bdst);
22709+
22710+out:
22711+ AuTraceErr(err);
22712+ return err;
22713+}
22714+
22715+int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *uarg)
22716+{
392086de
AM
22717+ int err, e;
22718+ unsigned char dmsg;
22719+ struct au_mvd_args *args;
7f2ca4b1 22720+ struct inode *inode;
c2b27bf2 22721+
7f2ca4b1 22722+ inode = dentry->d_inode;
c2b27bf2
AM
22723+ err = -EPERM;
22724+ if (unlikely(!capable(CAP_SYS_ADMIN)))
22725+ goto out;
22726+
392086de
AM
22727+ err = -ENOMEM;
22728+ args = kmalloc(sizeof(*args), GFP_NOFS);
22729+ if (unlikely(!args))
22730+ goto out;
22731+
22732+ err = copy_from_user(&args->mvdown, uarg, sizeof(args->mvdown));
22733+ if (!err)
22734+ err = !access_ok(VERIFY_WRITE, uarg, sizeof(*uarg));
c2b27bf2
AM
22735+ if (unlikely(err)) {
22736+ err = -EFAULT;
392086de
AM
22737+ AuTraceErr(err);
22738+ goto out_free;
c2b27bf2 22739+ }
392086de
AM
22740+ AuDbg("flags 0x%x\n", args->mvdown.flags);
22741+ args->mvdown.flags &= ~(AUFS_MVDOWN_ROLOWER_R | AUFS_MVDOWN_ROUPPER_R);
22742+ args->mvdown.au_errno = 0;
22743+ args->dentry = dentry;
7f2ca4b1 22744+ args->inode = inode;
392086de 22745+ args->sb = dentry->d_sb;
c2b27bf2 22746+
392086de
AM
22747+ err = -ENOENT;
22748+ dmsg = !!(args->mvdown.flags & AUFS_MVDOWN_DMSG);
22749+ args->parent = dget_parent(dentry);
22750+ args->dir = args->parent->d_inode;
22751+ mutex_lock_nested(&args->dir->i_mutex, I_MUTEX_PARENT);
22752+ dput(args->parent);
22753+ if (unlikely(args->parent != dentry->d_parent)) {
22754+ AU_MVD_PR(dmsg, "parent dir is moved\n");
c2b27bf2
AM
22755+ goto out_dir;
22756+ }
22757+
7f2ca4b1
JR
22758+ mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
22759+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_NOPLMW);
c2b27bf2
AM
22760+ if (unlikely(err))
22761+ goto out_inode;
22762+
392086de
AM
22763+ di_write_lock_parent(args->parent);
22764+ err = au_mvd_args(dmsg, args);
c2b27bf2
AM
22765+ if (unlikely(err))
22766+ goto out_parent;
22767+
392086de 22768+ err = au_do_mvdown(dmsg, args);
c2b27bf2
AM
22769+ if (unlikely(err))
22770+ goto out_parent;
c2b27bf2 22771+
392086de 22772+ au_cpup_attr_timesizes(args->dir);
7f2ca4b1
JR
22773+ au_cpup_attr_timesizes(inode);
22774+ if (!(args->mvdown.flags & AUFS_MVDOWN_KUPPER))
22775+ au_cpup_igen(inode, au_h_iptr(inode, args->mvd_bdst));
c2b27bf2
AM
22776+ /* au_digen_dec(dentry); */
22777+
22778+out_parent:
392086de 22779+ di_write_unlock(args->parent);
c2b27bf2
AM
22780+ aufs_read_unlock(dentry, AuLock_DW);
22781+out_inode:
7f2ca4b1 22782+ mutex_unlock(&inode->i_mutex);
c2b27bf2 22783+out_dir:
392086de
AM
22784+ mutex_unlock(&args->dir->i_mutex);
22785+out_free:
22786+ e = copy_to_user(uarg, &args->mvdown, sizeof(args->mvdown));
22787+ if (unlikely(e))
22788+ err = -EFAULT;
22789+ kfree(args);
c2b27bf2
AM
22790+out:
22791+ AuTraceErr(err);
22792+ return err;
22793+}
22794diff -urN /usr/share/empty/fs/aufs/opts.c linux/fs/aufs/opts.c
22795--- /usr/share/empty/fs/aufs/opts.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
22796+++ linux/fs/aufs/opts.c 2016-02-28 11:27:01.280579017 +0100
22797@@ -0,0 +1,1878 @@
1facf9fc 22798+/*
7f2ca4b1 22799+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 22800+ *
22801+ * This program, aufs is free software; you can redistribute it and/or modify
22802+ * it under the terms of the GNU General Public License as published by
22803+ * the Free Software Foundation; either version 2 of the License, or
22804+ * (at your option) any later version.
dece6358
AM
22805+ *
22806+ * This program is distributed in the hope that it will be useful,
22807+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
22808+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22809+ * GNU General Public License for more details.
22810+ *
22811+ * You should have received a copy of the GNU General Public License
523b37e3 22812+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 22813+ */
22814+
22815+/*
22816+ * mount options/flags
22817+ */
22818+
dece6358 22819+#include <linux/namei.h>
1facf9fc 22820+#include <linux/types.h> /* a distribution requires */
22821+#include <linux/parser.h>
22822+#include "aufs.h"
22823+
22824+/* ---------------------------------------------------------------------- */
22825+
22826+enum {
22827+ Opt_br,
7f2ca4b1
JR
22828+ Opt_add, Opt_del, Opt_mod, Opt_append, Opt_prepend,
22829+ Opt_idel, Opt_imod,
22830+ Opt_dirwh, Opt_rdcache, Opt_rdblk, Opt_rdhash,
dece6358 22831+ Opt_rdblk_def, Opt_rdhash_def,
7f2ca4b1 22832+ Opt_xino, Opt_noxino,
1facf9fc 22833+ Opt_trunc_xino, Opt_trunc_xino_v, Opt_notrunc_xino,
22834+ Opt_trunc_xino_path, Opt_itrunc_xino,
22835+ Opt_trunc_xib, Opt_notrunc_xib,
dece6358 22836+ Opt_shwh, Opt_noshwh,
1facf9fc 22837+ Opt_plink, Opt_noplink, Opt_list_plink,
22838+ Opt_udba,
4a4d8108 22839+ Opt_dio, Opt_nodio,
1facf9fc 22840+ Opt_diropq_a, Opt_diropq_w,
22841+ Opt_warn_perm, Opt_nowarn_perm,
22842+ Opt_wbr_copyup, Opt_wbr_create,
076b876e 22843+ Opt_fhsm_sec,
1facf9fc 22844+ Opt_refrof, Opt_norefrof,
22845+ Opt_verbose, Opt_noverbose,
22846+ Opt_sum, Opt_nosum, Opt_wsum,
076b876e 22847+ Opt_dirperm1, Opt_nodirperm1,
c1595e42 22848+ Opt_acl, Opt_noacl,
1facf9fc 22849+ Opt_tail, Opt_ignore, Opt_ignore_silent, Opt_err
22850+};
22851+
22852+static match_table_t options = {
22853+ {Opt_br, "br=%s"},
22854+ {Opt_br, "br:%s"},
22855+
22856+ {Opt_add, "add=%d:%s"},
22857+ {Opt_add, "add:%d:%s"},
22858+ {Opt_add, "ins=%d:%s"},
22859+ {Opt_add, "ins:%d:%s"},
22860+ {Opt_append, "append=%s"},
22861+ {Opt_append, "append:%s"},
22862+ {Opt_prepend, "prepend=%s"},
22863+ {Opt_prepend, "prepend:%s"},
22864+
22865+ {Opt_del, "del=%s"},
22866+ {Opt_del, "del:%s"},
22867+ /* {Opt_idel, "idel:%d"}, */
22868+ {Opt_mod, "mod=%s"},
22869+ {Opt_mod, "mod:%s"},
22870+ /* {Opt_imod, "imod:%d:%s"}, */
22871+
22872+ {Opt_dirwh, "dirwh=%d"},
22873+
22874+ {Opt_xino, "xino=%s"},
22875+ {Opt_noxino, "noxino"},
22876+ {Opt_trunc_xino, "trunc_xino"},
22877+ {Opt_trunc_xino_v, "trunc_xino_v=%d:%d"},
22878+ {Opt_notrunc_xino, "notrunc_xino"},
22879+ {Opt_trunc_xino_path, "trunc_xino=%s"},
22880+ {Opt_itrunc_xino, "itrunc_xino=%d"},
22881+ /* {Opt_zxino, "zxino=%s"}, */
22882+ {Opt_trunc_xib, "trunc_xib"},
22883+ {Opt_notrunc_xib, "notrunc_xib"},
22884+
e49829fe 22885+#ifdef CONFIG_PROC_FS
1facf9fc 22886+ {Opt_plink, "plink"},
e49829fe
JR
22887+#else
22888+ {Opt_ignore_silent, "plink"},
22889+#endif
22890+
1facf9fc 22891+ {Opt_noplink, "noplink"},
e49829fe 22892+
1facf9fc 22893+#ifdef CONFIG_AUFS_DEBUG
22894+ {Opt_list_plink, "list_plink"},
22895+#endif
22896+
22897+ {Opt_udba, "udba=%s"},
22898+
4a4d8108
AM
22899+ {Opt_dio, "dio"},
22900+ {Opt_nodio, "nodio"},
22901+
076b876e
AM
22902+#ifdef CONFIG_AUFS_FHSM
22903+ {Opt_fhsm_sec, "fhsm_sec=%d"},
22904+#else
22905+ {Opt_ignore_silent, "fhsm_sec=%d"},
22906+#endif
22907+
1facf9fc 22908+ {Opt_diropq_a, "diropq=always"},
22909+ {Opt_diropq_a, "diropq=a"},
22910+ {Opt_diropq_w, "diropq=whiteouted"},
22911+ {Opt_diropq_w, "diropq=w"},
22912+
22913+ {Opt_warn_perm, "warn_perm"},
22914+ {Opt_nowarn_perm, "nowarn_perm"},
22915+
22916+ /* keep them temporarily */
1facf9fc 22917+ {Opt_ignore_silent, "nodlgt"},
1facf9fc 22918+ {Opt_ignore_silent, "clean_plink"},
22919+
dece6358
AM
22920+#ifdef CONFIG_AUFS_SHWH
22921+ {Opt_shwh, "shwh"},
22922+#endif
22923+ {Opt_noshwh, "noshwh"},
22924+
076b876e
AM
22925+ {Opt_dirperm1, "dirperm1"},
22926+ {Opt_nodirperm1, "nodirperm1"},
22927+
1facf9fc 22928+ {Opt_refrof, "refrof"},
22929+ {Opt_norefrof, "norefrof"},
22930+
22931+ {Opt_verbose, "verbose"},
22932+ {Opt_verbose, "v"},
22933+ {Opt_noverbose, "noverbose"},
22934+ {Opt_noverbose, "quiet"},
22935+ {Opt_noverbose, "q"},
22936+ {Opt_noverbose, "silent"},
22937+
22938+ {Opt_sum, "sum"},
22939+ {Opt_nosum, "nosum"},
22940+ {Opt_wsum, "wsum"},
22941+
22942+ {Opt_rdcache, "rdcache=%d"},
22943+ {Opt_rdblk, "rdblk=%d"},
dece6358 22944+ {Opt_rdblk_def, "rdblk=def"},
1facf9fc 22945+ {Opt_rdhash, "rdhash=%d"},
dece6358 22946+ {Opt_rdhash_def, "rdhash=def"},
1facf9fc 22947+
22948+ {Opt_wbr_create, "create=%s"},
22949+ {Opt_wbr_create, "create_policy=%s"},
22950+ {Opt_wbr_copyup, "cpup=%s"},
22951+ {Opt_wbr_copyup, "copyup=%s"},
22952+ {Opt_wbr_copyup, "copyup_policy=%s"},
22953+
c1595e42
JR
22954+ /* generic VFS flag */
22955+#ifdef CONFIG_FS_POSIX_ACL
22956+ {Opt_acl, "acl"},
22957+ {Opt_noacl, "noacl"},
22958+#else
22959+ {Opt_ignore_silent, "acl"},
22960+ {Opt_ignore_silent, "noacl"},
22961+#endif
22962+
1facf9fc 22963+ /* internal use for the scripts */
22964+ {Opt_ignore_silent, "si=%s"},
22965+
22966+ {Opt_br, "dirs=%s"},
22967+ {Opt_ignore, "debug=%d"},
22968+ {Opt_ignore, "delete=whiteout"},
22969+ {Opt_ignore, "delete=all"},
22970+ {Opt_ignore, "imap=%s"},
22971+
1308ab2a 22972+ /* temporary workaround, due to old mount(8)? */
22973+ {Opt_ignore_silent, "relatime"},
22974+
1facf9fc 22975+ {Opt_err, NULL}
22976+};
22977+
22978+/* ---------------------------------------------------------------------- */
22979+
076b876e 22980+static const char *au_parser_pattern(int val, match_table_t tbl)
1facf9fc 22981+{
076b876e
AM
22982+ struct match_token *p;
22983+
22984+ p = tbl;
22985+ while (p->pattern) {
22986+ if (p->token == val)
22987+ return p->pattern;
22988+ p++;
1facf9fc 22989+ }
22990+ BUG();
22991+ return "??";
22992+}
22993+
076b876e
AM
22994+static const char *au_optstr(int *val, match_table_t tbl)
22995+{
22996+ struct match_token *p;
22997+ int v;
22998+
22999+ v = *val;
7f2ca4b1
JR
23000+ if (!v)
23001+ goto out;
076b876e 23002+ p = tbl;
7f2ca4b1
JR
23003+ while (p->pattern) {
23004+ if (p->token
23005+ && (v & p->token) == p->token) {
076b876e
AM
23006+ *val &= ~p->token;
23007+ return p->pattern;
23008+ }
23009+ p++;
23010+ }
7f2ca4b1
JR
23011+
23012+out:
076b876e
AM
23013+ return NULL;
23014+}
23015+
1facf9fc 23016+/* ---------------------------------------------------------------------- */
23017+
1e00d052 23018+static match_table_t brperm = {
1facf9fc 23019+ {AuBrPerm_RO, AUFS_BRPERM_RO},
23020+ {AuBrPerm_RR, AUFS_BRPERM_RR},
23021+ {AuBrPerm_RW, AUFS_BRPERM_RW},
1e00d052
AM
23022+ {0, NULL}
23023+};
1facf9fc 23024+
86dc4139 23025+static match_table_t brattr = {
076b876e
AM
23026+ /* general */
23027+ {AuBrAttr_COO_REG, AUFS_BRATTR_COO_REG},
23028+ {AuBrAttr_COO_ALL, AUFS_BRATTR_COO_ALL},
c1595e42 23029+ /* 'unpin' attrib is meaningless since linux-3.18-rc1 */
86dc4139 23030+ {AuBrAttr_UNPIN, AUFS_BRATTR_UNPIN},
7f2ca4b1 23031+#ifdef CONFIG_AUFS_FHSM
076b876e 23032+ {AuBrAttr_FHSM, AUFS_BRATTR_FHSM},
7f2ca4b1
JR
23033+#endif
23034+#ifdef CONFIG_AUFS_XATTR
c1595e42
JR
23035+ {AuBrAttr_ICEX, AUFS_BRATTR_ICEX},
23036+ {AuBrAttr_ICEX_SEC, AUFS_BRATTR_ICEX_SEC},
23037+ {AuBrAttr_ICEX_SYS, AUFS_BRATTR_ICEX_SYS},
23038+ {AuBrAttr_ICEX_TR, AUFS_BRATTR_ICEX_TR},
23039+ {AuBrAttr_ICEX_USR, AUFS_BRATTR_ICEX_USR},
23040+ {AuBrAttr_ICEX_OTH, AUFS_BRATTR_ICEX_OTH},
7f2ca4b1 23041+#endif
076b876e
AM
23042+
23043+ /* ro/rr branch */
1e00d052 23044+ {AuBrRAttr_WH, AUFS_BRRATTR_WH},
076b876e
AM
23045+
23046+ /* rw branch */
23047+ {AuBrWAttr_MOO, AUFS_BRWATTR_MOO},
1e00d052 23048+ {AuBrWAttr_NoLinkWH, AUFS_BRWATTR_NLWH},
076b876e 23049+
1e00d052 23050+ {0, NULL}
1facf9fc 23051+};
23052+
1e00d052
AM
23053+static int br_attr_val(char *str, match_table_t table, substring_t args[])
23054+{
23055+ int attr, v;
23056+ char *p;
23057+
23058+ attr = 0;
23059+ do {
23060+ p = strchr(str, '+');
23061+ if (p)
23062+ *p = 0;
23063+ v = match_token(str, table, args);
076b876e
AM
23064+ if (v) {
23065+ if (v & AuBrAttr_CMOO_Mask)
23066+ attr &= ~AuBrAttr_CMOO_Mask;
1e00d052 23067+ attr |= v;
076b876e 23068+ } else {
1e00d052
AM
23069+ if (p)
23070+ *p = '+';
0c3ec466 23071+ pr_warn("ignored branch attribute %s\n", str);
1e00d052
AM
23072+ break;
23073+ }
23074+ if (p)
23075+ str = p + 1;
23076+ } while (p);
23077+
23078+ return attr;
23079+}
23080+
076b876e
AM
23081+static int au_do_optstr_br_attr(au_br_perm_str_t *str, int perm)
23082+{
23083+ int sz;
23084+ const char *p;
23085+ char *q;
23086+
076b876e
AM
23087+ q = str->a;
23088+ *q = 0;
23089+ p = au_optstr(&perm, brattr);
23090+ if (p) {
23091+ sz = strlen(p);
23092+ memcpy(q, p, sz + 1);
23093+ q += sz;
23094+ } else
23095+ goto out;
23096+
23097+ do {
23098+ p = au_optstr(&perm, brattr);
23099+ if (p) {
23100+ *q++ = '+';
23101+ sz = strlen(p);
23102+ memcpy(q, p, sz + 1);
23103+ q += sz;
23104+ }
23105+ } while (p);
23106+
23107+out:
c1595e42 23108+ return q - str->a;
076b876e
AM
23109+}
23110+
4a4d8108 23111+static int noinline_for_stack br_perm_val(char *perm)
1facf9fc 23112+{
076b876e
AM
23113+ int val, bad, sz;
23114+ char *p;
1facf9fc 23115+ substring_t args[MAX_OPT_ARGS];
076b876e 23116+ au_br_perm_str_t attr;
1facf9fc 23117+
1e00d052
AM
23118+ p = strchr(perm, '+');
23119+ if (p)
23120+ *p = 0;
23121+ val = match_token(perm, brperm, args);
23122+ if (!val) {
23123+ if (p)
23124+ *p = '+';
0c3ec466 23125+ pr_warn("ignored branch permission %s\n", perm);
1e00d052
AM
23126+ val = AuBrPerm_RO;
23127+ goto out;
23128+ }
23129+ if (!p)
23130+ goto out;
23131+
076b876e
AM
23132+ val |= br_attr_val(p + 1, brattr, args);
23133+
23134+ bad = 0;
86dc4139 23135+ switch (val & AuBrPerm_Mask) {
1e00d052
AM
23136+ case AuBrPerm_RO:
23137+ case AuBrPerm_RR:
076b876e
AM
23138+ bad = val & AuBrWAttr_Mask;
23139+ val &= ~AuBrWAttr_Mask;
1e00d052
AM
23140+ break;
23141+ case AuBrPerm_RW:
076b876e
AM
23142+ bad = val & AuBrRAttr_Mask;
23143+ val &= ~AuBrRAttr_Mask;
1e00d052
AM
23144+ break;
23145+ }
c1595e42
JR
23146+
23147+ /*
23148+ * 'unpin' attrib becomes meaningless since linux-3.18-rc1, but aufs
23149+ * does not treat it as an error, just warning.
23150+ * this is a tiny guard for the user operation.
23151+ */
23152+ if (val & AuBrAttr_UNPIN) {
23153+ bad |= AuBrAttr_UNPIN;
23154+ val &= ~AuBrAttr_UNPIN;
23155+ }
23156+
076b876e
AM
23157+ if (unlikely(bad)) {
23158+ sz = au_do_optstr_br_attr(&attr, bad);
23159+ AuDebugOn(!sz);
23160+ pr_warn("ignored branch attribute %s\n", attr.a);
23161+ }
1e00d052
AM
23162+
23163+out:
1facf9fc 23164+ return val;
23165+}
23166+
076b876e 23167+void au_optstr_br_perm(au_br_perm_str_t *str, int perm)
1facf9fc 23168+{
076b876e
AM
23169+ au_br_perm_str_t attr;
23170+ const char *p;
23171+ char *q;
1e00d052
AM
23172+ int sz;
23173+
076b876e
AM
23174+ q = str->a;
23175+ p = au_optstr(&perm, brperm);
23176+ AuDebugOn(!p || !*p);
23177+ sz = strlen(p);
23178+ memcpy(q, p, sz + 1);
23179+ q += sz;
1e00d052 23180+
076b876e
AM
23181+ sz = au_do_optstr_br_attr(&attr, perm);
23182+ if (sz) {
23183+ *q++ = '+';
23184+ memcpy(q, attr.a, sz + 1);
1e00d052
AM
23185+ }
23186+
076b876e 23187+ AuDebugOn(strlen(str->a) >= sizeof(str->a));
1facf9fc 23188+}
23189+
23190+/* ---------------------------------------------------------------------- */
23191+
23192+static match_table_t udbalevel = {
23193+ {AuOpt_UDBA_REVAL, "reval"},
23194+ {AuOpt_UDBA_NONE, "none"},
4a4d8108
AM
23195+#ifdef CONFIG_AUFS_HNOTIFY
23196+ {AuOpt_UDBA_HNOTIFY, "notify"}, /* abstraction */
23197+#ifdef CONFIG_AUFS_HFSNOTIFY
23198+ {AuOpt_UDBA_HNOTIFY, "fsnotify"},
4a4d8108 23199+#endif
1facf9fc 23200+#endif
23201+ {-1, NULL}
23202+};
23203+
4a4d8108 23204+static int noinline_for_stack udba_val(char *str)
1facf9fc 23205+{
23206+ substring_t args[MAX_OPT_ARGS];
23207+
7f207e10 23208+ return match_token(str, udbalevel, args);
1facf9fc 23209+}
23210+
23211+const char *au_optstr_udba(int udba)
23212+{
076b876e 23213+ return au_parser_pattern(udba, udbalevel);
1facf9fc 23214+}
23215+
23216+/* ---------------------------------------------------------------------- */
23217+
23218+static match_table_t au_wbr_create_policy = {
23219+ {AuWbrCreate_TDP, "tdp"},
23220+ {AuWbrCreate_TDP, "top-down-parent"},
23221+ {AuWbrCreate_RR, "rr"},
23222+ {AuWbrCreate_RR, "round-robin"},
23223+ {AuWbrCreate_MFS, "mfs"},
23224+ {AuWbrCreate_MFS, "most-free-space"},
23225+ {AuWbrCreate_MFSV, "mfs:%d"},
23226+ {AuWbrCreate_MFSV, "most-free-space:%d"},
23227+
23228+ {AuWbrCreate_MFSRR, "mfsrr:%d"},
23229+ {AuWbrCreate_MFSRRV, "mfsrr:%d:%d"},
23230+ {AuWbrCreate_PMFS, "pmfs"},
23231+ {AuWbrCreate_PMFSV, "pmfs:%d"},
392086de
AM
23232+ {AuWbrCreate_PMFSRR, "pmfsrr:%d"},
23233+ {AuWbrCreate_PMFSRRV, "pmfsrr:%d:%d"},
1facf9fc 23234+
23235+ {-1, NULL}
23236+};
23237+
dece6358
AM
23238+/*
23239+ * cf. linux/lib/parser.c and cmdline.c
23240+ * gave up calling memparse() since it uses simple_strtoull() instead of
9dbd164d 23241+ * kstrto...().
dece6358 23242+ */
4a4d8108
AM
23243+static int noinline_for_stack
23244+au_match_ull(substring_t *s, unsigned long long *result)
1facf9fc 23245+{
23246+ int err;
23247+ unsigned int len;
23248+ char a[32];
23249+
23250+ err = -ERANGE;
23251+ len = s->to - s->from;
23252+ if (len + 1 <= sizeof(a)) {
23253+ memcpy(a, s->from, len);
23254+ a[len] = '\0';
9dbd164d 23255+ err = kstrtoull(a, 0, result);
1facf9fc 23256+ }
23257+ return err;
23258+}
23259+
23260+static int au_wbr_mfs_wmark(substring_t *arg, char *str,
23261+ struct au_opt_wbr_create *create)
23262+{
23263+ int err;
23264+ unsigned long long ull;
23265+
23266+ err = 0;
23267+ if (!au_match_ull(arg, &ull))
23268+ create->mfsrr_watermark = ull;
23269+ else {
4a4d8108 23270+ pr_err("bad integer in %s\n", str);
1facf9fc 23271+ err = -EINVAL;
23272+ }
23273+
23274+ return err;
23275+}
23276+
23277+static int au_wbr_mfs_sec(substring_t *arg, char *str,
23278+ struct au_opt_wbr_create *create)
23279+{
23280+ int n, err;
23281+
23282+ err = 0;
027c5e7a 23283+ if (!match_int(arg, &n) && 0 <= n && n <= AUFS_MFS_MAX_SEC)
1facf9fc 23284+ create->mfs_second = n;
23285+ else {
4a4d8108 23286+ pr_err("bad integer in %s\n", str);
1facf9fc 23287+ err = -EINVAL;
23288+ }
23289+
23290+ return err;
23291+}
23292+
4a4d8108
AM
23293+static int noinline_for_stack
23294+au_wbr_create_val(char *str, struct au_opt_wbr_create *create)
1facf9fc 23295+{
23296+ int err, e;
23297+ substring_t args[MAX_OPT_ARGS];
23298+
23299+ err = match_token(str, au_wbr_create_policy, args);
23300+ create->wbr_create = err;
23301+ switch (err) {
23302+ case AuWbrCreate_MFSRRV:
392086de 23303+ case AuWbrCreate_PMFSRRV:
1facf9fc 23304+ e = au_wbr_mfs_wmark(&args[0], str, create);
23305+ if (!e)
23306+ e = au_wbr_mfs_sec(&args[1], str, create);
23307+ if (unlikely(e))
23308+ err = e;
23309+ break;
23310+ case AuWbrCreate_MFSRR:
392086de 23311+ case AuWbrCreate_PMFSRR:
1facf9fc 23312+ e = au_wbr_mfs_wmark(&args[0], str, create);
23313+ if (unlikely(e)) {
23314+ err = e;
23315+ break;
23316+ }
23317+ /*FALLTHROUGH*/
23318+ case AuWbrCreate_MFS:
23319+ case AuWbrCreate_PMFS:
027c5e7a 23320+ create->mfs_second = AUFS_MFS_DEF_SEC;
1facf9fc 23321+ break;
23322+ case AuWbrCreate_MFSV:
23323+ case AuWbrCreate_PMFSV:
23324+ e = au_wbr_mfs_sec(&args[0], str, create);
23325+ if (unlikely(e))
23326+ err = e;
23327+ break;
23328+ }
23329+
23330+ return err;
23331+}
23332+
23333+const char *au_optstr_wbr_create(int wbr_create)
23334+{
076b876e 23335+ return au_parser_pattern(wbr_create, au_wbr_create_policy);
1facf9fc 23336+}
23337+
23338+static match_table_t au_wbr_copyup_policy = {
23339+ {AuWbrCopyup_TDP, "tdp"},
23340+ {AuWbrCopyup_TDP, "top-down-parent"},
23341+ {AuWbrCopyup_BUP, "bup"},
23342+ {AuWbrCopyup_BUP, "bottom-up-parent"},
23343+ {AuWbrCopyup_BU, "bu"},
23344+ {AuWbrCopyup_BU, "bottom-up"},
23345+ {-1, NULL}
23346+};
23347+
4a4d8108 23348+static int noinline_for_stack au_wbr_copyup_val(char *str)
1facf9fc 23349+{
23350+ substring_t args[MAX_OPT_ARGS];
23351+
23352+ return match_token(str, au_wbr_copyup_policy, args);
23353+}
23354+
23355+const char *au_optstr_wbr_copyup(int wbr_copyup)
23356+{
076b876e 23357+ return au_parser_pattern(wbr_copyup, au_wbr_copyup_policy);
1facf9fc 23358+}
23359+
23360+/* ---------------------------------------------------------------------- */
23361+
23362+static const int lkup_dirflags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
23363+
/*
 * Debug helper: walk the parsed option array up to its Opt_tail terminator
 * and print every entry through AuDbg()/AuLabel().  An option type which is
 * not listed here hits BUG().  The body is compiled only when
 * CONFIG_AUFS_DEBUG is defined; otherwise the function is empty.
 */
static void dump_opts(struct au_opts *opts)
{
#ifdef CONFIG_AUFS_DEBUG
	struct au_opt *opt;
	struct au_opt_wbr_create *create;

	for (opt = opts->opt; opt->type != Opt_tail; opt++) {
		switch (opt->type) {
		/* branch manipulation */
		case Opt_add:
			AuDbg("add {b%d, %s, 0x%x, %p}\n",
			      opt->add.bindex, opt->add.pathname,
			      opt->add.perm, opt->add.path.dentry);
			break;
		case Opt_del:
		case Opt_idel:
			AuDbg("del {%s, %p}\n",
			      opt->del.pathname, opt->del.h_path.dentry);
			break;
		case Opt_mod:
		case Opt_imod:
			AuDbg("mod {%s, 0x%x, %p}\n",
			      opt->mod.path, opt->mod.perm, opt->mod.h_root);
			break;
		case Opt_append:
			AuDbg("append {b%d, %s, 0x%x, %p}\n",
			      opt->add.bindex, opt->add.pathname,
			      opt->add.perm, opt->add.path.dentry);
			break;
		case Opt_prepend:
			AuDbg("prepend {b%d, %s, 0x%x, %p}\n",
			      opt->add.bindex, opt->add.pathname,
			      opt->add.perm, opt->add.path.dentry);
			break;

		/* options carrying a number */
		case Opt_dirwh:
			AuDbg("dirwh %d\n", opt->dirwh);
			break;
		case Opt_rdcache:
			AuDbg("rdcache %d\n", opt->rdcache);
			break;
		case Opt_rdblk:
			AuDbg("rdblk %u\n", opt->rdblk);
			break;
		case Opt_rdblk_def:
			AuDbg("rdblk_def\n");
			break;
		case Opt_rdhash:
			AuDbg("rdhash %u\n", opt->rdhash);
			break;
		case Opt_rdhash_def:
			AuDbg("rdhash_def\n");
			break;

		/* xino */
		case Opt_xino:
			AuDbg("xino {%s %pD}\n",
			      opt->xino.path, opt->xino.file);
			break;
		case Opt_trunc_xino:
			AuLabel(trunc_xino);
			break;
		case Opt_notrunc_xino:
			AuLabel(notrunc_xino);
			break;
		case Opt_trunc_xino_path:
		case Opt_itrunc_xino:
			AuDbg("trunc_xino %d\n", opt->xino_itrunc.bindex);
			break;
		case Opt_noxino:
			AuLabel(noxino);
			break;
		case Opt_trunc_xib:
			AuLabel(trunc_xib);
			break;
		case Opt_notrunc_xib:
			AuLabel(notrunc_xib);
			break;

		/* plain switches */
		case Opt_shwh:
			AuLabel(shwh);
			break;
		case Opt_noshwh:
			AuLabel(noshwh);
			break;
		case Opt_dirperm1:
			AuLabel(dirperm1);
			break;
		case Opt_nodirperm1:
			AuLabel(nodirperm1);
			break;
		case Opt_plink:
			AuLabel(plink);
			break;
		case Opt_noplink:
			AuLabel(noplink);
			break;
		case Opt_list_plink:
			AuLabel(list_plink);
			break;
		case Opt_udba:
			AuDbg("udba %d, %s\n",
			      opt->udba, au_optstr_udba(opt->udba));
			break;
		case Opt_dio:
			AuLabel(dio);
			break;
		case Opt_nodio:
			AuLabel(nodio);
			break;
		case Opt_diropq_a:
			AuLabel(diropq_a);
			break;
		case Opt_diropq_w:
			AuLabel(diropq_w);
			break;
		case Opt_warn_perm:
			AuLabel(warn_perm);
			break;
		case Opt_nowarn_perm:
			AuLabel(nowarn_perm);
			break;
		case Opt_refrof:
			AuLabel(refrof);
			break;
		case Opt_norefrof:
			AuLabel(norefrof);
			break;
		case Opt_verbose:
			AuLabel(verbose);
			break;
		case Opt_noverbose:
			AuLabel(noverbose);
			break;
		case Opt_sum:
			AuLabel(sum);
			break;
		case Opt_nosum:
			AuLabel(nosum);
			break;
		case Opt_wsum:
			AuLabel(wsum);
			break;

		/* writable branch policies */
		case Opt_wbr_create:
			create = &opt->wbr_create;
			AuDbg("create %d, %s\n", create->wbr_create,
			      au_optstr_wbr_create(create->wbr_create));
			/* some policies carry extra parameters */
			switch (create->wbr_create) {
			case AuWbrCreate_MFSV:
			case AuWbrCreate_PMFSV:
				AuDbg("%d sec\n", create->mfs_second);
				break;
			case AuWbrCreate_MFSRR:
				AuDbg("%llu watermark\n",
				      create->mfsrr_watermark);
				break;
			case AuWbrCreate_MFSRRV:
			case AuWbrCreate_PMFSRRV:
				AuDbg("%llu watermark, %d sec\n",
				      create->mfsrr_watermark,
				      create->mfs_second);
				break;
			}
			break;
		case Opt_wbr_copyup:
			AuDbg("copyup %d, %s\n", opt->wbr_copyup,
			      au_optstr_wbr_copyup(opt->wbr_copyup));
			break;

		case Opt_fhsm_sec:
			AuDbg("fhsm_sec %u\n", opt->fhsm_second);
			break;
		case Opt_acl:
			AuLabel(acl);
			break;
		case Opt_noacl:
			AuLabel(noacl);
			break;
		default:
			BUG();
		}
	}
#endif
}
23558+
23559+void au_opts_free(struct au_opts *opts)
23560+{
23561+ struct au_opt *opt;
23562+
23563+ opt = opts->opt;
23564+ while (opt->type != Opt_tail) {
23565+ switch (opt->type) {
23566+ case Opt_add:
23567+ case Opt_append:
23568+ case Opt_prepend:
23569+ path_put(&opt->add.path);
23570+ break;
23571+ case Opt_del:
23572+ case Opt_idel:
23573+ path_put(&opt->del.h_path);
23574+ break;
23575+ case Opt_mod:
23576+ case Opt_imod:
23577+ dput(opt->mod.h_root);
23578+ break;
23579+ case Opt_xino:
23580+ fput(opt->xino.file);
23581+ break;
23582+ }
23583+ opt++;
23584+ }
23585+}
23586+
23587+static int opt_add(struct au_opt *opt, char *opt_str, unsigned long sb_flags,
23588+ aufs_bindex_t bindex)
23589+{
23590+ int err;
23591+ struct au_opt_add *add = &opt->add;
23592+ char *p;
23593+
23594+ add->bindex = bindex;
1e00d052 23595+ add->perm = AuBrPerm_RO;
1facf9fc 23596+ add->pathname = opt_str;
23597+ p = strchr(opt_str, '=');
23598+ if (p) {
23599+ *p++ = 0;
23600+ if (*p)
23601+ add->perm = br_perm_val(p);
23602+ }
23603+
23604+ err = vfsub_kern_path(add->pathname, lkup_dirflags, &add->path);
23605+ if (!err) {
23606+ if (!p) {
23607+ add->perm = AuBrPerm_RO;
23608+ if (au_test_fs_rr(add->path.dentry->d_sb))
23609+ add->perm = AuBrPerm_RR;
23610+ else if (!bindex && !(sb_flags & MS_RDONLY))
23611+ add->perm = AuBrPerm_RW;
23612+ }
23613+ opt->type = Opt_add;
23614+ goto out;
23615+ }
4a4d8108 23616+ pr_err("lookup failed %s (%d)\n", add->pathname, err);
1facf9fc 23617+ err = -EINVAL;
23618+
4f0767ce 23619+out:
1facf9fc 23620+ return err;
23621+}
23622+
23623+static int au_opts_parse_del(struct au_opt_del *del, substring_t args[])
23624+{
23625+ int err;
23626+
23627+ del->pathname = args[0].from;
23628+ AuDbg("del path %s\n", del->pathname);
23629+
23630+ err = vfsub_kern_path(del->pathname, lkup_dirflags, &del->h_path);
23631+ if (unlikely(err))
4a4d8108 23632+ pr_err("lookup failed %s (%d)\n", del->pathname, err);
1facf9fc 23633+
23634+ return err;
23635+}
23636+
#if 0 /* reserved for future use */
/*
 * "del" by branch index: take a reference on the dentry and the mount of
 * branch @bindex under the aufs read lock.
 * Returns 0, or -EINVAL when @bindex is out of range.
 */
static int au_opts_parse_idel(struct super_block *sb, aufs_bindex_t bindex,
			      struct au_opt_del *del, substring_t args[])
{
	int err;
	struct dentry *root;

	root = sb->s_root;
	aufs_read_lock(root, AuLock_FLUSH);
	if (bindex >= 0 && bindex <= au_sbend(sb)) {
		del->h_path.dentry = dget(au_h_dptr(root, bindex));
		del->h_path.mnt = mntget(au_sbr_mnt(sb, bindex));
		err = 0;
	} else {
		pr_err("out of bounds, %d\n", bindex);
		err = -EINVAL;
	}
	aufs_read_unlock(root, !AuLock_IR);

	return err;
}
#endif
23661+
4a4d8108
AM
23662+static int noinline_for_stack
23663+au_opts_parse_mod(struct au_opt_mod *mod, substring_t args[])
1facf9fc 23664+{
23665+ int err;
23666+ struct path path;
23667+ char *p;
23668+
23669+ err = -EINVAL;
23670+ mod->path = args[0].from;
23671+ p = strchr(mod->path, '=');
23672+ if (unlikely(!p)) {
4a4d8108 23673+ pr_err("no permssion %s\n", args[0].from);
1facf9fc 23674+ goto out;
23675+ }
23676+
23677+ *p++ = 0;
23678+ err = vfsub_kern_path(mod->path, lkup_dirflags, &path);
23679+ if (unlikely(err)) {
4a4d8108 23680+ pr_err("lookup failed %s (%d)\n", mod->path, err);
1facf9fc 23681+ goto out;
23682+ }
23683+
23684+ mod->perm = br_perm_val(p);
23685+ AuDbg("mod path %s, perm 0x%x, %s\n", mod->path, mod->perm, p);
23686+ mod->h_root = dget(path.dentry);
23687+ path_put(&path);
23688+
4f0767ce 23689+out:
1facf9fc 23690+ return err;
23691+}
23692+
#if 0 /* reserved for future use */
/*
 * "mod" by branch index: convert the permission in args[1] and take a
 * reference on the dentry of branch @bindex under the aufs read lock.
 * Returns 0, or -EINVAL when @bindex is out of range.
 */
static int au_opts_parse_imod(struct super_block *sb, aufs_bindex_t bindex,
			      struct au_opt_mod *mod, substring_t args[])
{
	int err;
	struct dentry *root;

	root = sb->s_root;
	aufs_read_lock(root, AuLock_FLUSH);
	if (bindex >= 0 && bindex <= au_sbend(sb)) {
		mod->perm = br_perm_val(args[1].from);
		AuDbg("mod path %s, perm 0x%x, %s\n",
		      mod->path, mod->perm, args[1].from);
		mod->h_root = dget(au_h_dptr(root, bindex));
		err = 0;
	} else {
		pr_err("out of bounds, %d\n", bindex);
		err = -EINVAL;
	}
	aufs_read_unlock(root, !AuLock_IR);

	return err;
}
#endif
23719+
23720+static int au_opts_parse_xino(struct super_block *sb, struct au_opt_xino *xino,
23721+ substring_t args[])
23722+{
23723+ int err;
23724+ struct file *file;
23725+
23726+ file = au_xino_create(sb, args[0].from, /*silent*/0);
23727+ err = PTR_ERR(file);
23728+ if (IS_ERR(file))
23729+ goto out;
23730+
23731+ err = -EINVAL;
23732+ if (unlikely(file->f_dentry->d_sb == sb)) {
23733+ fput(file);
4a4d8108 23734+ pr_err("%s must be outside\n", args[0].from);
1facf9fc 23735+ goto out;
23736+ }
23737+
23738+ err = 0;
23739+ xino->file = file;
23740+ xino->path = args[0].from;
23741+
4f0767ce 23742+out:
1facf9fc 23743+ return err;
23744+}
23745+
4a4d8108
AM
23746+static int noinline_for_stack
23747+au_opts_parse_xino_itrunc_path(struct super_block *sb,
23748+ struct au_opt_xino_itrunc *xino_itrunc,
23749+ substring_t args[])
1facf9fc 23750+{
23751+ int err;
23752+ aufs_bindex_t bend, bindex;
23753+ struct path path;
23754+ struct dentry *root;
23755+
23756+ err = vfsub_kern_path(args[0].from, lkup_dirflags, &path);
23757+ if (unlikely(err)) {
4a4d8108 23758+ pr_err("lookup failed %s (%d)\n", args[0].from, err);
1facf9fc 23759+ goto out;
23760+ }
23761+
23762+ xino_itrunc->bindex = -1;
23763+ root = sb->s_root;
23764+ aufs_read_lock(root, AuLock_FLUSH);
23765+ bend = au_sbend(sb);
23766+ for (bindex = 0; bindex <= bend; bindex++) {
23767+ if (au_h_dptr(root, bindex) == path.dentry) {
23768+ xino_itrunc->bindex = bindex;
23769+ break;
23770+ }
23771+ }
23772+ aufs_read_unlock(root, !AuLock_IR);
23773+ path_put(&path);
23774+
23775+ if (unlikely(xino_itrunc->bindex < 0)) {
4a4d8108 23776+ pr_err("no such branch %s\n", args[0].from);
1facf9fc 23777+ err = -EINVAL;
23778+ }
23779+
4f0767ce 23780+out:
1facf9fc 23781+ return err;
23782+}
23783+
23784+/* called without aufs lock */
23785+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts)
23786+{
23787+ int err, n, token;
23788+ aufs_bindex_t bindex;
23789+ unsigned char skipped;
23790+ struct dentry *root;
23791+ struct au_opt *opt, *opt_tail;
23792+ char *opt_str;
23793+ /* reduce the stack space */
23794+ union {
23795+ struct au_opt_xino_itrunc *xino_itrunc;
23796+ struct au_opt_wbr_create *create;
23797+ } u;
23798+ struct {
23799+ substring_t args[MAX_OPT_ARGS];
23800+ } *a;
23801+
23802+ err = -ENOMEM;
23803+ a = kmalloc(sizeof(*a), GFP_NOFS);
23804+ if (unlikely(!a))
23805+ goto out;
23806+
23807+ root = sb->s_root;
23808+ err = 0;
23809+ bindex = 0;
23810+ opt = opts->opt;
23811+ opt_tail = opt + opts->max_opt - 1;
23812+ opt->type = Opt_tail;
23813+ while (!err && (opt_str = strsep(&str, ",")) && *opt_str) {
23814+ err = -EINVAL;
23815+ skipped = 0;
23816+ token = match_token(opt_str, options, a->args);
23817+ switch (token) {
23818+ case Opt_br:
23819+ err = 0;
23820+ while (!err && (opt_str = strsep(&a->args[0].from, ":"))
23821+ && *opt_str) {
23822+ err = opt_add(opt, opt_str, opts->sb_flags,
23823+ bindex++);
23824+ if (unlikely(!err && ++opt > opt_tail)) {
23825+ err = -E2BIG;
23826+ break;
23827+ }
23828+ opt->type = Opt_tail;
23829+ skipped = 1;
23830+ }
23831+ break;
23832+ case Opt_add:
23833+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 23834+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23835+ break;
23836+ }
23837+ bindex = n;
23838+ err = opt_add(opt, a->args[1].from, opts->sb_flags,
23839+ bindex);
23840+ if (!err)
23841+ opt->type = token;
23842+ break;
23843+ case Opt_append:
23844+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
23845+ /*dummy bindex*/1);
23846+ if (!err)
23847+ opt->type = token;
23848+ break;
23849+ case Opt_prepend:
23850+ err = opt_add(opt, a->args[0].from, opts->sb_flags,
23851+ /*bindex*/0);
23852+ if (!err)
23853+ opt->type = token;
23854+ break;
23855+ case Opt_del:
23856+ err = au_opts_parse_del(&opt->del, a->args);
23857+ if (!err)
23858+ opt->type = token;
23859+ break;
23860+#if 0 /* reserved for future use */
23861+ case Opt_idel:
23862+ del->pathname = "(indexed)";
23863+ if (unlikely(match_int(&args[0], &n))) {
4a4d8108 23864+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23865+ break;
23866+ }
23867+ err = au_opts_parse_idel(sb, n, &opt->del, a->args);
23868+ if (!err)
23869+ opt->type = token;
23870+ break;
23871+#endif
23872+ case Opt_mod:
23873+ err = au_opts_parse_mod(&opt->mod, a->args);
23874+ if (!err)
23875+ opt->type = token;
23876+ break;
23877+#ifdef IMOD /* reserved for future use */
23878+ case Opt_imod:
23879+ u.mod->path = "(indexed)";
23880+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 23881+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23882+ break;
23883+ }
23884+ err = au_opts_parse_imod(sb, n, &opt->mod, a->args);
23885+ if (!err)
23886+ opt->type = token;
23887+ break;
23888+#endif
23889+ case Opt_xino:
23890+ err = au_opts_parse_xino(sb, &opt->xino, a->args);
23891+ if (!err)
23892+ opt->type = token;
23893+ break;
23894+
23895+ case Opt_trunc_xino_path:
23896+ err = au_opts_parse_xino_itrunc_path
23897+ (sb, &opt->xino_itrunc, a->args);
23898+ if (!err)
23899+ opt->type = token;
23900+ break;
23901+
23902+ case Opt_itrunc_xino:
23903+ u.xino_itrunc = &opt->xino_itrunc;
23904+ if (unlikely(match_int(&a->args[0], &n))) {
4a4d8108 23905+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23906+ break;
23907+ }
23908+ u.xino_itrunc->bindex = n;
23909+ aufs_read_lock(root, AuLock_FLUSH);
23910+ if (n < 0 || au_sbend(sb) < n) {
4a4d8108 23911+ pr_err("out of bounds, %d\n", n);
1facf9fc 23912+ aufs_read_unlock(root, !AuLock_IR);
23913+ break;
23914+ }
23915+ aufs_read_unlock(root, !AuLock_IR);
23916+ err = 0;
23917+ opt->type = token;
23918+ break;
23919+
23920+ case Opt_dirwh:
23921+ if (unlikely(match_int(&a->args[0], &opt->dirwh)))
23922+ break;
23923+ err = 0;
23924+ opt->type = token;
23925+ break;
23926+
23927+ case Opt_rdcache:
027c5e7a
AM
23928+ if (unlikely(match_int(&a->args[0], &n))) {
23929+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23930+ break;
027c5e7a
AM
23931+ }
23932+ if (unlikely(n > AUFS_RDCACHE_MAX)) {
23933+ pr_err("rdcache must be smaller than %d\n",
23934+ AUFS_RDCACHE_MAX);
23935+ break;
23936+ }
23937+ opt->rdcache = n;
1facf9fc 23938+ err = 0;
23939+ opt->type = token;
23940+ break;
23941+ case Opt_rdblk:
23942+ if (unlikely(match_int(&a->args[0], &n)
1308ab2a 23943+ || n < 0
1facf9fc 23944+ || n > KMALLOC_MAX_SIZE)) {
4a4d8108 23945+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23946+ break;
23947+ }
1308ab2a 23948+ if (unlikely(n && n < NAME_MAX)) {
4a4d8108
AM
23949+ pr_err("rdblk must be larger than %d\n",
23950+ NAME_MAX);
1facf9fc 23951+ break;
23952+ }
23953+ opt->rdblk = n;
23954+ err = 0;
23955+ opt->type = token;
23956+ break;
23957+ case Opt_rdhash:
23958+ if (unlikely(match_int(&a->args[0], &n)
1308ab2a 23959+ || n < 0
1facf9fc 23960+ || n * sizeof(struct hlist_head)
23961+ > KMALLOC_MAX_SIZE)) {
4a4d8108 23962+ pr_err("bad integer in %s\n", opt_str);
1facf9fc 23963+ break;
23964+ }
23965+ opt->rdhash = n;
23966+ err = 0;
23967+ opt->type = token;
23968+ break;
23969+
23970+ case Opt_trunc_xino:
23971+ case Opt_notrunc_xino:
23972+ case Opt_noxino:
23973+ case Opt_trunc_xib:
23974+ case Opt_notrunc_xib:
dece6358
AM
23975+ case Opt_shwh:
23976+ case Opt_noshwh:
076b876e
AM
23977+ case Opt_dirperm1:
23978+ case Opt_nodirperm1:
1facf9fc 23979+ case Opt_plink:
23980+ case Opt_noplink:
23981+ case Opt_list_plink:
4a4d8108
AM
23982+ case Opt_dio:
23983+ case Opt_nodio:
1facf9fc 23984+ case Opt_diropq_a:
23985+ case Opt_diropq_w:
23986+ case Opt_warn_perm:
23987+ case Opt_nowarn_perm:
23988+ case Opt_refrof:
23989+ case Opt_norefrof:
23990+ case Opt_verbose:
23991+ case Opt_noverbose:
23992+ case Opt_sum:
23993+ case Opt_nosum:
23994+ case Opt_wsum:
dece6358
AM
23995+ case Opt_rdblk_def:
23996+ case Opt_rdhash_def:
c1595e42
JR
23997+ case Opt_acl:
23998+ case Opt_noacl:
1facf9fc 23999+ err = 0;
24000+ opt->type = token;
24001+ break;
24002+
24003+ case Opt_udba:
24004+ opt->udba = udba_val(a->args[0].from);
24005+ if (opt->udba >= 0) {
24006+ err = 0;
24007+ opt->type = token;
24008+ } else
4a4d8108 24009+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 24010+ break;
24011+
24012+ case Opt_wbr_create:
24013+ u.create = &opt->wbr_create;
24014+ u.create->wbr_create
24015+ = au_wbr_create_val(a->args[0].from, u.create);
24016+ if (u.create->wbr_create >= 0) {
24017+ err = 0;
24018+ opt->type = token;
24019+ } else
4a4d8108 24020+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 24021+ break;
24022+ case Opt_wbr_copyup:
24023+ opt->wbr_copyup = au_wbr_copyup_val(a->args[0].from);
24024+ if (opt->wbr_copyup >= 0) {
24025+ err = 0;
24026+ opt->type = token;
24027+ } else
4a4d8108 24028+ pr_err("wrong value, %s\n", opt_str);
1facf9fc 24029+ break;
24030+
076b876e
AM
24031+ case Opt_fhsm_sec:
24032+ if (unlikely(match_int(&a->args[0], &n)
24033+ || n < 0)) {
24034+ pr_err("bad integer in %s\n", opt_str);
24035+ break;
24036+ }
24037+ if (sysaufs_brs) {
24038+ opt->fhsm_second = n;
24039+ opt->type = token;
24040+ } else
24041+ pr_warn("ignored %s\n", opt_str);
24042+ err = 0;
24043+ break;
24044+
1facf9fc 24045+ case Opt_ignore:
0c3ec466 24046+ pr_warn("ignored %s\n", opt_str);
1facf9fc 24047+ /*FALLTHROUGH*/
24048+ case Opt_ignore_silent:
24049+ skipped = 1;
24050+ err = 0;
24051+ break;
24052+ case Opt_err:
4a4d8108 24053+ pr_err("unknown option %s\n", opt_str);
1facf9fc 24054+ break;
24055+ }
24056+
24057+ if (!err && !skipped) {
24058+ if (unlikely(++opt > opt_tail)) {
24059+ err = -E2BIG;
24060+ opt--;
24061+ opt->type = Opt_tail;
24062+ break;
24063+ }
24064+ opt->type = Opt_tail;
24065+ }
24066+ }
24067+
24068+ kfree(a);
24069+ dump_opts(opts);
24070+ if (unlikely(err))
24071+ au_opts_free(opts);
24072+
4f0767ce 24073+out:
1facf9fc 24074+ return err;
24075+}
24076+
24077+static int au_opt_wbr_create(struct super_block *sb,
24078+ struct au_opt_wbr_create *create)
24079+{
24080+ int err;
24081+ struct au_sbinfo *sbinfo;
24082+
dece6358
AM
24083+ SiMustWriteLock(sb);
24084+
1facf9fc 24085+ err = 1; /* handled */
24086+ sbinfo = au_sbi(sb);
24087+ if (sbinfo->si_wbr_create_ops->fin) {
24088+ err = sbinfo->si_wbr_create_ops->fin(sb);
24089+ if (!err)
24090+ err = 1;
24091+ }
24092+
24093+ sbinfo->si_wbr_create = create->wbr_create;
24094+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + create->wbr_create;
24095+ switch (create->wbr_create) {
24096+ case AuWbrCreate_MFSRRV:
24097+ case AuWbrCreate_MFSRR:
392086de
AM
24098+ case AuWbrCreate_PMFSRR:
24099+ case AuWbrCreate_PMFSRRV:
1facf9fc 24100+ sbinfo->si_wbr_mfs.mfsrr_watermark = create->mfsrr_watermark;
24101+ /*FALLTHROUGH*/
24102+ case AuWbrCreate_MFS:
24103+ case AuWbrCreate_MFSV:
24104+ case AuWbrCreate_PMFS:
24105+ case AuWbrCreate_PMFSV:
e49829fe
JR
24106+ sbinfo->si_wbr_mfs.mfs_expire
24107+ = msecs_to_jiffies(create->mfs_second * MSEC_PER_SEC);
1facf9fc 24108+ break;
24109+ }
24110+
24111+ if (sbinfo->si_wbr_create_ops->init)
24112+ sbinfo->si_wbr_create_ops->init(sb); /* ignore */
24113+
24114+ return err;
24115+}
24116+
24117+/*
24118+ * returns,
24119+ * plus: processed without an error
24120+ * zero: unprocessed
24121+ */
24122+static int au_opt_simple(struct super_block *sb, struct au_opt *opt,
24123+ struct au_opts *opts)
24124+{
24125+ int err;
24126+ struct au_sbinfo *sbinfo;
24127+
dece6358
AM
24128+ SiMustWriteLock(sb);
24129+
1facf9fc 24130+ err = 1; /* handled */
24131+ sbinfo = au_sbi(sb);
24132+ switch (opt->type) {
24133+ case Opt_udba:
24134+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
24135+ sbinfo->si_mntflags |= opt->udba;
24136+ opts->given_udba |= opt->udba;
24137+ break;
24138+
24139+ case Opt_plink:
24140+ au_opt_set(sbinfo->si_mntflags, PLINK);
24141+ break;
24142+ case Opt_noplink:
24143+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
e49829fe 24144+ au_plink_put(sb, /*verbose*/1);
1facf9fc 24145+ au_opt_clr(sbinfo->si_mntflags, PLINK);
24146+ break;
24147+ case Opt_list_plink:
24148+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
24149+ au_plink_list(sb);
24150+ break;
24151+
4a4d8108
AM
24152+ case Opt_dio:
24153+ au_opt_set(sbinfo->si_mntflags, DIO);
24154+ au_fset_opts(opts->flags, REFRESH_DYAOP);
24155+ break;
24156+ case Opt_nodio:
24157+ au_opt_clr(sbinfo->si_mntflags, DIO);
24158+ au_fset_opts(opts->flags, REFRESH_DYAOP);
24159+ break;
24160+
076b876e
AM
24161+ case Opt_fhsm_sec:
24162+ au_fhsm_set(sbinfo, opt->fhsm_second);
24163+ break;
24164+
1facf9fc 24165+ case Opt_diropq_a:
24166+ au_opt_set(sbinfo->si_mntflags, ALWAYS_DIROPQ);
24167+ break;
24168+ case Opt_diropq_w:
24169+ au_opt_clr(sbinfo->si_mntflags, ALWAYS_DIROPQ);
24170+ break;
24171+
24172+ case Opt_warn_perm:
24173+ au_opt_set(sbinfo->si_mntflags, WARN_PERM);
24174+ break;
24175+ case Opt_nowarn_perm:
24176+ au_opt_clr(sbinfo->si_mntflags, WARN_PERM);
24177+ break;
24178+
24179+ case Opt_refrof:
24180+ au_opt_set(sbinfo->si_mntflags, REFROF);
24181+ break;
24182+ case Opt_norefrof:
24183+ au_opt_clr(sbinfo->si_mntflags, REFROF);
24184+ break;
24185+
24186+ case Opt_verbose:
24187+ au_opt_set(sbinfo->si_mntflags, VERBOSE);
24188+ break;
24189+ case Opt_noverbose:
24190+ au_opt_clr(sbinfo->si_mntflags, VERBOSE);
24191+ break;
24192+
24193+ case Opt_sum:
24194+ au_opt_set(sbinfo->si_mntflags, SUM);
24195+ break;
24196+ case Opt_wsum:
24197+ au_opt_clr(sbinfo->si_mntflags, SUM);
24198+ au_opt_set(sbinfo->si_mntflags, SUM_W);
24199+ case Opt_nosum:
24200+ au_opt_clr(sbinfo->si_mntflags, SUM);
24201+ au_opt_clr(sbinfo->si_mntflags, SUM_W);
24202+ break;
24203+
24204+ case Opt_wbr_create:
24205+ err = au_opt_wbr_create(sb, &opt->wbr_create);
24206+ break;
24207+ case Opt_wbr_copyup:
24208+ sbinfo->si_wbr_copyup = opt->wbr_copyup;
24209+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + opt->wbr_copyup;
24210+ break;
24211+
24212+ case Opt_dirwh:
24213+ sbinfo->si_dirwh = opt->dirwh;
24214+ break;
24215+
24216+ case Opt_rdcache:
e49829fe
JR
24217+ sbinfo->si_rdcache
24218+ = msecs_to_jiffies(opt->rdcache * MSEC_PER_SEC);
1facf9fc 24219+ break;
24220+ case Opt_rdblk:
24221+ sbinfo->si_rdblk = opt->rdblk;
24222+ break;
dece6358
AM
24223+ case Opt_rdblk_def:
24224+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
24225+ break;
1facf9fc 24226+ case Opt_rdhash:
24227+ sbinfo->si_rdhash = opt->rdhash;
24228+ break;
dece6358
AM
24229+ case Opt_rdhash_def:
24230+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
24231+ break;
24232+
24233+ case Opt_shwh:
24234+ au_opt_set(sbinfo->si_mntflags, SHWH);
24235+ break;
24236+ case Opt_noshwh:
24237+ au_opt_clr(sbinfo->si_mntflags, SHWH);
24238+ break;
1facf9fc 24239+
076b876e
AM
24240+ case Opt_dirperm1:
24241+ au_opt_set(sbinfo->si_mntflags, DIRPERM1);
24242+ break;
24243+ case Opt_nodirperm1:
24244+ au_opt_clr(sbinfo->si_mntflags, DIRPERM1);
24245+ break;
24246+
1facf9fc 24247+ case Opt_trunc_xino:
24248+ au_opt_set(sbinfo->si_mntflags, TRUNC_XINO);
24249+ break;
24250+ case Opt_notrunc_xino:
24251+ au_opt_clr(sbinfo->si_mntflags, TRUNC_XINO);
24252+ break;
24253+
24254+ case Opt_trunc_xino_path:
24255+ case Opt_itrunc_xino:
24256+ err = au_xino_trunc(sb, opt->xino_itrunc.bindex);
24257+ if (!err)
24258+ err = 1;
24259+ break;
24260+
24261+ case Opt_trunc_xib:
24262+ au_fset_opts(opts->flags, TRUNC_XIB);
24263+ break;
24264+ case Opt_notrunc_xib:
24265+ au_fclr_opts(opts->flags, TRUNC_XIB);
24266+ break;
24267+
c1595e42
JR
24268+ case Opt_acl:
24269+ sb->s_flags |= MS_POSIXACL;
24270+ break;
24271+ case Opt_noacl:
24272+ sb->s_flags &= ~MS_POSIXACL;
24273+ break;
24274+
1facf9fc 24275+ default:
24276+ err = 0;
24277+ break;
24278+ }
24279+
24280+ return err;
24281+}
24282+
24283+/*
24284+ * returns tri-state.
24285+ * plus: processed without an error
24286+ * zero: unprocessed
24287+ * minus: error
24288+ */
24289+static int au_opt_br(struct super_block *sb, struct au_opt *opt,
24290+ struct au_opts *opts)
24291+{
24292+ int err, do_refresh;
24293+
24294+ err = 0;
24295+ switch (opt->type) {
24296+ case Opt_append:
24297+ opt->add.bindex = au_sbend(sb) + 1;
24298+ if (opt->add.bindex < 0)
24299+ opt->add.bindex = 0;
24300+ goto add;
24301+ case Opt_prepend:
24302+ opt->add.bindex = 0;
f6b6e03d 24303+ add: /* indented label */
1facf9fc 24304+ case Opt_add:
24305+ err = au_br_add(sb, &opt->add,
24306+ au_ftest_opts(opts->flags, REMOUNT));
24307+ if (!err) {
24308+ err = 1;
027c5e7a 24309+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 24310+ }
24311+ break;
24312+
24313+ case Opt_del:
24314+ case Opt_idel:
24315+ err = au_br_del(sb, &opt->del,
24316+ au_ftest_opts(opts->flags, REMOUNT));
24317+ if (!err) {
24318+ err = 1;
24319+ au_fset_opts(opts->flags, TRUNC_XIB);
027c5e7a 24320+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 24321+ }
24322+ break;
24323+
24324+ case Opt_mod:
24325+ case Opt_imod:
24326+ err = au_br_mod(sb, &opt->mod,
24327+ au_ftest_opts(opts->flags, REMOUNT),
24328+ &do_refresh);
24329+ if (!err) {
24330+ err = 1;
027c5e7a
AM
24331+ if (do_refresh)
24332+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 24333+ }
24334+ break;
24335+ }
24336+
24337+ return err;
24338+}
24339+
24340+static int au_opt_xino(struct super_block *sb, struct au_opt *opt,
24341+ struct au_opt_xino **opt_xino,
24342+ struct au_opts *opts)
24343+{
24344+ int err;
24345+ aufs_bindex_t bend, bindex;
24346+ struct dentry *root, *parent, *h_root;
24347+
24348+ err = 0;
24349+ switch (opt->type) {
24350+ case Opt_xino:
24351+ err = au_xino_set(sb, &opt->xino,
24352+ !!au_ftest_opts(opts->flags, REMOUNT));
24353+ if (unlikely(err))
24354+ break;
24355+
24356+ *opt_xino = &opt->xino;
24357+ au_xino_brid_set(sb, -1);
24358+
24359+ /* safe d_parent access */
24360+ parent = opt->xino.file->f_dentry->d_parent;
24361+ root = sb->s_root;
24362+ bend = au_sbend(sb);
24363+ for (bindex = 0; bindex <= bend; bindex++) {
24364+ h_root = au_h_dptr(root, bindex);
24365+ if (h_root == parent) {
24366+ au_xino_brid_set(sb, au_sbr_id(sb, bindex));
24367+ break;
24368+ }
24369+ }
24370+ break;
24371+
24372+ case Opt_noxino:
24373+ au_xino_clr(sb);
24374+ au_xino_brid_set(sb, -1);
24375+ *opt_xino = (void *)-1;
24376+ break;
24377+ }
24378+
24379+ return err;
24380+}
24381+
24382+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
24383+ unsigned int pending)
24384+{
076b876e 24385+ int err, fhsm;
1facf9fc 24386+ aufs_bindex_t bindex, bend;
7f2ca4b1 24387+ unsigned char do_plink, skip, do_free, can_no_dreval;
1facf9fc 24388+ struct au_branch *br;
24389+ struct au_wbr *wbr;
7f2ca4b1 24390+ struct dentry *root, *dentry;
1facf9fc 24391+ struct inode *dir, *h_dir;
24392+ struct au_sbinfo *sbinfo;
24393+ struct au_hinode *hdir;
24394+
dece6358
AM
24395+ SiMustAnyLock(sb);
24396+
1facf9fc 24397+ sbinfo = au_sbi(sb);
24398+ AuDebugOn(!(sbinfo->si_mntflags & AuOptMask_UDBA));
24399+
dece6358
AM
24400+ if (!(sb_flags & MS_RDONLY)) {
24401+ if (unlikely(!au_br_writable(au_sbr_perm(sb, 0))))
0c3ec466 24402+ pr_warn("first branch should be rw\n");
dece6358 24403+ if (unlikely(au_opt_test(sbinfo->si_mntflags, SHWH)))
0c3ec466 24404+ pr_warn("shwh should be used with ro\n");
dece6358 24405+ }
1facf9fc 24406+
4a4d8108 24407+ if (au_opt_test((sbinfo->si_mntflags | pending), UDBA_HNOTIFY)
1facf9fc 24408+ && !au_opt_test(sbinfo->si_mntflags, XINO))
0c3ec466 24409+ pr_warn("udba=*notify requires xino\n");
1facf9fc 24410+
076b876e
AM
24411+ if (au_opt_test(sbinfo->si_mntflags, DIRPERM1))
24412+ pr_warn("dirperm1 breaks the protection"
24413+ " by the permission bits on the lower branch\n");
24414+
1facf9fc 24415+ err = 0;
076b876e 24416+ fhsm = 0;
1facf9fc 24417+ root = sb->s_root;
4a4d8108 24418+ dir = root->d_inode;
1facf9fc 24419+ do_plink = !!au_opt_test(sbinfo->si_mntflags, PLINK);
7f2ca4b1
JR
24420+ can_no_dreval = !!au_opt_test((sbinfo->si_mntflags | pending),
24421+ UDBA_NONE);
1facf9fc 24422+ bend = au_sbend(sb);
24423+ for (bindex = 0; !err && bindex <= bend; bindex++) {
24424+ skip = 0;
24425+ h_dir = au_h_iptr(dir, bindex);
24426+ br = au_sbr(sb, bindex);
1facf9fc 24427+
c1595e42
JR
24428+ if ((br->br_perm & AuBrAttr_ICEX)
24429+ && !h_dir->i_op->listxattr)
24430+ br->br_perm &= ~AuBrAttr_ICEX;
24431+#if 0
24432+ if ((br->br_perm & AuBrAttr_ICEX_SEC)
24433+ && (au_br_sb(br)->s_flags & MS_NOSEC))
24434+ br->br_perm &= ~AuBrAttr_ICEX_SEC;
24435+#endif
24436+
24437+ do_free = 0;
1facf9fc 24438+ wbr = br->br_wbr;
24439+ if (wbr)
24440+ wbr_wh_read_lock(wbr);
24441+
1e00d052 24442+ if (!au_br_writable(br->br_perm)) {
1facf9fc 24443+ do_free = !!wbr;
24444+ skip = (!wbr
24445+ || (!wbr->wbr_whbase
24446+ && !wbr->wbr_plink
24447+ && !wbr->wbr_orph));
1e00d052 24448+ } else if (!au_br_wh_linkable(br->br_perm)) {
1facf9fc 24449+ /* skip = (!br->br_whbase && !br->br_orph); */
24450+ skip = (!wbr || !wbr->wbr_whbase);
24451+ if (skip && wbr) {
24452+ if (do_plink)
24453+ skip = !!wbr->wbr_plink;
24454+ else
24455+ skip = !wbr->wbr_plink;
24456+ }
1e00d052 24457+ } else {
1facf9fc 24458+ /* skip = (br->br_whbase && br->br_ohph); */
24459+ skip = (wbr && wbr->wbr_whbase);
24460+ if (skip) {
24461+ if (do_plink)
24462+ skip = !!wbr->wbr_plink;
24463+ else
24464+ skip = !wbr->wbr_plink;
24465+ }
1facf9fc 24466+ }
24467+ if (wbr)
24468+ wbr_wh_read_unlock(wbr);
24469+
7f2ca4b1
JR
24470+ if (can_no_dreval) {
24471+ dentry = br->br_path.dentry;
24472+ spin_lock(&dentry->d_lock);
24473+ if (dentry->d_flags &
24474+ (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE))
24475+ can_no_dreval = 0;
24476+ spin_unlock(&dentry->d_lock);
24477+ }
24478+
076b876e
AM
24479+ if (au_br_fhsm(br->br_perm)) {
24480+ fhsm++;
24481+ AuDebugOn(!br->br_fhsm);
24482+ }
24483+
1facf9fc 24484+ if (skip)
24485+ continue;
24486+
24487+ hdir = au_hi(dir, bindex);
4a4d8108 24488+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 24489+ if (wbr)
24490+ wbr_wh_write_lock(wbr);
86dc4139 24491+ err = au_wh_init(br, sb);
1facf9fc 24492+ if (wbr)
24493+ wbr_wh_write_unlock(wbr);
4a4d8108 24494+ au_hn_imtx_unlock(hdir);
1facf9fc 24495+
24496+ if (!err && do_free) {
24497+ kfree(wbr);
24498+ br->br_wbr = NULL;
24499+ }
24500+ }
24501+
7f2ca4b1
JR
24502+ if (can_no_dreval)
24503+ au_fset_si(sbinfo, NO_DREVAL);
24504+ else
24505+ au_fclr_si(sbinfo, NO_DREVAL);
24506+
c1595e42 24507+ if (fhsm >= 2) {
076b876e 24508+ au_fset_si(sbinfo, FHSM);
c1595e42
JR
24509+ for (bindex = bend; bindex >= 0; bindex--) {
24510+ br = au_sbr(sb, bindex);
24511+ if (au_br_fhsm(br->br_perm)) {
24512+ au_fhsm_set_bottom(sb, bindex);
24513+ break;
24514+ }
24515+ }
24516+ } else {
076b876e 24517+ au_fclr_si(sbinfo, FHSM);
c1595e42
JR
24518+ au_fhsm_set_bottom(sb, -1);
24519+ }
076b876e 24520+
1facf9fc 24521+ return err;
24522+}
24523+
24524+int au_opts_mount(struct super_block *sb, struct au_opts *opts)
24525+{
24526+ int err;
24527+ unsigned int tmp;
027c5e7a 24528+ aufs_bindex_t bindex, bend;
1facf9fc 24529+ struct au_opt *opt;
24530+ struct au_opt_xino *opt_xino, xino;
24531+ struct au_sbinfo *sbinfo;
027c5e7a 24532+ struct au_branch *br;
076b876e 24533+ struct inode *dir;
1facf9fc 24534+
dece6358
AM
24535+ SiMustWriteLock(sb);
24536+
1facf9fc 24537+ err = 0;
24538+ opt_xino = NULL;
24539+ opt = opts->opt;
24540+ while (err >= 0 && opt->type != Opt_tail)
24541+ err = au_opt_simple(sb, opt++, opts);
24542+ if (err > 0)
24543+ err = 0;
24544+ else if (unlikely(err < 0))
24545+ goto out;
24546+
24547+ /* disable xino and udba temporary */
24548+ sbinfo = au_sbi(sb);
24549+ tmp = sbinfo->si_mntflags;
24550+ au_opt_clr(sbinfo->si_mntflags, XINO);
24551+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_REVAL);
24552+
24553+ opt = opts->opt;
24554+ while (err >= 0 && opt->type != Opt_tail)
24555+ err = au_opt_br(sb, opt++, opts);
24556+ if (err > 0)
24557+ err = 0;
24558+ else if (unlikely(err < 0))
24559+ goto out;
24560+
24561+ bend = au_sbend(sb);
24562+ if (unlikely(bend < 0)) {
24563+ err = -EINVAL;
4a4d8108 24564+ pr_err("no branches\n");
1facf9fc 24565+ goto out;
24566+ }
24567+
24568+ if (au_opt_test(tmp, XINO))
24569+ au_opt_set(sbinfo->si_mntflags, XINO);
24570+ opt = opts->opt;
24571+ while (!err && opt->type != Opt_tail)
24572+ err = au_opt_xino(sb, opt++, &opt_xino, opts);
24573+ if (unlikely(err))
24574+ goto out;
24575+
24576+ err = au_opts_verify(sb, sb->s_flags, tmp);
24577+ if (unlikely(err))
24578+ goto out;
24579+
24580+ /* restore xino */
24581+ if (au_opt_test(tmp, XINO) && !opt_xino) {
24582+ xino.file = au_xino_def(sb);
24583+ err = PTR_ERR(xino.file);
24584+ if (IS_ERR(xino.file))
24585+ goto out;
24586+
24587+ err = au_xino_set(sb, &xino, /*remount*/0);
24588+ fput(xino.file);
24589+ if (unlikely(err))
24590+ goto out;
24591+ }
24592+
24593+ /* restore udba */
027c5e7a 24594+ tmp &= AuOptMask_UDBA;
1facf9fc 24595+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
027c5e7a
AM
24596+ sbinfo->si_mntflags |= tmp;
24597+ bend = au_sbend(sb);
24598+ for (bindex = 0; bindex <= bend; bindex++) {
24599+ br = au_sbr(sb, bindex);
24600+ err = au_hnotify_reset_br(tmp, br, br->br_perm);
24601+ if (unlikely(err))
24602+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
24603+ bindex, err);
24604+ /* go on even if err */
24605+ }
4a4d8108 24606+ if (au_opt_test(tmp, UDBA_HNOTIFY)) {
076b876e 24607+ dir = sb->s_root->d_inode;
4a4d8108 24608+ au_hn_reset(dir, au_hi_flags(dir, /*isdir*/1) & ~AuHi_XINO);
1facf9fc 24609+ }
24610+
4f0767ce 24611+out:
1facf9fc 24612+ return err;
24613+}
24614+
24615+int au_opts_remount(struct super_block *sb, struct au_opts *opts)
24616+{
24617+ int err, rerr;
7f2ca4b1 24618+ unsigned char no_dreval;
1facf9fc 24619+ struct inode *dir;
24620+ struct au_opt_xino *opt_xino;
24621+ struct au_opt *opt;
24622+ struct au_sbinfo *sbinfo;
24623+
dece6358
AM
24624+ SiMustWriteLock(sb);
24625+
7f2ca4b1 24626+ err = 0;
1facf9fc 24627+ dir = sb->s_root->d_inode;
24628+ sbinfo = au_sbi(sb);
1facf9fc 24629+ opt_xino = NULL;
24630+ opt = opts->opt;
24631+ while (err >= 0 && opt->type != Opt_tail) {
24632+ err = au_opt_simple(sb, opt, opts);
24633+ if (!err)
24634+ err = au_opt_br(sb, opt, opts);
24635+ if (!err)
24636+ err = au_opt_xino(sb, opt, &opt_xino, opts);
24637+ opt++;
24638+ }
24639+ if (err > 0)
24640+ err = 0;
24641+ AuTraceErr(err);
24642+ /* go on even if err */
24643+
7f2ca4b1 24644+ no_dreval = !!au_ftest_si(sbinfo, NO_DREVAL);
1facf9fc 24645+ rerr = au_opts_verify(sb, opts->sb_flags, /*pending*/0);
24646+ if (unlikely(rerr && !err))
24647+ err = rerr;
24648+
7f2ca4b1
JR
24649+ if (no_dreval != !!au_ftest_si(sbinfo, NO_DREVAL))
24650+ au_fset_opts(opts->flags, REFRESH_IDOP);
24651+
1facf9fc 24652+ if (au_ftest_opts(opts->flags, TRUNC_XIB)) {
24653+ rerr = au_xib_trunc(sb);
24654+ if (unlikely(rerr && !err))
24655+ err = rerr;
24656+ }
24657+
24658+ /* will be handled by the caller */
027c5e7a 24659+ if (!au_ftest_opts(opts->flags, REFRESH)
7f2ca4b1
JR
24660+ && (opts->given_udba
24661+ || au_opt_test(sbinfo->si_mntflags, XINO)
24662+ || au_ftest_opts(opts->flags, REFRESH_IDOP)
24663+ ))
027c5e7a 24664+ au_fset_opts(opts->flags, REFRESH);
1facf9fc 24665+
24666+ AuDbg("status 0x%x\n", opts->flags);
24667+ return err;
24668+}
24669+
24670+/* ---------------------------------------------------------------------- */
24671+
24672+unsigned int au_opt_udba(struct super_block *sb)
24673+{
24674+ return au_mntflags(sb) & AuOptMask_UDBA;
24675+}
7f207e10
AM
24676diff -urN /usr/share/empty/fs/aufs/opts.h linux/fs/aufs/opts.h
24677--- /usr/share/empty/fs/aufs/opts.h 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
24678+++ linux/fs/aufs/opts.h 2016-02-28 11:27:01.280579017 +0100
24679@@ -0,0 +1,212 @@
1facf9fc 24680+/*
7f2ca4b1 24681+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 24682+ *
24683+ * This program, aufs is free software; you can redistribute it and/or modify
24684+ * it under the terms of the GNU General Public License as published by
24685+ * the Free Software Foundation; either version 2 of the License, or
24686+ * (at your option) any later version.
dece6358
AM
24687+ *
24688+ * This program is distributed in the hope that it will be useful,
24689+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24690+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24691+ * GNU General Public License for more details.
24692+ *
24693+ * You should have received a copy of the GNU General Public License
523b37e3 24694+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 24695+ */
24696+
24697+/*
24698+ * mount options/flags
24699+ */
24700+
24701+#ifndef __AUFS_OPTS_H__
24702+#define __AUFS_OPTS_H__
24703+
24704+#ifdef __KERNEL__
24705+
dece6358 24706+#include <linux/path.h>
1facf9fc 24707+
dece6358
AM
24708+struct file;
24709+struct super_block;
24710+
1facf9fc 24711+/* ---------------------------------------------------------------------- */
24712+
24713+/* mount flags */
24714+#define AuOpt_XINO 1 /* external inode number bitmap
24715+ and translation table */
24716+#define AuOpt_TRUNC_XINO (1 << 1) /* truncate xino files */
24717+#define AuOpt_UDBA_NONE (1 << 2) /* users direct branch access */
24718+#define AuOpt_UDBA_REVAL (1 << 3)
4a4d8108 24719+#define AuOpt_UDBA_HNOTIFY (1 << 4)
dece6358
AM
24720+#define AuOpt_SHWH (1 << 5) /* show whiteout */
24721+#define AuOpt_PLINK (1 << 6) /* pseudo-link */
076b876e
AM
24722+#define AuOpt_DIRPERM1 (1 << 7) /* ignore the lower dir's perm
24723+ bits */
dece6358
AM
24724+#define AuOpt_REFROF (1 << 8) /* unimplemented */
24725+#define AuOpt_ALWAYS_DIROPQ (1 << 9) /* policy to creating diropq */
24726+#define AuOpt_SUM (1 << 10) /* summation for statfs(2) */
24727+#define AuOpt_SUM_W (1 << 11) /* unimplemented */
24728+#define AuOpt_WARN_PERM (1 << 12) /* warn when add-branch */
24729+#define AuOpt_VERBOSE (1 << 13) /* busy inode when del-branch */
4a4d8108 24730+#define AuOpt_DIO (1 << 14) /* direct io */
1facf9fc 24731+
4a4d8108
AM
24732+#ifndef CONFIG_AUFS_HNOTIFY
24733+#undef AuOpt_UDBA_HNOTIFY
24734+#define AuOpt_UDBA_HNOTIFY 0
1facf9fc 24735+#endif
dece6358
AM
24736+#ifndef CONFIG_AUFS_SHWH
24737+#undef AuOpt_SHWH
24738+#define AuOpt_SHWH 0
24739+#endif
1facf9fc 24740+
24741+#define AuOpt_Def (AuOpt_XINO \
24742+ | AuOpt_UDBA_REVAL \
24743+ | AuOpt_PLINK \
24744+ /* | AuOpt_DIRPERM1 */ \
24745+ | AuOpt_WARN_PERM)
24746+#define AuOptMask_UDBA (AuOpt_UDBA_NONE \
24747+ | AuOpt_UDBA_REVAL \
4a4d8108 24748+ | AuOpt_UDBA_HNOTIFY)
1facf9fc 24749+
24750+#define au_opt_test(flags, name) (flags & AuOpt_##name)
24751+#define au_opt_set(flags, name) do { \
24752+ BUILD_BUG_ON(AuOpt_##name & AuOptMask_UDBA); \
24753+ ((flags) |= AuOpt_##name); \
24754+} while (0)
24755+#define au_opt_set_udba(flags, name) do { \
24756+ (flags) &= ~AuOptMask_UDBA; \
24757+ ((flags) |= AuOpt_##name); \
24758+} while (0)
7f207e10
AM
24759+#define au_opt_clr(flags, name) do { \
24760+ ((flags) &= ~AuOpt_##name); \
24761+} while (0)
1facf9fc 24762+
e49829fe
JR
24763+static inline unsigned int au_opts_plink(unsigned int mntflags)
24764+{
24765+#ifdef CONFIG_PROC_FS
24766+ return mntflags;
24767+#else
24768+ return mntflags & ~AuOpt_PLINK;
24769+#endif
24770+}
24771+
1facf9fc 24772+/* ---------------------------------------------------------------------- */
24773+
24774+/* policies to select one among multiple writable branches */
24775+enum {
24776+ AuWbrCreate_TDP, /* top down parent */
24777+ AuWbrCreate_RR, /* round robin */
24778+ AuWbrCreate_MFS, /* most free space */
24779+ AuWbrCreate_MFSV, /* mfs with seconds */
24780+ AuWbrCreate_MFSRR, /* mfs then rr */
24781+ AuWbrCreate_MFSRRV, /* mfs then rr with seconds */
24782+ AuWbrCreate_PMFS, /* parent and mfs */
24783+ AuWbrCreate_PMFSV, /* parent and mfs with seconds */
392086de
AM
24784+ AuWbrCreate_PMFSRR, /* parent, mfs and round-robin */
24785+ AuWbrCreate_PMFSRRV, /* plus seconds */
1facf9fc 24786+
24787+ AuWbrCreate_Def = AuWbrCreate_TDP
24788+};
24789+
24790+enum {
24791+ AuWbrCopyup_TDP, /* top down parent */
24792+ AuWbrCopyup_BUP, /* bottom up parent */
24793+ AuWbrCopyup_BU, /* bottom up */
24794+
24795+ AuWbrCopyup_Def = AuWbrCopyup_TDP
24796+};
24797+
24798+/* ---------------------------------------------------------------------- */
24799+
24800+struct au_opt_add {
24801+ aufs_bindex_t bindex;
24802+ char *pathname;
24803+ int perm;
24804+ struct path path;
24805+};
24806+
24807+struct au_opt_del {
24808+ char *pathname;
24809+ struct path h_path;
24810+};
24811+
24812+struct au_opt_mod {
24813+ char *path;
24814+ int perm;
24815+ struct dentry *h_root;
24816+};
24817+
24818+struct au_opt_xino {
24819+ char *path;
24820+ struct file *file;
24821+};
24822+
24823+struct au_opt_xino_itrunc {
24824+ aufs_bindex_t bindex;
24825+};
24826+
24827+struct au_opt_wbr_create {
24828+ int wbr_create;
24829+ int mfs_second;
24830+ unsigned long long mfsrr_watermark;
24831+};
24832+
24833+struct au_opt {
24834+ int type;
24835+ union {
24836+ struct au_opt_xino xino;
24837+ struct au_opt_xino_itrunc xino_itrunc;
24838+ struct au_opt_add add;
24839+ struct au_opt_del del;
24840+ struct au_opt_mod mod;
24841+ int dirwh;
24842+ int rdcache;
24843+ unsigned int rdblk;
24844+ unsigned int rdhash;
24845+ int udba;
24846+ struct au_opt_wbr_create wbr_create;
24847+ int wbr_copyup;
076b876e 24848+ unsigned int fhsm_second;
1facf9fc 24849+ };
24850+};
24851+
24852+/* opts flags */
24853+#define AuOpts_REMOUNT 1
027c5e7a
AM
24854+#define AuOpts_REFRESH (1 << 1)
24855+#define AuOpts_TRUNC_XIB (1 << 2)
24856+#define AuOpts_REFRESH_DYAOP (1 << 3)
7f2ca4b1 24857+#define AuOpts_REFRESH_IDOP (1 << 4)
1facf9fc 24858+#define au_ftest_opts(flags, name) ((flags) & AuOpts_##name)
7f207e10
AM
24859+#define au_fset_opts(flags, name) \
24860+ do { (flags) |= AuOpts_##name; } while (0)
24861+#define au_fclr_opts(flags, name) \
24862+ do { (flags) &= ~AuOpts_##name; } while (0)
1facf9fc 24863+
24864+struct au_opts {
24865+ struct au_opt *opt;
24866+ int max_opt;
24867+
24868+ unsigned int given_udba;
24869+ unsigned int flags;
24870+ unsigned long sb_flags;
24871+};
24872+
24873+/* ---------------------------------------------------------------------- */
24874+
7f2ca4b1 24875+/* opts.c */
076b876e 24876+void au_optstr_br_perm(au_br_perm_str_t *str, int perm);
1facf9fc 24877+const char *au_optstr_udba(int udba);
24878+const char *au_optstr_wbr_copyup(int wbr_copyup);
24879+const char *au_optstr_wbr_create(int wbr_create);
24880+
24881+void au_opts_free(struct au_opts *opts);
24882+int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts);
24883+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
24884+ unsigned int pending);
24885+int au_opts_mount(struct super_block *sb, struct au_opts *opts);
24886+int au_opts_remount(struct super_block *sb, struct au_opts *opts);
24887+
24888+unsigned int au_opt_udba(struct super_block *sb);
24889+
1facf9fc 24890+#endif /* __KERNEL__ */
24891+#endif /* __AUFS_OPTS_H__ */
7f207e10
AM
24892diff -urN /usr/share/empty/fs/aufs/plink.c linux/fs/aufs/plink.c
24893--- /usr/share/empty/fs/aufs/plink.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 24894+++ linux/fs/aufs/plink.c 2016-02-28 11:27:01.280579017 +0100
523b37e3 24895@@ -0,0 +1,532 @@
1facf9fc 24896+/*
7f2ca4b1 24897+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 24898+ *
24899+ * This program, aufs is free software; you can redistribute it and/or modify
24900+ * it under the terms of the GNU General Public License as published by
24901+ * the Free Software Foundation; either version 2 of the License, or
24902+ * (at your option) any later version.
dece6358
AM
24903+ *
24904+ * This program is distributed in the hope that it will be useful,
24905+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
24906+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24907+ * GNU General Public License for more details.
24908+ *
24909+ * You should have received a copy of the GNU General Public License
523b37e3 24910+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 24911+ */
24912+
24913+/*
24914+ * pseudo-link
24915+ */
24916+
24917+#include "aufs.h"
24918+
24919+/*
e49829fe 24920+ * the pseudo-link maintenance mode.
1facf9fc 24921+ * while a user process maintains the pseudo-links,
24922+ * prohibit adding a new plink and branch manipulation.
e49829fe
JR
24923+ *
24924+ * Flags
24925+ * NOPLM:
24926+ * For entry functions which will handle plink, and i_mutex is already held
24927+ * in VFS.
24928+ * They cannot wait and should return an error at once.
24929+ * Callers have to check the error.
24930+ * NOPLMW:
24931+ * For entry functions which will handle plink, but i_mutex is not held
24932+ * in VFS.
24933+ * They can wait for the plink maintenance mode to finish.
24934+ *
24935+ * They behave like F_SETLK and F_SETLKW.
24936+ * If the caller never handles plink, then both flags are unnecessary.
1facf9fc 24937+ */
e49829fe
JR
24938+
24939+int au_plink_maint(struct super_block *sb, int flags)
1facf9fc 24940+{
e49829fe
JR
24941+ int err;
24942+ pid_t pid, ppid;
24943+ struct au_sbinfo *sbi;
dece6358
AM
24944+
24945+ SiMustAnyLock(sb);
24946+
e49829fe
JR
24947+ err = 0;
24948+ if (!au_opt_test(au_mntflags(sb), PLINK))
24949+ goto out;
24950+
24951+ sbi = au_sbi(sb);
24952+ pid = sbi->si_plink_maint_pid;
24953+ if (!pid || pid == current->pid)
24954+ goto out;
24955+
24956+ /* todo: it highly depends upon /sbin/mount.aufs */
24957+ rcu_read_lock();
24958+ ppid = task_pid_vnr(rcu_dereference(current->real_parent));
24959+ rcu_read_unlock();
24960+ if (pid == ppid)
24961+ goto out;
24962+
24963+ if (au_ftest_lock(flags, NOPLMW)) {
027c5e7a
AM
24964+ /* if there is no i_mutex lock in VFS, we don't need to wait */
24965+ /* AuDebugOn(!lockdep_depth(current)); */
e49829fe
JR
24966+ while (sbi->si_plink_maint_pid) {
24967+ si_read_unlock(sb);
24968+ /* gave up wake_up_bit() */
24969+ wait_event(sbi->si_plink_wq, !sbi->si_plink_maint_pid);
24970+
24971+ if (au_ftest_lock(flags, FLUSH))
24972+ au_nwt_flush(&sbi->si_nowait);
24973+ si_noflush_read_lock(sb);
24974+ }
24975+ } else if (au_ftest_lock(flags, NOPLM)) {
24976+ AuDbg("ppid %d, pid %d\n", ppid, pid);
24977+ err = -EAGAIN;
24978+ }
24979+
24980+out:
24981+ return err;
4a4d8108
AM
24982+}
24983+
e49829fe 24984+void au_plink_maint_leave(struct au_sbinfo *sbinfo)
4a4d8108 24985+{
4a4d8108 24986+ spin_lock(&sbinfo->si_plink_maint_lock);
027c5e7a 24987+ sbinfo->si_plink_maint_pid = 0;
4a4d8108 24988+ spin_unlock(&sbinfo->si_plink_maint_lock);
027c5e7a 24989+ wake_up_all(&sbinfo->si_plink_wq);
4a4d8108
AM
24990+}
24991+
e49829fe 24992+int au_plink_maint_enter(struct super_block *sb)
4a4d8108
AM
24993+{
24994+ int err;
4a4d8108
AM
24995+ struct au_sbinfo *sbinfo;
24996+
24997+ err = 0;
4a4d8108
AM
24998+ sbinfo = au_sbi(sb);
24999+ /* make sure i am the only one in this fs */
e49829fe
JR
25000+ si_write_lock(sb, AuLock_FLUSH);
25001+ if (au_opt_test(au_mntflags(sb), PLINK)) {
25002+ spin_lock(&sbinfo->si_plink_maint_lock);
25003+ if (!sbinfo->si_plink_maint_pid)
25004+ sbinfo->si_plink_maint_pid = current->pid;
25005+ else
25006+ err = -EBUSY;
25007+ spin_unlock(&sbinfo->si_plink_maint_lock);
25008+ }
4a4d8108
AM
25009+ si_write_unlock(sb);
25010+
25011+ return err;
1facf9fc 25012+}
25013+
25014+/* ---------------------------------------------------------------------- */
25015+
1facf9fc 25016+#ifdef CONFIG_AUFS_DEBUG
25017+void au_plink_list(struct super_block *sb)
25018+{
86dc4139 25019+ int i;
1facf9fc 25020+ struct au_sbinfo *sbinfo;
86dc4139 25021+ struct hlist_head *plink_hlist;
1facf9fc 25022+ struct pseudo_link *plink;
25023+
dece6358
AM
25024+ SiMustAnyLock(sb);
25025+
1facf9fc 25026+ sbinfo = au_sbi(sb);
25027+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 25028+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 25029+
86dc4139
AM
25030+ for (i = 0; i < AuPlink_NHASH; i++) {
25031+ plink_hlist = &sbinfo->si_plink[i].head;
25032+ rcu_read_lock();
25033+ hlist_for_each_entry_rcu(plink, plink_hlist, hlist)
25034+ AuDbg("%lu\n", plink->inode->i_ino);
25035+ rcu_read_unlock();
25036+ }
1facf9fc 25037+}
25038+#endif
25039+
25040+/* is the inode pseudo-linked? */
25041+int au_plink_test(struct inode *inode)
25042+{
86dc4139 25043+ int found, i;
1facf9fc 25044+ struct au_sbinfo *sbinfo;
86dc4139 25045+ struct hlist_head *plink_hlist;
1facf9fc 25046+ struct pseudo_link *plink;
25047+
25048+ sbinfo = au_sbi(inode->i_sb);
dece6358 25049+ AuRwMustAnyLock(&sbinfo->si_rwsem);
1facf9fc 25050+ AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK));
e49829fe 25051+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
1facf9fc 25052+
25053+ found = 0;
86dc4139
AM
25054+ i = au_plink_hash(inode->i_ino);
25055+ plink_hlist = &sbinfo->si_plink[i].head;
4a4d8108 25056+ rcu_read_lock();
86dc4139 25057+ hlist_for_each_entry_rcu(plink, plink_hlist, hlist)
1facf9fc 25058+ if (plink->inode == inode) {
25059+ found = 1;
25060+ break;
25061+ }
4a4d8108 25062+ rcu_read_unlock();
1facf9fc 25063+ return found;
25064+}
25065+
25066+/* ---------------------------------------------------------------------- */
25067+
25068+/*
25069+ * generate a name for plink.
25070+ * the file will be stored under AUFS_WH_PLINKDIR.
25071+ */
25072+/* 20 is max digits length of ulong 64 */
25073+#define PLINK_NAME_LEN ((20 + 1) * 2)
25074+
25075+static int plink_name(char *name, int len, struct inode *inode,
25076+ aufs_bindex_t bindex)
25077+{
25078+ int rlen;
25079+ struct inode *h_inode;
25080+
25081+ h_inode = au_h_iptr(inode, bindex);
25082+ rlen = snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino);
25083+ return rlen;
25084+}
25085+
7f207e10
AM
25086+struct au_do_plink_lkup_args {
25087+ struct dentry **errp;
25088+ struct qstr *tgtname;
25089+ struct dentry *h_parent;
25090+ struct au_branch *br;
25091+};
25092+
25093+static struct dentry *au_do_plink_lkup(struct qstr *tgtname,
25094+ struct dentry *h_parent,
25095+ struct au_branch *br)
25096+{
25097+ struct dentry *h_dentry;
25098+ struct mutex *h_mtx;
25099+
25100+ h_mtx = &h_parent->d_inode->i_mutex;
25101+ mutex_lock_nested(h_mtx, AuLsc_I_CHILD2);
b4510431 25102+ h_dentry = vfsub_lkup_one(tgtname, h_parent);
7f207e10
AM
25103+ mutex_unlock(h_mtx);
25104+ return h_dentry;
25105+}
25106+
25107+static void au_call_do_plink_lkup(void *args)
25108+{
25109+ struct au_do_plink_lkup_args *a = args;
25110+ *a->errp = au_do_plink_lkup(a->tgtname, a->h_parent, a->br);
25111+}
25112+
1facf9fc 25113+/* lookup the plink-ed @inode under the branch at @bindex */
25114+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex)
25115+{
25116+ struct dentry *h_dentry, *h_parent;
25117+ struct au_branch *br;
25118+ struct inode *h_dir;
7f207e10 25119+ int wkq_err;
1facf9fc 25120+ char a[PLINK_NAME_LEN];
0c3ec466 25121+ struct qstr tgtname = QSTR_INIT(a, 0);
1facf9fc 25122+
e49829fe
JR
25123+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
25124+
1facf9fc 25125+ br = au_sbr(inode->i_sb, bindex);
25126+ h_parent = br->br_wbr->wbr_plink;
25127+ h_dir = h_parent->d_inode;
25128+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
25129+
2dfbb274 25130+ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
7f207e10
AM
25131+ struct au_do_plink_lkup_args args = {
25132+ .errp = &h_dentry,
25133+ .tgtname = &tgtname,
25134+ .h_parent = h_parent,
25135+ .br = br
25136+ };
25137+
25138+ wkq_err = au_wkq_wait(au_call_do_plink_lkup, &args);
25139+ if (unlikely(wkq_err))
25140+ h_dentry = ERR_PTR(wkq_err);
25141+ } else
25142+ h_dentry = au_do_plink_lkup(&tgtname, h_parent, br);
25143+
1facf9fc 25144+ return h_dentry;
25145+}
25146+
25147+/* create a pseudo-link */
25148+static int do_whplink(struct qstr *tgt, struct dentry *h_parent,
25149+ struct dentry *h_dentry, struct au_branch *br)
25150+{
25151+ int err;
25152+ struct path h_path = {
86dc4139 25153+ .mnt = au_br_mnt(br)
1facf9fc 25154+ };
523b37e3 25155+ struct inode *h_dir, *delegated;
1facf9fc 25156+
25157+ h_dir = h_parent->d_inode;
7f207e10 25158+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_CHILD2);
4f0767ce 25159+again:
b4510431 25160+ h_path.dentry = vfsub_lkup_one(tgt, h_parent);
1facf9fc 25161+ err = PTR_ERR(h_path.dentry);
25162+ if (IS_ERR(h_path.dentry))
25163+ goto out;
25164+
25165+ err = 0;
25166+ /* wh.plink dir is not monitored */
7f207e10 25167+ /* todo: is it really safe? */
1facf9fc 25168+ if (h_path.dentry->d_inode
25169+ && h_path.dentry->d_inode != h_dentry->d_inode) {
523b37e3
AM
25170+ delegated = NULL;
25171+ err = vfsub_unlink(h_dir, &h_path, &delegated, /*force*/0);
25172+ if (unlikely(err == -EWOULDBLOCK)) {
25173+ pr_warn("cannot retry for NFSv4 delegation"
25174+ " for an internal unlink\n");
25175+ iput(delegated);
25176+ }
1facf9fc 25177+ dput(h_path.dentry);
25178+ h_path.dentry = NULL;
25179+ if (!err)
25180+ goto again;
25181+ }
523b37e3
AM
25182+ if (!err && !h_path.dentry->d_inode) {
25183+ delegated = NULL;
25184+ err = vfsub_link(h_dentry, h_dir, &h_path, &delegated);
25185+ if (unlikely(err == -EWOULDBLOCK)) {
25186+ pr_warn("cannot retry for NFSv4 delegation"
25187+ " for an internal link\n");
25188+ iput(delegated);
25189+ }
25190+ }
1facf9fc 25191+ dput(h_path.dentry);
25192+
4f0767ce 25193+out:
7f207e10 25194+ mutex_unlock(&h_dir->i_mutex);
1facf9fc 25195+ return err;
25196+}
25197+
25198+struct do_whplink_args {
25199+ int *errp;
25200+ struct qstr *tgt;
25201+ struct dentry *h_parent;
25202+ struct dentry *h_dentry;
25203+ struct au_branch *br;
25204+};
25205+
25206+static void call_do_whplink(void *args)
25207+{
25208+ struct do_whplink_args *a = args;
25209+ *a->errp = do_whplink(a->tgt, a->h_parent, a->h_dentry, a->br);
25210+}
25211+
25212+static int whplink(struct dentry *h_dentry, struct inode *inode,
25213+ aufs_bindex_t bindex, struct au_branch *br)
25214+{
25215+ int err, wkq_err;
25216+ struct au_wbr *wbr;
25217+ struct dentry *h_parent;
25218+ struct inode *h_dir;
25219+ char a[PLINK_NAME_LEN];
0c3ec466 25220+ struct qstr tgtname = QSTR_INIT(a, 0);
1facf9fc 25221+
25222+ wbr = au_sbr(inode->i_sb, bindex)->br_wbr;
25223+ h_parent = wbr->wbr_plink;
25224+ h_dir = h_parent->d_inode;
25225+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
25226+
25227+ /* always superio. */
2dfbb274 25228+ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
1facf9fc 25229+ struct do_whplink_args args = {
25230+ .errp = &err,
25231+ .tgt = &tgtname,
25232+ .h_parent = h_parent,
25233+ .h_dentry = h_dentry,
25234+ .br = br
25235+ };
25236+ wkq_err = au_wkq_wait(call_do_whplink, &args);
25237+ if (unlikely(wkq_err))
25238+ err = wkq_err;
25239+ } else
25240+ err = do_whplink(&tgtname, h_parent, h_dentry, br);
1facf9fc 25241+
25242+ return err;
25243+}
25244+
25245+/* free a single plink */
25246+static void do_put_plink(struct pseudo_link *plink, int do_del)
25247+{
1facf9fc 25248+ if (do_del)
86dc4139 25249+ hlist_del(&plink->hlist);
4a4d8108
AM
25250+ iput(plink->inode);
25251+ kfree(plink);
25252+}
25253+
25254+static void do_put_plink_rcu(struct rcu_head *rcu)
25255+{
25256+ struct pseudo_link *plink;
25257+
25258+ plink = container_of(rcu, struct pseudo_link, rcu);
25259+ iput(plink->inode);
1facf9fc 25260+ kfree(plink);
25261+}
25262+
25263+/*
25264+ * create a new pseudo-link for @h_dentry on @bindex.
25265+ * the linked inode is held in aufs @inode.
25266+ */
25267+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
25268+ struct dentry *h_dentry)
25269+{
25270+ struct super_block *sb;
25271+ struct au_sbinfo *sbinfo;
86dc4139 25272+ struct hlist_head *plink_hlist;
4a4d8108 25273+ struct pseudo_link *plink, *tmp;
86dc4139
AM
25274+ struct au_sphlhead *sphl;
25275+ int found, err, cnt, i;
1facf9fc 25276+
25277+ sb = inode->i_sb;
25278+ sbinfo = au_sbi(sb);
25279+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 25280+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 25281+
86dc4139 25282+ found = au_plink_test(inode);
4a4d8108 25283+ if (found)
1facf9fc 25284+ return;
4a4d8108 25285+
86dc4139
AM
25286+ i = au_plink_hash(inode->i_ino);
25287+ sphl = sbinfo->si_plink + i;
25288+ plink_hlist = &sphl->head;
4a4d8108
AM
25289+ tmp = kmalloc(sizeof(*plink), GFP_NOFS);
25290+ if (tmp)
25291+ tmp->inode = au_igrab(inode);
25292+ else {
25293+ err = -ENOMEM;
25294+ goto out;
1facf9fc 25295+ }
25296+
86dc4139
AM
25297+ spin_lock(&sphl->spin);
25298+ hlist_for_each_entry(plink, plink_hlist, hlist) {
4a4d8108
AM
25299+ if (plink->inode == inode) {
25300+ found = 1;
25301+ break;
25302+ }
1facf9fc 25303+ }
4a4d8108 25304+ if (!found)
86dc4139
AM
25305+ hlist_add_head_rcu(&tmp->hlist, plink_hlist);
25306+ spin_unlock(&sphl->spin);
4a4d8108 25307+ if (!found) {
86dc4139
AM
25308+ cnt = au_sphl_count(sphl);
25309+#define msg "unexpectedly unblanced or too many pseudo-links"
25310+ if (cnt > AUFS_PLINK_WARN)
25311+ AuWarn1(msg ", %d\n", cnt);
25312+#undef msg
1facf9fc 25313+ err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex));
4a4d8108
AM
25314+ } else {
25315+ do_put_plink(tmp, 0);
25316+ return;
1facf9fc 25317+ }
25318+
4a4d8108 25319+out:
1facf9fc 25320+ if (unlikely(err)) {
0c3ec466 25321+ pr_warn("err %d, damaged pseudo link.\n", err);
4a4d8108 25322+ if (tmp) {
86dc4139 25323+ au_sphl_del_rcu(&tmp->hlist, sphl);
4a4d8108
AM
25324+ call_rcu(&tmp->rcu, do_put_plink_rcu);
25325+ }
1facf9fc 25326+ }
25327+}
25328+
25329+/* free all plinks */
e49829fe 25330+void au_plink_put(struct super_block *sb, int verbose)
1facf9fc 25331+{
86dc4139 25332+ int i, warned;
1facf9fc 25333+ struct au_sbinfo *sbinfo;
86dc4139
AM
25334+ struct hlist_head *plink_hlist;
25335+ struct hlist_node *tmp;
25336+ struct pseudo_link *plink;
1facf9fc 25337+
dece6358
AM
25338+ SiMustWriteLock(sb);
25339+
1facf9fc 25340+ sbinfo = au_sbi(sb);
25341+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 25342+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 25343+
1facf9fc 25344+ /* no spin_lock since sbinfo is write-locked */
86dc4139
AM
25345+ warned = 0;
25346+ for (i = 0; i < AuPlink_NHASH; i++) {
25347+ plink_hlist = &sbinfo->si_plink[i].head;
25348+ if (!warned && verbose && !hlist_empty(plink_hlist)) {
25349+ pr_warn("pseudo-link is not flushed");
25350+ warned = 1;
25351+ }
25352+ hlist_for_each_entry_safe(plink, tmp, plink_hlist, hlist)
25353+ do_put_plink(plink, 0);
25354+ INIT_HLIST_HEAD(plink_hlist);
25355+ }
1facf9fc 25356+}
25357+
e49829fe
JR
25358+void au_plink_clean(struct super_block *sb, int verbose)
25359+{
25360+ struct dentry *root;
25361+
25362+ root = sb->s_root;
25363+ aufs_write_lock(root);
25364+ if (au_opt_test(au_mntflags(sb), PLINK))
25365+ au_plink_put(sb, verbose);
25366+ aufs_write_unlock(root);
25367+}
25368+
86dc4139
AM
25369+static int au_plink_do_half_refresh(struct inode *inode, aufs_bindex_t br_id)
25370+{
25371+ int do_put;
25372+ aufs_bindex_t bstart, bend, bindex;
25373+
25374+ do_put = 0;
25375+ bstart = au_ibstart(inode);
25376+ bend = au_ibend(inode);
25377+ if (bstart >= 0) {
25378+ for (bindex = bstart; bindex <= bend; bindex++) {
25379+ if (!au_h_iptr(inode, bindex)
25380+ || au_ii_br_id(inode, bindex) != br_id)
25381+ continue;
25382+ au_set_h_iptr(inode, bindex, NULL, 0);
25383+ do_put = 1;
25384+ break;
25385+ }
25386+ if (do_put)
25387+ for (bindex = bstart; bindex <= bend; bindex++)
25388+ if (au_h_iptr(inode, bindex)) {
25389+ do_put = 0;
25390+ break;
25391+ }
25392+ } else
25393+ do_put = 1;
25394+
25395+ return do_put;
25396+}
25397+
1facf9fc 25398+/* free the plinks on a branch specified by @br_id */
25399+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id)
25400+{
25401+ struct au_sbinfo *sbinfo;
86dc4139
AM
25402+ struct hlist_head *plink_hlist;
25403+ struct hlist_node *tmp;
25404+ struct pseudo_link *plink;
1facf9fc 25405+ struct inode *inode;
86dc4139 25406+ int i, do_put;
1facf9fc 25407+
dece6358
AM
25408+ SiMustWriteLock(sb);
25409+
1facf9fc 25410+ sbinfo = au_sbi(sb);
25411+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
e49829fe 25412+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
1facf9fc 25413+
1facf9fc 25414+ /* no spin_lock since sbinfo is write-locked */
86dc4139
AM
25415+ for (i = 0; i < AuPlink_NHASH; i++) {
25416+ plink_hlist = &sbinfo->si_plink[i].head;
25417+ hlist_for_each_entry_safe(plink, tmp, plink_hlist, hlist) {
25418+ inode = au_igrab(plink->inode);
25419+ ii_write_lock_child(inode);
25420+ do_put = au_plink_do_half_refresh(inode, br_id);
dece6358
AM
25421+ if (do_put)
25422+ do_put_plink(plink, 1);
86dc4139
AM
25423+ ii_write_unlock(inode);
25424+ iput(inode);
dece6358 25425+ }
dece6358
AM
25426+ }
25427+}
7f207e10
AM
25428diff -urN /usr/share/empty/fs/aufs/poll.c linux/fs/aufs/poll.c
25429--- /usr/share/empty/fs/aufs/poll.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
25430+++ linux/fs/aufs/poll.c 2016-02-28 11:27:01.280579017 +0100
25431@@ -0,0 +1,52 @@
dece6358 25432+/*
7f2ca4b1 25433+ * Copyright (C) 2005-2016 Junjiro R. Okajima
dece6358
AM
25434+ *
25435+ * This program, aufs is free software; you can redistribute it and/or modify
25436+ * it under the terms of the GNU General Public License as published by
25437+ * the Free Software Foundation; either version 2 of the License, or
25438+ * (at your option) any later version.
25439+ *
25440+ * This program is distributed in the hope that it will be useful,
25441+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25442+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25443+ * GNU General Public License for more details.
25444+ *
25445+ * You should have received a copy of the GNU General Public License
523b37e3 25446+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
dece6358
AM
25447+ */
25448+
1308ab2a 25449+/*
25450+ * poll operation
25451+ * There is only one filesystem which implements ->poll operation, currently.
25452+ */
25453+
25454+#include "aufs.h"
25455+
25456+unsigned int aufs_poll(struct file *file, poll_table *wait)
25457+{
25458+ unsigned int mask;
25459+ int err;
25460+ struct file *h_file;
1308ab2a 25461+ struct super_block *sb;
25462+
25463+ /* We should pretend an error happened. */
25464+ mask = POLLERR /* | POLLIN | POLLOUT */;
7f2ca4b1 25465+ sb = file->f_dentry->d_sb;
e49829fe 25466+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
7f2ca4b1
JR
25467+
25468+ h_file = au_read_pre(file, /*keep_fi*/0);
25469+ err = PTR_ERR(h_file);
25470+ if (IS_ERR(h_file))
1308ab2a 25471+ goto out;
25472+
25473+ /* it is not an error if h_file has no operation */
25474+ mask = DEFAULT_POLLMASK;
523b37e3 25475+ if (h_file->f_op->poll)
1308ab2a 25476+ mask = h_file->f_op->poll(h_file, wait);
7f2ca4b1 25477+ fput(h_file); /* instead of au_read_post() */
1308ab2a 25478+
4f0767ce 25479+out:
1308ab2a 25480+ si_read_unlock(sb);
25481+ AuTraceErr((int)mask);
25482+ return mask;
25483+}
c1595e42
JR
25484diff -urN /usr/share/empty/fs/aufs/posix_acl.c linux/fs/aufs/posix_acl.c
25485--- /usr/share/empty/fs/aufs/posix_acl.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
25486+++ linux/fs/aufs/posix_acl.c 2016-02-28 11:27:01.280579017 +0100
25487@@ -0,0 +1,98 @@
c1595e42 25488+/*
7f2ca4b1 25489+ * Copyright (C) 2014-2016 Junjiro R. Okajima
c1595e42
JR
25490+ *
25491+ * This program, aufs is free software; you can redistribute it and/or modify
25492+ * it under the terms of the GNU General Public License as published by
25493+ * the Free Software Foundation; either version 2 of the License, or
25494+ * (at your option) any later version.
25495+ *
25496+ * This program is distributed in the hope that it will be useful,
25497+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25498+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25499+ * GNU General Public License for more details.
25500+ *
25501+ * You should have received a copy of the GNU General Public License
25502+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
25503+ */
25504+
25505+/*
25506+ * posix acl operations
25507+ */
25508+
25509+#include <linux/fs.h>
c1595e42
JR
25510+#include "aufs.h"
25511+
25512+struct posix_acl *aufs_get_acl(struct inode *inode, int type)
25513+{
25514+ struct posix_acl *acl;
25515+ int err;
25516+ aufs_bindex_t bindex;
25517+ struct inode *h_inode;
25518+ struct super_block *sb;
25519+
25520+ acl = NULL;
25521+ sb = inode->i_sb;
25522+ si_read_lock(sb, AuLock_FLUSH);
25523+ ii_read_lock_child(inode);
25524+ if (!(sb->s_flags & MS_POSIXACL))
25525+ goto out;
25526+
25527+ bindex = au_ibstart(inode);
25528+ h_inode = au_h_iptr(inode, bindex);
25529+ if (unlikely(!h_inode
25530+ || ((h_inode->i_mode & S_IFMT)
25531+ != (inode->i_mode & S_IFMT)))) {
25532+ err = au_busy_or_stale();
25533+ acl = ERR_PTR(err);
25534+ goto out;
25535+ }
25536+
25537+ /* always topmost only */
25538+ acl = get_acl(h_inode, type);
25539+
25540+out:
25541+ ii_read_unlock(inode);
25542+ si_read_unlock(sb);
25543+
25544+ AuTraceErrPtr(acl);
25545+ return acl;
25546+}
25547+
25548+int aufs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
25549+{
25550+ int err;
25551+ ssize_t ssz;
25552+ struct dentry *dentry;
25553+ struct au_srxattr arg = {
25554+ .type = AU_ACL_SET,
25555+ .u.acl_set = {
25556+ .acl = acl,
25557+ .type = type
25558+ },
25559+ };
25560+
25561+ mutex_lock(&inode->i_mutex);
25562+ if (inode->i_ino == AUFS_ROOT_INO)
25563+ dentry = dget(inode->i_sb->s_root);
25564+ else {
25565+ dentry = d_find_alias(inode);
25566+ if (!dentry)
25567+ dentry = d_find_any_alias(inode);
25568+ if (!dentry) {
25569+ pr_warn("cannot handle this inode, "
25570+ "please report to aufs-users ML\n");
25571+ err = -ENOENT;
25572+ goto out;
25573+ }
25574+ }
25575+
25576+ ssz = au_srxattr(dentry, &arg);
25577+ dput(dentry);
25578+ err = ssz;
25579+ if (ssz >= 0)
25580+ err = 0;
25581+
25582+out:
25583+ mutex_unlock(&inode->i_mutex);
25584+ return err;
25585+}
7f207e10
AM
25586diff -urN /usr/share/empty/fs/aufs/procfs.c linux/fs/aufs/procfs.c
25587--- /usr/share/empty/fs/aufs/procfs.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 25588+++ linux/fs/aufs/procfs.c 2016-02-28 11:27:01.280579017 +0100
523b37e3 25589@@ -0,0 +1,169 @@
e49829fe 25590+/*
7f2ca4b1 25591+ * Copyright (C) 2010-2016 Junjiro R. Okajima
e49829fe
JR
25592+ *
25593+ * This program, aufs is free software; you can redistribute it and/or modify
25594+ * it under the terms of the GNU General Public License as published by
25595+ * the Free Software Foundation; either version 2 of the License, or
25596+ * (at your option) any later version.
25597+ *
25598+ * This program is distributed in the hope that it will be useful,
25599+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25600+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25601+ * GNU General Public License for more details.
25602+ *
25603+ * You should have received a copy of the GNU General Public License
523b37e3 25604+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
e49829fe
JR
25605+ */
25606+
25607+/*
25608+ * procfs interfaces
25609+ */
25610+
25611+#include <linux/proc_fs.h>
25612+#include "aufs.h"
25613+
25614+static int au_procfs_plm_release(struct inode *inode, struct file *file)
25615+{
25616+ struct au_sbinfo *sbinfo;
25617+
25618+ sbinfo = file->private_data;
25619+ if (sbinfo) {
25620+ au_plink_maint_leave(sbinfo);
25621+ kobject_put(&sbinfo->si_kobj);
25622+ }
25623+
25624+ return 0;
25625+}
25626+
25627+static void au_procfs_plm_write_clean(struct file *file)
25628+{
25629+ struct au_sbinfo *sbinfo;
25630+
25631+ sbinfo = file->private_data;
25632+ if (sbinfo)
25633+ au_plink_clean(sbinfo->si_sb, /*verbose*/0);
25634+}
25635+
25636+static int au_procfs_plm_write_si(struct file *file, unsigned long id)
25637+{
25638+ int err;
25639+ struct super_block *sb;
25640+ struct au_sbinfo *sbinfo;
25641+
25642+ err = -EBUSY;
25643+ if (unlikely(file->private_data))
25644+ goto out;
25645+
25646+ sb = NULL;
53392da6 25647+ /* don't use au_sbilist_lock() here */
e49829fe
JR
25648+ spin_lock(&au_sbilist.spin);
25649+ list_for_each_entry(sbinfo, &au_sbilist.head, si_list)
25650+ if (id == sysaufs_si_id(sbinfo)) {
25651+ kobject_get(&sbinfo->si_kobj);
25652+ sb = sbinfo->si_sb;
25653+ break;
25654+ }
25655+ spin_unlock(&au_sbilist.spin);
25656+
25657+ err = -EINVAL;
25658+ if (unlikely(!sb))
25659+ goto out;
25660+
25661+ err = au_plink_maint_enter(sb);
25662+ if (!err)
25663+ /* keep kobject_get() */
25664+ file->private_data = sbinfo;
25665+ else
25666+ kobject_put(&sbinfo->si_kobj);
25667+out:
25668+ return err;
25669+}
25670+
25671+/*
25672+ * Accept a valid "si=xxxx" only.
25673+ * Once it is accepted successfully, accept "clean" too.
25674+ */
25675+static ssize_t au_procfs_plm_write(struct file *file, const char __user *ubuf,
25676+ size_t count, loff_t *ppos)
25677+{
25678+ ssize_t err;
25679+ unsigned long id;
25680+ /* last newline is allowed */
25681+ char buf[3 + sizeof(unsigned long) * 2 + 1];
25682+
25683+ err = -EACCES;
25684+ if (unlikely(!capable(CAP_SYS_ADMIN)))
25685+ goto out;
25686+
25687+ err = -EINVAL;
25688+ if (unlikely(count > sizeof(buf)))
25689+ goto out;
25690+
25691+ err = copy_from_user(buf, ubuf, count);
25692+ if (unlikely(err)) {
25693+ err = -EFAULT;
25694+ goto out;
25695+ }
25696+ buf[count] = 0;
25697+
25698+ err = -EINVAL;
25699+ if (!strcmp("clean", buf)) {
25700+ au_procfs_plm_write_clean(file);
25701+ goto out_success;
25702+ } else if (unlikely(strncmp("si=", buf, 3)))
25703+ goto out;
25704+
9dbd164d 25705+ err = kstrtoul(buf + 3, 16, &id);
e49829fe
JR
25706+ if (unlikely(err))
25707+ goto out;
25708+
25709+ err = au_procfs_plm_write_si(file, id);
25710+ if (unlikely(err))
25711+ goto out;
25712+
25713+out_success:
25714+ err = count; /* success */
25715+out:
25716+ return err;
25717+}
25718+
25719+static const struct file_operations au_procfs_plm_fop = {
25720+ .write = au_procfs_plm_write,
25721+ .release = au_procfs_plm_release,
25722+ .owner = THIS_MODULE
25723+};
25724+
25725+/* ---------------------------------------------------------------------- */
25726+
25727+static struct proc_dir_entry *au_procfs_dir;
25728+
25729+void au_procfs_fin(void)
25730+{
25731+ remove_proc_entry(AUFS_PLINK_MAINT_NAME, au_procfs_dir);
25732+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
25733+}
25734+
25735+int __init au_procfs_init(void)
25736+{
25737+ int err;
25738+ struct proc_dir_entry *entry;
25739+
25740+ err = -ENOMEM;
25741+ au_procfs_dir = proc_mkdir(AUFS_PLINK_MAINT_DIR, NULL);
25742+ if (unlikely(!au_procfs_dir))
25743+ goto out;
25744+
25745+ entry = proc_create(AUFS_PLINK_MAINT_NAME, S_IFREG | S_IWUSR,
25746+ au_procfs_dir, &au_procfs_plm_fop);
25747+ if (unlikely(!entry))
25748+ goto out_dir;
25749+
25750+ err = 0;
25751+ goto out; /* success */
25752+
25753+
25754+out_dir:
25755+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
25756+out:
25757+ return err;
25758+}
7f207e10
AM
25759diff -urN /usr/share/empty/fs/aufs/rdu.c linux/fs/aufs/rdu.c
25760--- /usr/share/empty/fs/aufs/rdu.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 25761+++ linux/fs/aufs/rdu.c 2016-02-28 11:27:01.280579017 +0100
523b37e3 25762@@ -0,0 +1,388 @@
1308ab2a 25763+/*
7f2ca4b1 25764+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1308ab2a 25765+ *
25766+ * This program, aufs is free software; you can redistribute it and/or modify
25767+ * it under the terms of the GNU General Public License as published by
25768+ * the Free Software Foundation; either version 2 of the License, or
25769+ * (at your option) any later version.
25770+ *
25771+ * This program is distributed in the hope that it will be useful,
25772+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25773+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25774+ * GNU General Public License for more details.
25775+ *
25776+ * You should have received a copy of the GNU General Public License
523b37e3 25777+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1308ab2a 25778+ */
25779+
25780+/*
25781+ * readdir in userspace.
25782+ */
25783+
b752ccd1 25784+#include <linux/compat.h>
4a4d8108 25785+#include <linux/fs_stack.h>
1308ab2a 25786+#include <linux/security.h>
1308ab2a 25787+#include "aufs.h"
25788+
25789+/* bits for struct aufs_rdu.flags */
25790+#define AuRdu_CALLED 1
25791+#define AuRdu_CONT (1 << 1)
25792+#define AuRdu_FULL (1 << 2)
25793+#define au_ftest_rdu(flags, name) ((flags) & AuRdu_##name)
7f207e10
AM
25794+#define au_fset_rdu(flags, name) \
25795+ do { (flags) |= AuRdu_##name; } while (0)
25796+#define au_fclr_rdu(flags, name) \
25797+ do { (flags) &= ~AuRdu_##name; } while (0)
1308ab2a 25798+
25799+struct au_rdu_arg {
392086de 25800+ struct dir_context ctx;
1308ab2a 25801+ struct aufs_rdu *rdu;
25802+ union au_rdu_ent_ul ent;
25803+ unsigned long end;
25804+
25805+ struct super_block *sb;
25806+ int err;
25807+};
25808+
392086de 25809+static int au_rdu_fill(struct dir_context *ctx, const char *name, int nlen,
1308ab2a 25810+ loff_t offset, u64 h_ino, unsigned int d_type)
25811+{
25812+ int err, len;
392086de 25813+ struct au_rdu_arg *arg = container_of(ctx, struct au_rdu_arg, ctx);
1308ab2a 25814+ struct aufs_rdu *rdu = arg->rdu;
25815+ struct au_rdu_ent ent;
25816+
25817+ err = 0;
25818+ arg->err = 0;
25819+ au_fset_rdu(rdu->cookie.flags, CALLED);
25820+ len = au_rdu_len(nlen);
25821+ if (arg->ent.ul + len < arg->end) {
25822+ ent.ino = h_ino;
25823+ ent.bindex = rdu->cookie.bindex;
25824+ ent.type = d_type;
25825+ ent.nlen = nlen;
4a4d8108
AM
25826+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
25827+ ent.type = DT_UNKNOWN;
1308ab2a 25828+
9dbd164d 25829+ /* unnecessary to support mmap_sem since this is a dir */
1308ab2a 25830+ err = -EFAULT;
25831+ if (copy_to_user(arg->ent.e, &ent, sizeof(ent)))
25832+ goto out;
25833+ if (copy_to_user(arg->ent.e->name, name, nlen))
25834+ goto out;
25835+ /* the terminating NULL */
25836+ if (__put_user(0, arg->ent.e->name + nlen))
25837+ goto out;
25838+ err = 0;
25839+ /* AuDbg("%p, %.*s\n", arg->ent.p, nlen, name); */
25840+ arg->ent.ul += len;
25841+ rdu->rent++;
25842+ } else {
25843+ err = -EFAULT;
25844+ au_fset_rdu(rdu->cookie.flags, FULL);
25845+ rdu->full = 1;
25846+ rdu->tail = arg->ent;
25847+ }
25848+
4f0767ce 25849+out:
1308ab2a 25850+ /* AuTraceErr(err); */
25851+ return err;
25852+}
25853+
25854+static int au_rdu_do(struct file *h_file, struct au_rdu_arg *arg)
25855+{
25856+ int err;
25857+ loff_t offset;
25858+ struct au_rdu_cookie *cookie = &arg->rdu->cookie;
25859+
92d182d2 25860+ /* we don't have to care (FMODE_32BITHASH | FMODE_64BITHASH) for ext4 */
1308ab2a 25861+ offset = vfsub_llseek(h_file, cookie->h_pos, SEEK_SET);
25862+ err = offset;
25863+ if (unlikely(offset != cookie->h_pos))
25864+ goto out;
25865+
25866+ err = 0;
25867+ do {
25868+ arg->err = 0;
25869+ au_fclr_rdu(cookie->flags, CALLED);
25870+ /* smp_mb(); */
392086de 25871+ err = vfsub_iterate_dir(h_file, &arg->ctx);
1308ab2a 25872+ if (err >= 0)
25873+ err = arg->err;
25874+ } while (!err
25875+ && au_ftest_rdu(cookie->flags, CALLED)
25876+ && !au_ftest_rdu(cookie->flags, FULL));
25877+ cookie->h_pos = h_file->f_pos;
25878+
4f0767ce 25879+out:
1308ab2a 25880+ AuTraceErr(err);
25881+ return err;
25882+}
25883+
25884+static int au_rdu(struct file *file, struct aufs_rdu *rdu)
25885+{
25886+ int err;
25887+ aufs_bindex_t bend;
392086de
AM
25888+ struct au_rdu_arg arg = {
25889+ .ctx = {
25890+ .actor = au_diractor(au_rdu_fill)
25891+ }
25892+ };
1308ab2a 25893+ struct dentry *dentry;
25894+ struct inode *inode;
25895+ struct file *h_file;
25896+ struct au_rdu_cookie *cookie = &rdu->cookie;
25897+
25898+ err = !access_ok(VERIFY_WRITE, rdu->ent.e, rdu->sz);
25899+ if (unlikely(err)) {
25900+ err = -EFAULT;
25901+ AuTraceErr(err);
25902+ goto out;
25903+ }
25904+ rdu->rent = 0;
25905+ rdu->tail = rdu->ent;
25906+ rdu->full = 0;
25907+ arg.rdu = rdu;
25908+ arg.ent = rdu->ent;
25909+ arg.end = arg.ent.ul;
25910+ arg.end += rdu->sz;
25911+
25912+ err = -ENOTDIR;
523b37e3 25913+ if (unlikely(!file->f_op->iterate))
1308ab2a 25914+ goto out;
25915+
25916+ err = security_file_permission(file, MAY_READ);
25917+ AuTraceErr(err);
25918+ if (unlikely(err))
25919+ goto out;
25920+
25921+ dentry = file->f_dentry;
25922+ inode = dentry->d_inode;
25923+#if 1
25924+ mutex_lock(&inode->i_mutex);
25925+#else
25926+ err = mutex_lock_killable(&inode->i_mutex);
25927+ AuTraceErr(err);
25928+ if (unlikely(err))
25929+ goto out;
25930+#endif
1308ab2a 25931+
25932+ arg.sb = inode->i_sb;
e49829fe
JR
25933+ err = si_read_lock(arg.sb, AuLock_FLUSH | AuLock_NOPLM);
25934+ if (unlikely(err))
25935+ goto out_mtx;
027c5e7a
AM
25936+ err = au_alive_dir(dentry);
25937+ if (unlikely(err))
25938+ goto out_si;
e49829fe 25939+ /* todo: reval? */
1308ab2a 25940+ fi_read_lock(file);
25941+
25942+ err = -EAGAIN;
25943+ if (unlikely(au_ftest_rdu(cookie->flags, CONT)
25944+ && cookie->generation != au_figen(file)))
25945+ goto out_unlock;
25946+
25947+ err = 0;
25948+ if (!rdu->blk) {
25949+ rdu->blk = au_sbi(arg.sb)->si_rdblk;
25950+ if (!rdu->blk)
25951+ rdu->blk = au_dir_size(file, /*dentry*/NULL);
25952+ }
25953+ bend = au_fbstart(file);
25954+ if (cookie->bindex < bend)
25955+ cookie->bindex = bend;
4a4d8108 25956+ bend = au_fbend_dir(file);
1308ab2a 25957+ /* AuDbg("b%d, b%d\n", cookie->bindex, bend); */
25958+ for (; !err && cookie->bindex <= bend;
25959+ cookie->bindex++, cookie->h_pos = 0) {
4a4d8108 25960+ h_file = au_hf_dir(file, cookie->bindex);
1308ab2a 25961+ if (!h_file)
25962+ continue;
25963+
25964+ au_fclr_rdu(cookie->flags, FULL);
25965+ err = au_rdu_do(h_file, &arg);
25966+ AuTraceErr(err);
25967+ if (unlikely(au_ftest_rdu(cookie->flags, FULL) || err))
25968+ break;
25969+ }
25970+ AuDbg("rent %llu\n", rdu->rent);
25971+
25972+ if (!err && !au_ftest_rdu(cookie->flags, CONT)) {
25973+ rdu->shwh = !!au_opt_test(au_sbi(arg.sb)->si_mntflags, SHWH);
25974+ au_fset_rdu(cookie->flags, CONT);
25975+ cookie->generation = au_figen(file);
25976+ }
25977+
25978+ ii_read_lock_child(inode);
25979+ fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibstart(inode)));
25980+ ii_read_unlock(inode);
25981+
4f0767ce 25982+out_unlock:
1308ab2a 25983+ fi_read_unlock(file);
027c5e7a 25984+out_si:
1308ab2a 25985+ si_read_unlock(arg.sb);
4f0767ce 25986+out_mtx:
1308ab2a 25987+ mutex_unlock(&inode->i_mutex);
4f0767ce 25988+out:
1308ab2a 25989+ AuTraceErr(err);
25990+ return err;
25991+}
25992+
25993+static int au_rdu_ino(struct file *file, struct aufs_rdu *rdu)
25994+{
25995+ int err;
25996+ ino_t ino;
25997+ unsigned long long nent;
25998+ union au_rdu_ent_ul *u;
25999+ struct au_rdu_ent ent;
26000+ struct super_block *sb;
26001+
26002+ err = 0;
26003+ nent = rdu->nent;
26004+ u = &rdu->ent;
26005+ sb = file->f_dentry->d_sb;
26006+ si_read_lock(sb, AuLock_FLUSH);
26007+ while (nent-- > 0) {
9dbd164d 26008+ /* unnecessary to support mmap_sem since this is a dir */
1308ab2a 26009+ err = copy_from_user(&ent, u->e, sizeof(ent));
4a4d8108
AM
26010+ if (!err)
26011+ err = !access_ok(VERIFY_WRITE, &u->e->ino, sizeof(ino));
1308ab2a 26012+ if (unlikely(err)) {
26013+ err = -EFAULT;
26014+ AuTraceErr(err);
26015+ break;
26016+ }
26017+
26018+ /* AuDbg("b%d, i%llu\n", ent.bindex, ent.ino); */
26019+ if (!ent.wh)
26020+ err = au_ino(sb, ent.bindex, ent.ino, ent.type, &ino);
26021+ else
26022+ err = au_wh_ino(sb, ent.bindex, ent.ino, ent.type,
26023+ &ino);
26024+ if (unlikely(err)) {
26025+ AuTraceErr(err);
26026+ break;
26027+ }
26028+
26029+ err = __put_user(ino, &u->e->ino);
26030+ if (unlikely(err)) {
26031+ err = -EFAULT;
26032+ AuTraceErr(err);
26033+ break;
26034+ }
26035+ u->ul += au_rdu_len(ent.nlen);
26036+ }
26037+ si_read_unlock(sb);
26038+
26039+ return err;
26040+}
26041+
26042+/* ---------------------------------------------------------------------- */
26043+
26044+static int au_rdu_verify(struct aufs_rdu *rdu)
26045+{
b752ccd1 26046+ AuDbg("rdu{%llu, %p, %u | %u | %llu, %u, %u | "
1308ab2a 26047+ "%llu, b%d, 0x%x, g%u}\n",
b752ccd1 26048+ rdu->sz, rdu->ent.e, rdu->verify[AufsCtlRduV_SZ],
1308ab2a 26049+ rdu->blk,
26050+ rdu->rent, rdu->shwh, rdu->full,
26051+ rdu->cookie.h_pos, rdu->cookie.bindex, rdu->cookie.flags,
26052+ rdu->cookie.generation);
dece6358 26053+
b752ccd1 26054+ if (rdu->verify[AufsCtlRduV_SZ] == sizeof(*rdu))
1308ab2a 26055+ return 0;
dece6358 26056+
b752ccd1
AM
26057+ AuDbg("%u:%u\n",
26058+ rdu->verify[AufsCtlRduV_SZ], (unsigned int)sizeof(*rdu));
1308ab2a 26059+ return -EINVAL;
26060+}
26061+
26062+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
dece6358 26063+{
1308ab2a 26064+ long err, e;
26065+ struct aufs_rdu rdu;
26066+ void __user *p = (void __user *)arg;
dece6358 26067+
1308ab2a 26068+ err = copy_from_user(&rdu, p, sizeof(rdu));
26069+ if (unlikely(err)) {
26070+ err = -EFAULT;
26071+ AuTraceErr(err);
26072+ goto out;
26073+ }
26074+ err = au_rdu_verify(&rdu);
dece6358
AM
26075+ if (unlikely(err))
26076+ goto out;
26077+
1308ab2a 26078+ switch (cmd) {
26079+ case AUFS_CTL_RDU:
26080+ err = au_rdu(file, &rdu);
26081+ if (unlikely(err))
26082+ break;
dece6358 26083+
1308ab2a 26084+ e = copy_to_user(p, &rdu, sizeof(rdu));
26085+ if (unlikely(e)) {
26086+ err = -EFAULT;
26087+ AuTraceErr(err);
26088+ }
26089+ break;
26090+ case AUFS_CTL_RDU_INO:
26091+ err = au_rdu_ino(file, &rdu);
26092+ break;
26093+
26094+ default:
4a4d8108 26095+ /* err = -ENOTTY; */
1308ab2a 26096+ err = -EINVAL;
26097+ }
dece6358 26098+
4f0767ce 26099+out:
1308ab2a 26100+ AuTraceErr(err);
26101+ return err;
1facf9fc 26102+}
b752ccd1
AM
26103+
26104+#ifdef CONFIG_COMPAT
26105+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
26106+{
26107+ long err, e;
26108+ struct aufs_rdu rdu;
26109+ void __user *p = compat_ptr(arg);
26110+
26111+ /* todo: get_user()? */
26112+ err = copy_from_user(&rdu, p, sizeof(rdu));
26113+ if (unlikely(err)) {
26114+ err = -EFAULT;
26115+ AuTraceErr(err);
26116+ goto out;
26117+ }
26118+ rdu.ent.e = compat_ptr(rdu.ent.ul);
26119+ err = au_rdu_verify(&rdu);
26120+ if (unlikely(err))
26121+ goto out;
26122+
26123+ switch (cmd) {
26124+ case AUFS_CTL_RDU:
26125+ err = au_rdu(file, &rdu);
26126+ if (unlikely(err))
26127+ break;
26128+
26129+ rdu.ent.ul = ptr_to_compat(rdu.ent.e);
26130+ rdu.tail.ul = ptr_to_compat(rdu.tail.e);
26131+ e = copy_to_user(p, &rdu, sizeof(rdu));
26132+ if (unlikely(e)) {
26133+ err = -EFAULT;
26134+ AuTraceErr(err);
26135+ }
26136+ break;
26137+ case AUFS_CTL_RDU_INO:
26138+ err = au_rdu_ino(file, &rdu);
26139+ break;
26140+
26141+ default:
26142+ /* err = -ENOTTY; */
26143+ err = -EINVAL;
26144+ }
26145+
4f0767ce 26146+out:
b752ccd1
AM
26147+ AuTraceErr(err);
26148+ return err;
26149+}
26150+#endif
7f207e10
AM
26151diff -urN /usr/share/empty/fs/aufs/rwsem.h linux/fs/aufs/rwsem.h
26152--- /usr/share/empty/fs/aufs/rwsem.h 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 26153+++ linux/fs/aufs/rwsem.h 2016-02-28 11:27:01.280579017 +0100
076b876e 26154@@ -0,0 +1,191 @@
1facf9fc 26155+/*
7f2ca4b1 26156+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 26157+ *
26158+ * This program, aufs is free software; you can redistribute it and/or modify
26159+ * it under the terms of the GNU General Public License as published by
26160+ * the Free Software Foundation; either version 2 of the License, or
26161+ * (at your option) any later version.
dece6358
AM
26162+ *
26163+ * This program is distributed in the hope that it will be useful,
26164+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26165+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26166+ * GNU General Public License for more details.
26167+ *
26168+ * You should have received a copy of the GNU General Public License
523b37e3 26169+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 26170+ */
26171+
26172+/*
26173+ * simple read-write semaphore wrappers
26174+ */
26175+
26176+#ifndef __AUFS_RWSEM_H__
26177+#define __AUFS_RWSEM_H__
26178+
26179+#ifdef __KERNEL__
26180+
4a4d8108 26181+#include "debug.h"
dece6358
AM
26182+
26183+struct au_rwsem {
26184+ struct rw_semaphore rwsem;
26185+#ifdef CONFIG_AUFS_DEBUG
26186+ /* just for debugging, not almighty counter */
26187+ atomic_t rcnt, wcnt;
26188+#endif
26189+};
26190+
26191+#ifdef CONFIG_AUFS_DEBUG
26192+#define AuDbgCntInit(rw) do { \
26193+ atomic_set(&(rw)->rcnt, 0); \
26194+ atomic_set(&(rw)->wcnt, 0); \
26195+ smp_mb(); /* atomic set */ \
26196+} while (0)
26197+
e49829fe 26198+#define AuDbgRcntInc(rw) atomic_inc(&(rw)->rcnt)
dece6358 26199+#define AuDbgRcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->rcnt) < 0)
e49829fe 26200+#define AuDbgWcntInc(rw) atomic_inc(&(rw)->wcnt)
dece6358
AM
26201+#define AuDbgWcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->wcnt) < 0)
26202+#else
26203+#define AuDbgCntInit(rw) do {} while (0)
26204+#define AuDbgRcntInc(rw) do {} while (0)
26205+#define AuDbgRcntDec(rw) do {} while (0)
26206+#define AuDbgWcntInc(rw) do {} while (0)
26207+#define AuDbgWcntDec(rw) do {} while (0)
26208+#endif /* CONFIG_AUFS_DEBUG */
26209+
26210+/* to debug easier, do not make them inlined functions */
26211+#define AuRwMustNoWaiters(rw) AuDebugOn(!list_empty(&(rw)->rwsem.wait_list))
26212+/* rwsem_is_locked() is unusable */
26213+#define AuRwMustReadLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0)
26214+#define AuRwMustWriteLock(rw) AuDebugOn(atomic_read(&(rw)->wcnt) <= 0)
26215+#define AuRwMustAnyLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0 \
26216+ && atomic_read(&(rw)->wcnt) <= 0)
26217+#define AuRwDestroy(rw) AuDebugOn(atomic_read(&(rw)->rcnt) \
26218+ || atomic_read(&(rw)->wcnt))
26219+
e49829fe
JR
26220+#define au_rw_class(rw, key) lockdep_set_class(&(rw)->rwsem, key)
26221+
dece6358
AM
26222+static inline void au_rw_init(struct au_rwsem *rw)
26223+{
26224+ AuDbgCntInit(rw);
26225+ init_rwsem(&rw->rwsem);
26226+}
26227+
26228+static inline void au_rw_init_wlock(struct au_rwsem *rw)
26229+{
26230+ au_rw_init(rw);
26231+ down_write(&rw->rwsem);
26232+ AuDbgWcntInc(rw);
26233+}
26234+
26235+static inline void au_rw_init_wlock_nested(struct au_rwsem *rw,
26236+ unsigned int lsc)
26237+{
26238+ au_rw_init(rw);
26239+ down_write_nested(&rw->rwsem, lsc);
26240+ AuDbgWcntInc(rw);
26241+}
26242+
26243+static inline void au_rw_read_lock(struct au_rwsem *rw)
26244+{
26245+ down_read(&rw->rwsem);
26246+ AuDbgRcntInc(rw);
26247+}
26248+
26249+static inline void au_rw_read_lock_nested(struct au_rwsem *rw, unsigned int lsc)
26250+{
26251+ down_read_nested(&rw->rwsem, lsc);
26252+ AuDbgRcntInc(rw);
26253+}
26254+
26255+static inline void au_rw_read_unlock(struct au_rwsem *rw)
26256+{
26257+ AuRwMustReadLock(rw);
26258+ AuDbgRcntDec(rw);
26259+ up_read(&rw->rwsem);
26260+}
26261+
26262+static inline void au_rw_dgrade_lock(struct au_rwsem *rw)
26263+{
26264+ AuRwMustWriteLock(rw);
26265+ AuDbgRcntInc(rw);
26266+ AuDbgWcntDec(rw);
26267+ downgrade_write(&rw->rwsem);
26268+}
26269+
26270+static inline void au_rw_write_lock(struct au_rwsem *rw)
26271+{
26272+ down_write(&rw->rwsem);
26273+ AuDbgWcntInc(rw);
26274+}
26275+
26276+static inline void au_rw_write_lock_nested(struct au_rwsem *rw,
26277+ unsigned int lsc)
26278+{
26279+ down_write_nested(&rw->rwsem, lsc);
26280+ AuDbgWcntInc(rw);
26281+}
1facf9fc 26282+
dece6358
AM
26283+static inline void au_rw_write_unlock(struct au_rwsem *rw)
26284+{
26285+ AuRwMustWriteLock(rw);
26286+ AuDbgWcntDec(rw);
26287+ up_write(&rw->rwsem);
26288+}
26289+
26290+/* why is not _nested version defined */
26291+static inline int au_rw_read_trylock(struct au_rwsem *rw)
26292+{
076b876e
AM
26293+ int ret;
26294+
26295+ ret = down_read_trylock(&rw->rwsem);
dece6358
AM
26296+ if (ret)
26297+ AuDbgRcntInc(rw);
26298+ return ret;
26299+}
26300+
26301+static inline int au_rw_write_trylock(struct au_rwsem *rw)
26302+{
076b876e
AM
26303+ int ret;
26304+
26305+ ret = down_write_trylock(&rw->rwsem);
dece6358
AM
26306+ if (ret)
26307+ AuDbgWcntInc(rw);
26308+ return ret;
26309+}
26310+
26311+#undef AuDbgCntInit
26312+#undef AuDbgRcntInc
26313+#undef AuDbgRcntDec
26314+#undef AuDbgWcntInc
26315+#undef AuDbgWcntDec
1facf9fc 26316+
26317+#define AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
26318+static inline void prefix##_read_lock(param) \
dece6358 26319+{ au_rw_read_lock(rwsem); } \
1facf9fc 26320+static inline void prefix##_write_lock(param) \
dece6358 26321+{ au_rw_write_lock(rwsem); } \
1facf9fc 26322+static inline int prefix##_read_trylock(param) \
dece6358 26323+{ return au_rw_read_trylock(rwsem); } \
1facf9fc 26324+static inline int prefix##_write_trylock(param) \
dece6358 26325+{ return au_rw_write_trylock(rwsem); }
1facf9fc 26326+/* why is not _nested version defined */
26327+/* static inline void prefix##_read_trylock_nested(param, lsc)
dece6358 26328+{ au_rw_read_trylock_nested(rwsem, lsc)); }
1facf9fc 26329+static inline void prefix##_write_trylock_nestd(param, lsc)
dece6358 26330+{ au_rw_write_trylock_nested(rwsem, lsc); } */
1facf9fc 26331+
26332+#define AuSimpleUnlockRwsemFuncs(prefix, param, rwsem) \
26333+static inline void prefix##_read_unlock(param) \
dece6358 26334+{ au_rw_read_unlock(rwsem); } \
1facf9fc 26335+static inline void prefix##_write_unlock(param) \
dece6358 26336+{ au_rw_write_unlock(rwsem); } \
1facf9fc 26337+static inline void prefix##_downgrade_lock(param) \
dece6358 26338+{ au_rw_dgrade_lock(rwsem); }
1facf9fc 26339+
26340+#define AuSimpleRwsemFuncs(prefix, param, rwsem) \
26341+ AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
26342+ AuSimpleUnlockRwsemFuncs(prefix, param, rwsem)
26343+
26344+#endif /* __KERNEL__ */
26345+#endif /* __AUFS_RWSEM_H__ */
7f207e10
AM
26346diff -urN /usr/share/empty/fs/aufs/sbinfo.c linux/fs/aufs/sbinfo.c
26347--- /usr/share/empty/fs/aufs/sbinfo.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
26348+++ linux/fs/aufs/sbinfo.c 2016-02-28 11:27:01.280579017 +0100
26349@@ -0,0 +1,360 @@
1facf9fc 26350+/*
7f2ca4b1 26351+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 26352+ *
26353+ * This program, aufs is free software; you can redistribute it and/or modify
26354+ * it under the terms of the GNU General Public License as published by
26355+ * the Free Software Foundation; either version 2 of the License, or
26356+ * (at your option) any later version.
dece6358
AM
26357+ *
26358+ * This program is distributed in the hope that it will be useful,
26359+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26360+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26361+ * GNU General Public License for more details.
26362+ *
26363+ * You should have received a copy of the GNU General Public License
523b37e3 26364+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 26365+ */
26366+
26367+/*
26368+ * superblock private data
26369+ */
26370+
26371+#include "aufs.h"
26372+
26373+/*
26374+ * they are necessary regardless sysfs is disabled.
26375+ */
26376+void au_si_free(struct kobject *kobj)
26377+{
86dc4139 26378+ int i;
1facf9fc 26379+ struct au_sbinfo *sbinfo;
b752ccd1 26380+ char *locked __maybe_unused; /* debug only */
1facf9fc 26381+
26382+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
86dc4139
AM
26383+ for (i = 0; i < AuPlink_NHASH; i++)
26384+ AuDebugOn(!hlist_empty(&sbinfo->si_plink[i].head));
e49829fe 26385+ AuDebugOn(atomic_read(&sbinfo->si_nowait.nw_len));
1facf9fc 26386+
e49829fe 26387+ au_rw_write_lock(&sbinfo->si_rwsem);
1facf9fc 26388+ au_br_free(sbinfo);
e49829fe 26389+ au_rw_write_unlock(&sbinfo->si_rwsem);
b752ccd1
AM
26390+
26391+ AuDebugOn(radix_tree_gang_lookup
26392+ (&sbinfo->au_si_pid.tree, (void **)&locked,
26393+ /*first_index*/PID_MAX_DEFAULT - 1,
26394+ /*max_items*/sizeof(locked)/sizeof(*locked)));
26395+
1facf9fc 26396+ kfree(sbinfo->si_branch);
b752ccd1 26397+ kfree(sbinfo->au_si_pid.bitmap);
1facf9fc 26398+ mutex_destroy(&sbinfo->si_xib_mtx);
dece6358 26399+ AuRwDestroy(&sbinfo->si_rwsem);
1facf9fc 26400+
26401+ kfree(sbinfo);
26402+}
26403+
26404+int au_si_alloc(struct super_block *sb)
26405+{
86dc4139 26406+ int err, i;
1facf9fc 26407+ struct au_sbinfo *sbinfo;
e49829fe 26408+ static struct lock_class_key aufs_si;
1facf9fc 26409+
26410+ err = -ENOMEM;
4a4d8108 26411+ sbinfo = kzalloc(sizeof(*sbinfo), GFP_NOFS);
1facf9fc 26412+ if (unlikely(!sbinfo))
26413+ goto out;
26414+
b752ccd1
AM
26415+ BUILD_BUG_ON(sizeof(unsigned long) !=
26416+ sizeof(*sbinfo->au_si_pid.bitmap));
26417+ sbinfo->au_si_pid.bitmap = kcalloc(BITS_TO_LONGS(PID_MAX_DEFAULT),
26418+ sizeof(*sbinfo->au_si_pid.bitmap),
26419+ GFP_NOFS);
26420+ if (unlikely(!sbinfo->au_si_pid.bitmap))
26421+ goto out_sbinfo;
26422+
1facf9fc 26423+ /* will be reallocated separately */
26424+ sbinfo->si_branch = kzalloc(sizeof(*sbinfo->si_branch), GFP_NOFS);
26425+ if (unlikely(!sbinfo->si_branch))
b752ccd1 26426+ goto out_pidmap;
1facf9fc 26427+
1facf9fc 26428+ err = sysaufs_si_init(sbinfo);
26429+ if (unlikely(err))
26430+ goto out_br;
26431+
26432+ au_nwt_init(&sbinfo->si_nowait);
dece6358 26433+ au_rw_init_wlock(&sbinfo->si_rwsem);
e49829fe 26434+ au_rw_class(&sbinfo->si_rwsem, &aufs_si);
b752ccd1
AM
26435+ spin_lock_init(&sbinfo->au_si_pid.tree_lock);
26436+ INIT_RADIX_TREE(&sbinfo->au_si_pid.tree, GFP_ATOMIC | __GFP_NOFAIL);
26437+
7f207e10 26438+ atomic_long_set(&sbinfo->si_ninodes, 0);
7f207e10
AM
26439+ atomic_long_set(&sbinfo->si_nfiles, 0);
26440+
1facf9fc 26441+ sbinfo->si_bend = -1;
392086de 26442+ sbinfo->si_last_br_id = AUFS_BRANCH_MAX / 2;
1facf9fc 26443+
26444+ sbinfo->si_wbr_copyup = AuWbrCopyup_Def;
26445+ sbinfo->si_wbr_create = AuWbrCreate_Def;
4a4d8108
AM
26446+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + sbinfo->si_wbr_copyup;
26447+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + sbinfo->si_wbr_create;
1facf9fc 26448+
076b876e
AM
26449+ au_fhsm_init(sbinfo);
26450+
e49829fe 26451+ sbinfo->si_mntflags = au_opts_plink(AuOpt_Def);
1facf9fc 26452+
392086de
AM
26453+ sbinfo->si_xino_jiffy = jiffies;
26454+ sbinfo->si_xino_expire
26455+ = msecs_to_jiffies(AUFS_XINO_DEF_SEC * MSEC_PER_SEC);
1facf9fc 26456+ mutex_init(&sbinfo->si_xib_mtx);
1facf9fc 26457+ sbinfo->si_xino_brid = -1;
26458+ /* leave si_xib_last_pindex and si_xib_next_bit */
26459+
7f2ca4b1
JR
26460+ au_sphl_init(&sbinfo->si_aopen);
26461+
e49829fe 26462+ sbinfo->si_rdcache = msecs_to_jiffies(AUFS_RDCACHE_DEF * MSEC_PER_SEC);
1facf9fc 26463+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
26464+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
26465+ sbinfo->si_dirwh = AUFS_DIRWH_DEF;
26466+
86dc4139
AM
26467+ for (i = 0; i < AuPlink_NHASH; i++)
26468+ au_sphl_init(sbinfo->si_plink + i);
1facf9fc 26469+ init_waitqueue_head(&sbinfo->si_plink_wq);
4a4d8108 26470+ spin_lock_init(&sbinfo->si_plink_maint_lock);
1facf9fc 26471+
523b37e3
AM
26472+ au_sphl_init(&sbinfo->si_files);
26473+
7f2ca4b1
JR
26474+ /* with getattr by default */
26475+ sbinfo->si_iop_array = aufs_iop;
26476+
1facf9fc 26477+ /* leave other members for sysaufs and si_mnt. */
26478+ sbinfo->si_sb = sb;
26479+ sb->s_fs_info = sbinfo;
b752ccd1 26480+ si_pid_set(sb);
1facf9fc 26481+ return 0; /* success */
26482+
4f0767ce 26483+out_br:
1facf9fc 26484+ kfree(sbinfo->si_branch);
4f0767ce 26485+out_pidmap:
b752ccd1 26486+ kfree(sbinfo->au_si_pid.bitmap);
4f0767ce 26487+out_sbinfo:
1facf9fc 26488+ kfree(sbinfo);
4f0767ce 26489+out:
1facf9fc 26490+ return err;
26491+}
26492+
26493+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr)
26494+{
26495+ int err, sz;
26496+ struct au_branch **brp;
26497+
dece6358
AM
26498+ AuRwMustWriteLock(&sbinfo->si_rwsem);
26499+
1facf9fc 26500+ err = -ENOMEM;
26501+ sz = sizeof(*brp) * (sbinfo->si_bend + 1);
26502+ if (unlikely(!sz))
26503+ sz = sizeof(*brp);
26504+ brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS);
26505+ if (brp) {
26506+ sbinfo->si_branch = brp;
26507+ err = 0;
26508+ }
26509+
26510+ return err;
26511+}
26512+
26513+/* ---------------------------------------------------------------------- */
26514+
26515+unsigned int au_sigen_inc(struct super_block *sb)
26516+{
26517+ unsigned int gen;
26518+
dece6358
AM
26519+ SiMustWriteLock(sb);
26520+
1facf9fc 26521+ gen = ++au_sbi(sb)->si_generation;
26522+ au_update_digen(sb->s_root);
537831f9 26523+ au_update_iigen(sb->s_root->d_inode, /*half*/0);
1facf9fc 26524+ sb->s_root->d_inode->i_version++;
26525+ return gen;
26526+}
26527+
26528+aufs_bindex_t au_new_br_id(struct super_block *sb)
26529+{
26530+ aufs_bindex_t br_id;
26531+ int i;
26532+ struct au_sbinfo *sbinfo;
26533+
dece6358
AM
26534+ SiMustWriteLock(sb);
26535+
1facf9fc 26536+ sbinfo = au_sbi(sb);
26537+ for (i = 0; i <= AUFS_BRANCH_MAX; i++) {
26538+ br_id = ++sbinfo->si_last_br_id;
7f207e10 26539+ AuDebugOn(br_id < 0);
1facf9fc 26540+ if (br_id && au_br_index(sb, br_id) < 0)
26541+ return br_id;
26542+ }
26543+
26544+ return -1;
26545+}
26546+
26547+/* ---------------------------------------------------------------------- */
26548+
e49829fe
JR
26549+/* it is ok that new 'nwt' tasks are appended while we are sleeping */
26550+int si_read_lock(struct super_block *sb, int flags)
26551+{
26552+ int err;
26553+
26554+ err = 0;
26555+ if (au_ftest_lock(flags, FLUSH))
26556+ au_nwt_flush(&au_sbi(sb)->si_nowait);
26557+
26558+ si_noflush_read_lock(sb);
26559+ err = au_plink_maint(sb, flags);
26560+ if (unlikely(err))
26561+ si_read_unlock(sb);
26562+
26563+ return err;
26564+}
26565+
26566+int si_write_lock(struct super_block *sb, int flags)
26567+{
26568+ int err;
26569+
26570+ if (au_ftest_lock(flags, FLUSH))
26571+ au_nwt_flush(&au_sbi(sb)->si_nowait);
26572+
26573+ si_noflush_write_lock(sb);
26574+ err = au_plink_maint(sb, flags);
26575+ if (unlikely(err))
26576+ si_write_unlock(sb);
26577+
26578+ return err;
26579+}
26580+
1facf9fc 26581+/* dentry and super_block lock. call at entry point */
e49829fe 26582+int aufs_read_lock(struct dentry *dentry, int flags)
1facf9fc 26583+{
e49829fe 26584+ int err;
027c5e7a 26585+ struct super_block *sb;
e49829fe 26586+
027c5e7a
AM
26587+ sb = dentry->d_sb;
26588+ err = si_read_lock(sb, flags);
26589+ if (unlikely(err))
26590+ goto out;
26591+
26592+ if (au_ftest_lock(flags, DW))
26593+ di_write_lock_child(dentry);
26594+ else
26595+ di_read_lock_child(dentry, flags);
26596+
26597+ if (au_ftest_lock(flags, GEN)) {
26598+ err = au_digen_test(dentry, au_sigen(sb));
7f2ca4b1
JR
26599+ if (!au_opt_test(au_mntflags(sb), UDBA_NONE))
26600+ AuDebugOn(!err && au_dbrange_test(dentry));
26601+ else if (!err)
26602+ err = au_dbrange_test(dentry);
027c5e7a
AM
26603+ if (unlikely(err))
26604+ aufs_read_unlock(dentry, flags);
e49829fe
JR
26605+ }
26606+
027c5e7a 26607+out:
e49829fe 26608+ return err;
1facf9fc 26609+}
26610+
26611+void aufs_read_unlock(struct dentry *dentry, int flags)
26612+{
26613+ if (au_ftest_lock(flags, DW))
26614+ di_write_unlock(dentry);
26615+ else
26616+ di_read_unlock(dentry, flags);
26617+ si_read_unlock(dentry->d_sb);
26618+}
26619+
26620+void aufs_write_lock(struct dentry *dentry)
26621+{
e49829fe 26622+ si_write_lock(dentry->d_sb, AuLock_FLUSH | AuLock_NOPLMW);
1facf9fc 26623+ di_write_lock_child(dentry);
26624+}
26625+
26626+void aufs_write_unlock(struct dentry *dentry)
26627+{
26628+ di_write_unlock(dentry);
26629+ si_write_unlock(dentry->d_sb);
26630+}
26631+
e49829fe 26632+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags)
1facf9fc 26633+{
e49829fe 26634+ int err;
027c5e7a
AM
26635+ unsigned int sigen;
26636+ struct super_block *sb;
e49829fe 26637+
027c5e7a
AM
26638+ sb = d1->d_sb;
26639+ err = si_read_lock(sb, flags);
26640+ if (unlikely(err))
26641+ goto out;
26642+
7f2ca4b1 26643+ di_write_lock2_child(d1, d2, au_ftest_lock(flags, DIRS));
027c5e7a
AM
26644+
26645+ if (au_ftest_lock(flags, GEN)) {
26646+ sigen = au_sigen(sb);
26647+ err = au_digen_test(d1, sigen);
26648+ AuDebugOn(!err && au_dbrange_test(d1));
26649+ if (!err) {
26650+ err = au_digen_test(d2, sigen);
26651+ AuDebugOn(!err && au_dbrange_test(d2));
26652+ }
26653+ if (unlikely(err))
26654+ aufs_read_and_write_unlock2(d1, d2);
26655+ }
26656+
26657+out:
e49829fe 26658+ return err;
1facf9fc 26659+}
26660+
26661+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2)
26662+{
26663+ di_write_unlock2(d1, d2);
26664+ si_read_unlock(d1->d_sb);
26665+}
b752ccd1
AM
26666+
26667+/* ---------------------------------------------------------------------- */
26668+
26669+int si_pid_test_slow(struct super_block *sb)
26670+{
26671+ void *p;
26672+
26673+ rcu_read_lock();
26674+ p = radix_tree_lookup(&au_sbi(sb)->au_si_pid.tree, current->pid);
26675+ rcu_read_unlock();
26676+
027c5e7a 26677+ return (long)!!p;
b752ccd1
AM
26678+}
26679+
26680+void si_pid_set_slow(struct super_block *sb)
26681+{
26682+ int err;
26683+ struct au_sbinfo *sbinfo;
26684+
26685+ AuDebugOn(si_pid_test_slow(sb));
26686+
26687+ sbinfo = au_sbi(sb);
26688+ err = radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
26689+ AuDebugOn(err);
26690+ spin_lock(&sbinfo->au_si_pid.tree_lock);
26691+ err = radix_tree_insert(&sbinfo->au_si_pid.tree, current->pid,
027c5e7a 26692+ /*any valid ptr*/sb);
b752ccd1
AM
26693+ spin_unlock(&sbinfo->au_si_pid.tree_lock);
26694+ AuDebugOn(err);
26695+ radix_tree_preload_end();
26696+}
26697+
26698+void si_pid_clr_slow(struct super_block *sb)
26699+{
26700+ void *p;
26701+ struct au_sbinfo *sbinfo;
26702+
26703+ AuDebugOn(!si_pid_test_slow(sb));
26704+
26705+ sbinfo = au_sbi(sb);
26706+ spin_lock(&sbinfo->au_si_pid.tree_lock);
26707+ p = radix_tree_delete(&sbinfo->au_si_pid.tree, current->pid);
26708+ spin_unlock(&sbinfo->au_si_pid.tree_lock);
b752ccd1 26709+}
7f207e10
AM
26710diff -urN /usr/share/empty/fs/aufs/spl.h linux/fs/aufs/spl.h
26711--- /usr/share/empty/fs/aufs/spl.h 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 26712+++ linux/fs/aufs/spl.h 2016-02-28 11:27:01.280579017 +0100
523b37e3 26713@@ -0,0 +1,111 @@
1facf9fc 26714+/*
7f2ca4b1 26715+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 26716+ *
26717+ * This program, aufs is free software; you can redistribute it and/or modify
26718+ * it under the terms of the GNU General Public License as published by
26719+ * the Free Software Foundation; either version 2 of the License, or
26720+ * (at your option) any later version.
dece6358
AM
26721+ *
26722+ * This program is distributed in the hope that it will be useful,
26723+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26724+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26725+ * GNU General Public License for more details.
26726+ *
26727+ * You should have received a copy of the GNU General Public License
523b37e3 26728+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 26729+ */
26730+
26731+/*
26732+ * simple list protected by a spinlock
26733+ */
26734+
26735+#ifndef __AUFS_SPL_H__
26736+#define __AUFS_SPL_H__
26737+
26738+#ifdef __KERNEL__
26739+
1facf9fc 26740+struct au_splhead {
26741+ spinlock_t spin;
26742+ struct list_head head;
26743+};
26744+
26745+static inline void au_spl_init(struct au_splhead *spl)
26746+{
26747+ spin_lock_init(&spl->spin);
26748+ INIT_LIST_HEAD(&spl->head);
26749+}
26750+
26751+static inline void au_spl_add(struct list_head *list, struct au_splhead *spl)
26752+{
26753+ spin_lock(&spl->spin);
26754+ list_add(list, &spl->head);
26755+ spin_unlock(&spl->spin);
26756+}
26757+
26758+static inline void au_spl_del(struct list_head *list, struct au_splhead *spl)
26759+{
26760+ spin_lock(&spl->spin);
26761+ list_del(list);
26762+ spin_unlock(&spl->spin);
26763+}
26764+
4a4d8108
AM
26765+static inline void au_spl_del_rcu(struct list_head *list,
26766+ struct au_splhead *spl)
26767+{
26768+ spin_lock(&spl->spin);
26769+ list_del_rcu(list);
26770+ spin_unlock(&spl->spin);
26771+}
26772+
86dc4139
AM
26773+/* ---------------------------------------------------------------------- */
26774+
26775+struct au_sphlhead {
26776+ spinlock_t spin;
26777+ struct hlist_head head;
26778+};
26779+
26780+static inline void au_sphl_init(struct au_sphlhead *sphl)
26781+{
26782+ spin_lock_init(&sphl->spin);
26783+ INIT_HLIST_HEAD(&sphl->head);
26784+}
26785+
26786+static inline void au_sphl_add(struct hlist_node *hlist,
26787+ struct au_sphlhead *sphl)
26788+{
26789+ spin_lock(&sphl->spin);
26790+ hlist_add_head(hlist, &sphl->head);
26791+ spin_unlock(&sphl->spin);
26792+}
26793+
26794+static inline void au_sphl_del(struct hlist_node *hlist,
26795+ struct au_sphlhead *sphl)
26796+{
26797+ spin_lock(&sphl->spin);
26798+ hlist_del(hlist);
26799+ spin_unlock(&sphl->spin);
26800+}
26801+
26802+static inline void au_sphl_del_rcu(struct hlist_node *hlist,
26803+ struct au_sphlhead *sphl)
26804+{
26805+ spin_lock(&sphl->spin);
26806+ hlist_del_rcu(hlist);
26807+ spin_unlock(&sphl->spin);
26808+}
26809+
26810+static inline unsigned long au_sphl_count(struct au_sphlhead *sphl)
26811+{
26812+ unsigned long cnt;
26813+ struct hlist_node *pos;
26814+
26815+ cnt = 0;
26816+ spin_lock(&sphl->spin);
26817+ hlist_for_each(pos, &sphl->head)
26818+ cnt++;
26819+ spin_unlock(&sphl->spin);
26820+ return cnt;
26821+}
26822+
1facf9fc 26823+#endif /* __KERNEL__ */
26824+#endif /* __AUFS_SPL_H__ */
7f207e10
AM
26825diff -urN /usr/share/empty/fs/aufs/super.c linux/fs/aufs/super.c
26826--- /usr/share/empty/fs/aufs/super.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
26827+++ linux/fs/aufs/super.c 2016-02-28 11:27:01.280579017 +0100
26828@@ -0,0 +1,1041 @@
1facf9fc 26829+/*
7f2ca4b1 26830+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 26831+ *
26832+ * This program, aufs is free software; you can redistribute it and/or modify
26833+ * it under the terms of the GNU General Public License as published by
26834+ * the Free Software Foundation; either version 2 of the License, or
26835+ * (at your option) any later version.
dece6358
AM
26836+ *
26837+ * This program is distributed in the hope that it will be useful,
26838+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26839+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26840+ * GNU General Public License for more details.
26841+ *
26842+ * You should have received a copy of the GNU General Public License
523b37e3 26843+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 26844+ */
26845+
26846+/*
26847+ * mount and super_block operations
26848+ */
26849+
f6c5ef8b 26850+#include <linux/mm.h>
1facf9fc 26851+#include <linux/seq_file.h>
26852+#include <linux/statfs.h>
7f207e10 26853+#include <linux/vmalloc.h>
1facf9fc 26854+#include "aufs.h"
26855+
26856+/*
26857+ * super_operations
26858+ */
26859+static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused)
26860+{
26861+ struct au_icntnr *c;
26862+
26863+ c = au_cache_alloc_icntnr();
26864+ if (c) {
027c5e7a 26865+ au_icntnr_init(c);
1facf9fc 26866+ c->vfs_inode.i_version = 1; /* sigen(sb); */
26867+ c->iinfo.ii_hinode = NULL;
26868+ return &c->vfs_inode;
26869+ }
26870+ return NULL;
26871+}
26872+
027c5e7a
AM
26873+static void aufs_destroy_inode_cb(struct rcu_head *head)
26874+{
26875+ struct inode *inode = container_of(head, struct inode, i_rcu);
26876+
b4510431 26877+ INIT_HLIST_HEAD(&inode->i_dentry);
027c5e7a
AM
26878+ au_cache_free_icntnr(container_of(inode, struct au_icntnr, vfs_inode));
26879+}
26880+
1facf9fc 26881+static void aufs_destroy_inode(struct inode *inode)
26882+{
26883+ au_iinfo_fin(inode);
027c5e7a 26884+ call_rcu(&inode->i_rcu, aufs_destroy_inode_cb);
1facf9fc 26885+}
26886+
26887+struct inode *au_iget_locked(struct super_block *sb, ino_t ino)
26888+{
26889+ struct inode *inode;
26890+ int err;
26891+
26892+ inode = iget_locked(sb, ino);
26893+ if (unlikely(!inode)) {
26894+ inode = ERR_PTR(-ENOMEM);
26895+ goto out;
26896+ }
26897+ if (!(inode->i_state & I_NEW))
26898+ goto out;
26899+
26900+ err = au_xigen_new(inode);
26901+ if (!err)
26902+ err = au_iinfo_init(inode);
26903+ if (!err)
26904+ inode->i_version++;
26905+ else {
26906+ iget_failed(inode);
26907+ inode = ERR_PTR(err);
26908+ }
26909+
4f0767ce 26910+out:
1facf9fc 26911+ /* never return NULL */
26912+ AuDebugOn(!inode);
26913+ AuTraceErrPtr(inode);
26914+ return inode;
26915+}
26916+
26917+/* lock free root dinfo */
26918+static int au_show_brs(struct seq_file *seq, struct super_block *sb)
26919+{
26920+ int err;
26921+ aufs_bindex_t bindex, bend;
26922+ struct path path;
4a4d8108 26923+ struct au_hdentry *hdp;
1facf9fc 26924+ struct au_branch *br;
076b876e 26925+ au_br_perm_str_t perm;
1facf9fc 26926+
26927+ err = 0;
26928+ bend = au_sbend(sb);
4a4d8108 26929+ hdp = au_di(sb->s_root)->di_hdentry;
1facf9fc 26930+ for (bindex = 0; !err && bindex <= bend; bindex++) {
26931+ br = au_sbr(sb, bindex);
86dc4139 26932+ path.mnt = au_br_mnt(br);
4a4d8108 26933+ path.dentry = hdp[bindex].hd_dentry;
1facf9fc 26934+ err = au_seq_path(seq, &path);
7f2ca4b1 26935+ if (!err) {
076b876e
AM
26936+ au_optstr_br_perm(&perm, br->br_perm);
26937+ err = seq_printf(seq, "=%s", perm.a);
26938+ if (err == -1)
26939+ err = -E2BIG;
1e00d052 26940+ }
1facf9fc 26941+ if (!err && bindex != bend)
26942+ err = seq_putc(seq, ':');
26943+ }
26944+
26945+ return err;
26946+}
26947+
26948+static void au_show_wbr_create(struct seq_file *m, int v,
26949+ struct au_sbinfo *sbinfo)
26950+{
26951+ const char *pat;
26952+
dece6358
AM
26953+ AuRwMustAnyLock(&sbinfo->si_rwsem);
26954+
c2b27bf2 26955+ seq_puts(m, ",create=");
1facf9fc 26956+ pat = au_optstr_wbr_create(v);
26957+ switch (v) {
26958+ case AuWbrCreate_TDP:
26959+ case AuWbrCreate_RR:
26960+ case AuWbrCreate_MFS:
26961+ case AuWbrCreate_PMFS:
c2b27bf2 26962+ seq_puts(m, pat);
1facf9fc 26963+ break;
26964+ case AuWbrCreate_MFSV:
26965+ seq_printf(m, /*pat*/"mfs:%lu",
e49829fe
JR
26966+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
26967+ / MSEC_PER_SEC);
1facf9fc 26968+ break;
26969+ case AuWbrCreate_PMFSV:
26970+ seq_printf(m, /*pat*/"pmfs:%lu",
e49829fe
JR
26971+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
26972+ / MSEC_PER_SEC);
1facf9fc 26973+ break;
26974+ case AuWbrCreate_MFSRR:
26975+ seq_printf(m, /*pat*/"mfsrr:%llu",
26976+ sbinfo->si_wbr_mfs.mfsrr_watermark);
26977+ break;
26978+ case AuWbrCreate_MFSRRV:
26979+ seq_printf(m, /*pat*/"mfsrr:%llu:%lu",
26980+ sbinfo->si_wbr_mfs.mfsrr_watermark,
e49829fe
JR
26981+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
26982+ / MSEC_PER_SEC);
1facf9fc 26983+ break;
392086de
AM
26984+ case AuWbrCreate_PMFSRR:
26985+ seq_printf(m, /*pat*/"pmfsrr:%llu",
26986+ sbinfo->si_wbr_mfs.mfsrr_watermark);
26987+ break;
26988+ case AuWbrCreate_PMFSRRV:
26989+ seq_printf(m, /*pat*/"pmfsrr:%llu:%lu",
26990+ sbinfo->si_wbr_mfs.mfsrr_watermark,
26991+ jiffies_to_msecs(sbinfo->si_wbr_mfs.mfs_expire)
26992+ / MSEC_PER_SEC);
26993+ break;
1facf9fc 26994+ }
26995+}
26996+
7eafdf33 26997+static int au_show_xino(struct seq_file *seq, struct super_block *sb)
1facf9fc 26998+{
26999+#ifdef CONFIG_SYSFS
27000+ return 0;
27001+#else
27002+ int err;
27003+ const int len = sizeof(AUFS_XINO_FNAME) - 1;
27004+ aufs_bindex_t bindex, brid;
1facf9fc 27005+ struct qstr *name;
27006+ struct file *f;
27007+ struct dentry *d, *h_root;
4a4d8108 27008+ struct au_hdentry *hdp;
1facf9fc 27009+
dece6358
AM
27010+ AuRwMustAnyLock(&sbinfo->si_rwsem);
27011+
1facf9fc 27012+ err = 0;
1facf9fc 27013+ f = au_sbi(sb)->si_xib;
27014+ if (!f)
27015+ goto out;
27016+
27017+ /* stop printing the default xino path on the first writable branch */
27018+ h_root = NULL;
27019+ brid = au_xino_brid(sb);
27020+ if (brid >= 0) {
27021+ bindex = au_br_index(sb, brid);
4a4d8108
AM
27022+ hdp = au_di(sb->s_root)->di_hdentry;
27023+ h_root = hdp[0 + bindex].hd_dentry;
1facf9fc 27024+ }
27025+ d = f->f_dentry;
27026+ name = &d->d_name;
27027+ /* safe ->d_parent because the file is unlinked */
27028+ if (d->d_parent == h_root
27029+ && name->len == len
27030+ && !memcmp(name->name, AUFS_XINO_FNAME, len))
27031+ goto out;
27032+
27033+ seq_puts(seq, ",xino=");
27034+ err = au_xino_path(seq, f);
27035+
4f0767ce 27036+out:
1facf9fc 27037+ return err;
27038+#endif
27039+}
27040+
27041+/* seq_file will re-call me in case of too long string */
7eafdf33 27042+static int aufs_show_options(struct seq_file *m, struct dentry *dentry)
1facf9fc 27043+{
027c5e7a 27044+ int err;
1facf9fc 27045+ unsigned int mnt_flags, v;
27046+ struct super_block *sb;
27047+ struct au_sbinfo *sbinfo;
27048+
27049+#define AuBool(name, str) do { \
27050+ v = au_opt_test(mnt_flags, name); \
27051+ if (v != au_opt_test(AuOpt_Def, name)) \
27052+ seq_printf(m, ",%s" #str, v ? "" : "no"); \
27053+} while (0)
27054+
27055+#define AuStr(name, str) do { \
27056+ v = mnt_flags & AuOptMask_##name; \
27057+ if (v != (AuOpt_Def & AuOptMask_##name)) \
27058+ seq_printf(m, "," #str "=%s", au_optstr_##str(v)); \
27059+} while (0)
27060+
27061+#define AuUInt(name, str, val) do { \
27062+ if (val != AUFS_##name##_DEF) \
27063+ seq_printf(m, "," #str "=%u", val); \
27064+} while (0)
27065+
7eafdf33 27066+ sb = dentry->d_sb;
c1595e42
JR
27067+ if (sb->s_flags & MS_POSIXACL)
27068+ seq_puts(m, ",acl");
27069+
27070+ /* lock free root dinfo */
1facf9fc 27071+ si_noflush_read_lock(sb);
27072+ sbinfo = au_sbi(sb);
27073+ seq_printf(m, ",si=%lx", sysaufs_si_id(sbinfo));
27074+
27075+ mnt_flags = au_mntflags(sb);
27076+ if (au_opt_test(mnt_flags, XINO)) {
7eafdf33 27077+ err = au_show_xino(m, sb);
1facf9fc 27078+ if (unlikely(err))
27079+ goto out;
27080+ } else
27081+ seq_puts(m, ",noxino");
27082+
27083+ AuBool(TRUNC_XINO, trunc_xino);
27084+ AuStr(UDBA, udba);
dece6358 27085+ AuBool(SHWH, shwh);
1facf9fc 27086+ AuBool(PLINK, plink);
4a4d8108 27087+ AuBool(DIO, dio);
076b876e 27088+ AuBool(DIRPERM1, dirperm1);
1facf9fc 27089+ /* AuBool(REFROF, refrof); */
27090+
27091+ v = sbinfo->si_wbr_create;
27092+ if (v != AuWbrCreate_Def)
27093+ au_show_wbr_create(m, v, sbinfo);
27094+
27095+ v = sbinfo->si_wbr_copyup;
27096+ if (v != AuWbrCopyup_Def)
27097+ seq_printf(m, ",cpup=%s", au_optstr_wbr_copyup(v));
27098+
27099+ v = au_opt_test(mnt_flags, ALWAYS_DIROPQ);
27100+ if (v != au_opt_test(AuOpt_Def, ALWAYS_DIROPQ))
27101+ seq_printf(m, ",diropq=%c", v ? 'a' : 'w');
27102+
27103+ AuUInt(DIRWH, dirwh, sbinfo->si_dirwh);
27104+
027c5e7a
AM
27105+ v = jiffies_to_msecs(sbinfo->si_rdcache) / MSEC_PER_SEC;
27106+ AuUInt(RDCACHE, rdcache, v);
1facf9fc 27107+
27108+ AuUInt(RDBLK, rdblk, sbinfo->si_rdblk);
27109+ AuUInt(RDHASH, rdhash, sbinfo->si_rdhash);
27110+
076b876e
AM
27111+ au_fhsm_show(m, sbinfo);
27112+
1facf9fc 27113+ AuBool(SUM, sum);
27114+ /* AuBool(SUM_W, wsum); */
27115+ AuBool(WARN_PERM, warn_perm);
27116+ AuBool(VERBOSE, verbose);
27117+
4f0767ce 27118+out:
1facf9fc 27119+ /* be sure to print "br:" last */
27120+ if (!sysaufs_brs) {
27121+ seq_puts(m, ",br:");
27122+ au_show_brs(m, sb);
27123+ }
27124+ si_read_unlock(sb);
27125+ return 0;
27126+
1facf9fc 27127+#undef AuBool
27128+#undef AuStr
4a4d8108 27129+#undef AuUInt
1facf9fc 27130+}
27131+
27132+/* ---------------------------------------------------------------------- */
27133+
27134+/* sum mode which returns the summation for statfs(2) */
27135+
27136+static u64 au_add_till_max(u64 a, u64 b)
27137+{
27138+ u64 old;
27139+
27140+ old = a;
27141+ a += b;
92d182d2
AM
27142+ if (old <= a)
27143+ return a;
27144+ return ULLONG_MAX;
27145+}
27146+
27147+static u64 au_mul_till_max(u64 a, long mul)
27148+{
27149+ u64 old;
27150+
27151+ old = a;
27152+ a *= mul;
27153+ if (old <= a)
1facf9fc 27154+ return a;
27155+ return ULLONG_MAX;
27156+}
27157+
27158+static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf)
27159+{
27160+ int err;
92d182d2 27161+ long bsize, factor;
1facf9fc 27162+ u64 blocks, bfree, bavail, files, ffree;
27163+ aufs_bindex_t bend, bindex, i;
27164+ unsigned char shared;
7f207e10 27165+ struct path h_path;
1facf9fc 27166+ struct super_block *h_sb;
27167+
92d182d2
AM
27168+ err = 0;
27169+ bsize = LONG_MAX;
27170+ files = 0;
27171+ ffree = 0;
1facf9fc 27172+ blocks = 0;
27173+ bfree = 0;
27174+ bavail = 0;
1facf9fc 27175+ bend = au_sbend(sb);
92d182d2 27176+ for (bindex = 0; bindex <= bend; bindex++) {
7f207e10
AM
27177+ h_path.mnt = au_sbr_mnt(sb, bindex);
27178+ h_sb = h_path.mnt->mnt_sb;
1facf9fc 27179+ shared = 0;
92d182d2 27180+ for (i = 0; !shared && i < bindex; i++)
1facf9fc 27181+ shared = (au_sbr_sb(sb, i) == h_sb);
27182+ if (shared)
27183+ continue;
27184+
27185+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
27186+ h_path.dentry = h_path.mnt->mnt_root;
27187+ err = vfs_statfs(&h_path, buf);
1facf9fc 27188+ if (unlikely(err))
27189+ goto out;
27190+
92d182d2
AM
27191+ if (bsize > buf->f_bsize) {
27192+ /*
27193+ * we will reduce bsize, so we have to expand blocks
27194+ * etc. to match them again
27195+ */
27196+ factor = (bsize / buf->f_bsize);
27197+ blocks = au_mul_till_max(blocks, factor);
27198+ bfree = au_mul_till_max(bfree, factor);
27199+ bavail = au_mul_till_max(bavail, factor);
27200+ bsize = buf->f_bsize;
27201+ }
27202+
27203+ factor = (buf->f_bsize / bsize);
27204+ blocks = au_add_till_max(blocks,
27205+ au_mul_till_max(buf->f_blocks, factor));
27206+ bfree = au_add_till_max(bfree,
27207+ au_mul_till_max(buf->f_bfree, factor));
27208+ bavail = au_add_till_max(bavail,
27209+ au_mul_till_max(buf->f_bavail, factor));
1facf9fc 27210+ files = au_add_till_max(files, buf->f_files);
27211+ ffree = au_add_till_max(ffree, buf->f_ffree);
27212+ }
27213+
92d182d2 27214+ buf->f_bsize = bsize;
1facf9fc 27215+ buf->f_blocks = blocks;
27216+ buf->f_bfree = bfree;
27217+ buf->f_bavail = bavail;
27218+ buf->f_files = files;
27219+ buf->f_ffree = ffree;
92d182d2 27220+ buf->f_frsize = 0;
1facf9fc 27221+
4f0767ce 27222+out:
1facf9fc 27223+ return err;
27224+}
27225+
27226+static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf)
27227+{
27228+ int err;
7f207e10 27229+ struct path h_path;
1facf9fc 27230+ struct super_block *sb;
27231+
27232+ /* lock free root dinfo */
27233+ sb = dentry->d_sb;
27234+ si_noflush_read_lock(sb);
7f207e10 27235+ if (!au_opt_test(au_mntflags(sb), SUM)) {
1facf9fc 27236+ /* sb->s_root for NFS is unreliable */
7f207e10
AM
27237+ h_path.mnt = au_sbr_mnt(sb, 0);
27238+ h_path.dentry = h_path.mnt->mnt_root;
27239+ err = vfs_statfs(&h_path, buf);
27240+ } else
1facf9fc 27241+ err = au_statfs_sum(sb, buf);
27242+ si_read_unlock(sb);
27243+
27244+ if (!err) {
27245+ buf->f_type = AUFS_SUPER_MAGIC;
4a4d8108 27246+ buf->f_namelen = AUFS_MAX_NAMELEN;
1facf9fc 27247+ memset(&buf->f_fsid, 0, sizeof(buf->f_fsid));
27248+ }
27249+ /* buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; */
27250+
27251+ return err;
27252+}
27253+
27254+/* ---------------------------------------------------------------------- */
27255+
537831f9
AM
27256+static int aufs_sync_fs(struct super_block *sb, int wait)
27257+{
27258+ int err, e;
27259+ aufs_bindex_t bend, bindex;
27260+ struct au_branch *br;
27261+ struct super_block *h_sb;
27262+
27263+ err = 0;
27264+ si_noflush_read_lock(sb);
27265+ bend = au_sbend(sb);
27266+ for (bindex = 0; bindex <= bend; bindex++) {
27267+ br = au_sbr(sb, bindex);
27268+ if (!au_br_writable(br->br_perm))
27269+ continue;
27270+
27271+ h_sb = au_sbr_sb(sb, bindex);
27272+ if (h_sb->s_op->sync_fs) {
27273+ e = h_sb->s_op->sync_fs(h_sb, wait);
27274+ if (unlikely(e && !err))
27275+ err = e;
27276+ /* go on even if an error happens */
27277+ }
27278+ }
27279+ si_read_unlock(sb);
27280+
27281+ return err;
27282+}
27283+
27284+/* ---------------------------------------------------------------------- */
27285+
1facf9fc 27286+/* final actions when unmounting a file system */
27287+static void aufs_put_super(struct super_block *sb)
27288+{
27289+ struct au_sbinfo *sbinfo;
27290+
27291+ sbinfo = au_sbi(sb);
27292+ if (!sbinfo)
27293+ return;
27294+
1facf9fc 27295+ dbgaufs_si_fin(sbinfo);
27296+ kobject_put(&sbinfo->si_kobj);
27297+}
27298+
27299+/* ---------------------------------------------------------------------- */
27300+
7f207e10
AM
27301+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb, void *arg)
27302+{
27303+ void *array;
076b876e 27304+ unsigned long long n, sz;
7f207e10
AM
27305+
27306+ array = NULL;
27307+ n = 0;
27308+ if (!*hint)
27309+ goto out;
27310+
27311+ if (*hint > ULLONG_MAX / sizeof(array)) {
27312+ array = ERR_PTR(-EMFILE);
27313+ pr_err("hint %llu\n", *hint);
27314+ goto out;
27315+ }
27316+
076b876e
AM
27317+ sz = sizeof(array) * *hint;
27318+ array = kzalloc(sz, GFP_NOFS);
7f207e10 27319+ if (unlikely(!array))
076b876e 27320+ array = vzalloc(sz);
7f207e10
AM
27321+ if (unlikely(!array)) {
27322+ array = ERR_PTR(-ENOMEM);
27323+ goto out;
27324+ }
27325+
27326+ n = cb(array, *hint, arg);
27327+ AuDebugOn(n > *hint);
27328+
27329+out:
27330+ *hint = n;
27331+ return array;
27332+}
27333+
27334+static unsigned long long au_iarray_cb(void *a,
27335+ unsigned long long max __maybe_unused,
27336+ void *arg)
27337+{
27338+ unsigned long long n;
27339+ struct inode **p, *inode;
27340+ struct list_head *head;
27341+
27342+ n = 0;
27343+ p = a;
27344+ head = arg;
2cbb1c4b 27345+ spin_lock(&inode_sb_list_lock);
7f207e10
AM
27346+ list_for_each_entry(inode, head, i_sb_list) {
27347+ if (!is_bad_inode(inode)
27348+ && au_ii(inode)->ii_bstart >= 0) {
2cbb1c4b
JR
27349+ spin_lock(&inode->i_lock);
27350+ if (atomic_read(&inode->i_count)) {
27351+ au_igrab(inode);
27352+ *p++ = inode;
27353+ n++;
27354+ AuDebugOn(n > max);
27355+ }
27356+ spin_unlock(&inode->i_lock);
7f207e10
AM
27357+ }
27358+ }
2cbb1c4b 27359+ spin_unlock(&inode_sb_list_lock);
7f207e10
AM
27360+
27361+ return n;
27362+}
27363+
27364+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max)
27365+{
27366+ *max = atomic_long_read(&au_sbi(sb)->si_ninodes);
27367+ return au_array_alloc(max, au_iarray_cb, &sb->s_inodes);
27368+}
27369+
/*
 * Drop the references on the first @max inodes in @a with iput(), then
 * release the array itself with kvfree().
 */
void au_iarray_free(struct inode **a, unsigned long long max)
{
	unsigned long long idx;

	for (idx = 0; idx < max; idx++)
		iput(a[idx]);

	kvfree(a);
}
27378+
27379+/* ---------------------------------------------------------------------- */
27380+
1facf9fc 27381+/*
27382+ * refresh dentry and inode at remount time.
27383+ */
027c5e7a
AM
27384+/* todo: consolidate with simple_reval_dpath() and au_reval_for_attr() */
27385+static int au_do_refresh(struct dentry *dentry, unsigned int dir_flags,
27386+ struct dentry *parent)
1facf9fc 27387+{
27388+ int err;
1facf9fc 27389+
27390+ di_write_lock_child(dentry);
1facf9fc 27391+ di_read_lock_parent(parent, AuLock_IR);
027c5e7a
AM
27392+ err = au_refresh_dentry(dentry, parent);
27393+ if (!err && dir_flags)
27394+ au_hn_reset(dentry->d_inode, dir_flags);
1facf9fc 27395+ di_read_unlock(parent, AuLock_IR);
1facf9fc 27396+ di_write_unlock(dentry);
27397+
27398+ return err;
27399+}
27400+
027c5e7a
AM
27401+static int au_do_refresh_d(struct dentry *dentry, unsigned int sigen,
27402+ struct au_sbinfo *sbinfo,
7f2ca4b1 27403+ const unsigned int dir_flags, unsigned int do_idop)
1facf9fc 27404+{
027c5e7a
AM
27405+ int err;
27406+ struct dentry *parent;
27407+ struct inode *inode;
27408+
27409+ err = 0;
27410+ parent = dget_parent(dentry);
27411+ if (!au_digen_test(parent, sigen) && au_digen_test(dentry, sigen)) {
27412+ inode = dentry->d_inode;
27413+ if (inode) {
27414+ if (!S_ISDIR(inode->i_mode))
27415+ err = au_do_refresh(dentry, /*dir_flags*/0,
27416+ parent);
27417+ else {
27418+ err = au_do_refresh(dentry, dir_flags, parent);
27419+ if (unlikely(err))
27420+ au_fset_si(sbinfo, FAILED_REFRESH_DIR);
27421+ }
27422+ } else
27423+ err = au_do_refresh(dentry, /*dir_flags*/0, parent);
27424+ AuDbgDentry(dentry);
27425+ }
27426+ dput(parent);
27427+
7f2ca4b1
JR
27428+ if (!err) {
27429+ if (do_idop)
27430+ au_refresh_dop(dentry, /*force_reval*/0);
27431+ } else
27432+ au_refresh_dop(dentry, /*force_reval*/1);
27433+
027c5e7a
AM
27434+ AuTraceErr(err);
27435+ return err;
1facf9fc 27436+}
27437+
7f2ca4b1 27438+static int au_refresh_d(struct super_block *sb, unsigned int do_idop)
1facf9fc 27439+{
27440+ int err, i, j, ndentry, e;
027c5e7a 27441+ unsigned int sigen;
1facf9fc 27442+ struct au_dcsub_pages dpages;
27443+ struct au_dpage *dpage;
027c5e7a
AM
27444+ struct dentry **dentries, *d;
27445+ struct au_sbinfo *sbinfo;
27446+ struct dentry *root = sb->s_root;
27447+ const unsigned int dir_flags = au_hi_flags(root->d_inode, /*isdir*/1);
1facf9fc 27448+
7f2ca4b1
JR
27449+ if (do_idop)
27450+ au_refresh_dop(root, /*force_reval*/0);
27451+
027c5e7a
AM
27452+ err = au_dpages_init(&dpages, GFP_NOFS);
27453+ if (unlikely(err))
1facf9fc 27454+ goto out;
027c5e7a
AM
27455+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
27456+ if (unlikely(err))
1facf9fc 27457+ goto out_dpages;
1facf9fc 27458+
027c5e7a
AM
27459+ sigen = au_sigen(sb);
27460+ sbinfo = au_sbi(sb);
27461+ for (i = 0; i < dpages.ndpage; i++) {
1facf9fc 27462+ dpage = dpages.dpages + i;
27463+ dentries = dpage->dentries;
27464+ ndentry = dpage->ndentry;
027c5e7a 27465+ for (j = 0; j < ndentry; j++) {
1facf9fc 27466+ d = dentries[j];
7f2ca4b1
JR
27467+ e = au_do_refresh_d(d, sigen, sbinfo, dir_flags,
27468+ do_idop);
027c5e7a
AM
27469+ if (unlikely(e && !err))
27470+ err = e;
27471+ /* go on even err */
1facf9fc 27472+ }
27473+ }
27474+
4f0767ce 27475+out_dpages:
1facf9fc 27476+ au_dpages_free(&dpages);
4f0767ce 27477+out:
1facf9fc 27478+ return err;
27479+}
27480+
7f2ca4b1 27481+static int au_refresh_i(struct super_block *sb, unsigned int do_idop)
1facf9fc 27482+{
027c5e7a
AM
27483+ int err, e;
27484+ unsigned int sigen;
27485+ unsigned long long max, ull;
27486+ struct inode *inode, **array;
1facf9fc 27487+
027c5e7a
AM
27488+ array = au_iarray_alloc(sb, &max);
27489+ err = PTR_ERR(array);
27490+ if (IS_ERR(array))
27491+ goto out;
1facf9fc 27492+
27493+ err = 0;
027c5e7a
AM
27494+ sigen = au_sigen(sb);
27495+ for (ull = 0; ull < max; ull++) {
27496+ inode = array[ull];
076b876e
AM
27497+ if (unlikely(!inode))
27498+ break;
7f2ca4b1
JR
27499+
27500+ e = 0;
27501+ ii_write_lock_child(inode);
537831f9 27502+ if (au_iigen(inode, NULL) != sigen) {
027c5e7a 27503+ e = au_refresh_hinode_self(inode);
1facf9fc 27504+ if (unlikely(e)) {
7f2ca4b1 27505+ au_refresh_iop(inode, /*force_getattr*/1);
027c5e7a 27506+ pr_err("error %d, i%lu\n", e, inode->i_ino);
1facf9fc 27507+ if (!err)
27508+ err = e;
27509+ /* go on even if err */
27510+ }
27511+ }
7f2ca4b1
JR
27512+ if (!e && do_idop)
27513+ au_refresh_iop(inode, /*force_getattr*/0);
27514+ ii_write_unlock(inode);
1facf9fc 27515+ }
27516+
027c5e7a 27517+ au_iarray_free(array, max);
1facf9fc 27518+
4f0767ce 27519+out:
1facf9fc 27520+ return err;
27521+}
27522+
7f2ca4b1 27523+static void au_remount_refresh(struct super_block *sb, unsigned int do_idop)
1facf9fc 27524+{
027c5e7a
AM
27525+ int err, e;
27526+ unsigned int udba;
27527+ aufs_bindex_t bindex, bend;
1facf9fc 27528+ struct dentry *root;
27529+ struct inode *inode;
027c5e7a 27530+ struct au_branch *br;
7f2ca4b1 27531+ struct au_sbinfo *sbi;
1facf9fc 27532+
27533+ au_sigen_inc(sb);
7f2ca4b1
JR
27534+ sbi = au_sbi(sb);
27535+ au_fclr_si(sbi, FAILED_REFRESH_DIR);
1facf9fc 27536+
27537+ root = sb->s_root;
27538+ DiMustNoWaiters(root);
27539+ inode = root->d_inode;
27540+ IiMustNoWaiters(inode);
1facf9fc 27541+
027c5e7a
AM
27542+ udba = au_opt_udba(sb);
27543+ bend = au_sbend(sb);
27544+ for (bindex = 0; bindex <= bend; bindex++) {
27545+ br = au_sbr(sb, bindex);
27546+ err = au_hnotify_reset_br(udba, br, br->br_perm);
1facf9fc 27547+ if (unlikely(err))
027c5e7a
AM
27548+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
27549+ bindex, err);
27550+ /* go on even if err */
1facf9fc 27551+ }
027c5e7a 27552+ au_hn_reset(inode, au_hi_flags(inode, /*isdir*/1));
1facf9fc 27553+
7f2ca4b1
JR
27554+ if (do_idop) {
27555+ if (au_ftest_si(sbi, NO_DREVAL)) {
27556+ AuDebugOn(sb->s_d_op == &aufs_dop_noreval);
27557+ sb->s_d_op = &aufs_dop_noreval;
27558+ AuDebugOn(sbi->si_iop_array == aufs_iop_nogetattr);
27559+ sbi->si_iop_array = aufs_iop_nogetattr;
27560+ } else {
27561+ AuDebugOn(sb->s_d_op == &aufs_dop);
27562+ sb->s_d_op = &aufs_dop;
27563+ AuDebugOn(sbi->si_iop_array == aufs_iop);
27564+ sbi->si_iop_array = aufs_iop;
27565+ }
27566+ pr_info("reset to %pf and %pf\n",
27567+ sb->s_d_op, sbi->si_iop_array);
27568+ }
27569+
027c5e7a 27570+ di_write_unlock(root);
7f2ca4b1
JR
27571+ err = au_refresh_d(sb, do_idop);
27572+ e = au_refresh_i(sb, do_idop);
027c5e7a
AM
27573+ if (unlikely(e && !err))
27574+ err = e;
1facf9fc 27575+ /* aufs_write_lock() calls ..._child() */
27576+ di_write_lock_child(root);
027c5e7a
AM
27577+
27578+ au_cpup_attr_all(inode, /*force*/1);
27579+
27580+ if (unlikely(err))
27581+ AuIOErr("refresh failed, ignored, %d\n", err);
1facf9fc 27582+}
27583+
27584+/* stop extra interpretation of errno in mount(8), and strange error messages */
27585+static int cvt_err(int err)
27586+{
27587+ AuTraceErr(err);
27588+
27589+ switch (err) {
27590+ case -ENOENT:
27591+ case -ENOTDIR:
27592+ case -EEXIST:
27593+ case -EIO:
27594+ err = -EINVAL;
27595+ }
27596+ return err;
27597+}
27598+
27599+static int aufs_remount_fs(struct super_block *sb, int *flags, char *data)
27600+{
4a4d8108
AM
27601+ int err, do_dx;
27602+ unsigned int mntflags;
7f2ca4b1
JR
27603+ struct au_opts opts = {
27604+ .opt = NULL
27605+ };
1facf9fc 27606+ struct dentry *root;
27607+ struct inode *inode;
27608+ struct au_sbinfo *sbinfo;
27609+
27610+ err = 0;
27611+ root = sb->s_root;
27612+ if (!data || !*data) {
e49829fe
JR
27613+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
27614+ if (!err) {
27615+ di_write_lock_child(root);
27616+ err = au_opts_verify(sb, *flags, /*pending*/0);
27617+ aufs_write_unlock(root);
27618+ }
1facf9fc 27619+ goto out;
27620+ }
27621+
27622+ err = -ENOMEM;
1facf9fc 27623+ opts.opt = (void *)__get_free_page(GFP_NOFS);
27624+ if (unlikely(!opts.opt))
27625+ goto out;
27626+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
27627+ opts.flags = AuOpts_REMOUNT;
27628+ opts.sb_flags = *flags;
27629+
27630+ /* parse it before aufs lock */
27631+ err = au_opts_parse(sb, data, &opts);
27632+ if (unlikely(err))
27633+ goto out_opts;
27634+
27635+ sbinfo = au_sbi(sb);
27636+ inode = root->d_inode;
27637+ mutex_lock(&inode->i_mutex);
e49829fe
JR
27638+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
27639+ if (unlikely(err))
27640+ goto out_mtx;
27641+ di_write_lock_child(root);
1facf9fc 27642+
27643+ /* au_opts_remount() may return an error */
27644+ err = au_opts_remount(sb, &opts);
27645+ au_opts_free(&opts);
27646+
027c5e7a 27647+ if (au_ftest_opts(opts.flags, REFRESH))
7f2ca4b1 27648+ au_remount_refresh(sb, au_ftest_opts(opts.flags, REFRESH_IDOP));
1facf9fc 27649+
4a4d8108
AM
27650+ if (au_ftest_opts(opts.flags, REFRESH_DYAOP)) {
27651+ mntflags = au_mntflags(sb);
27652+ do_dx = !!au_opt_test(mntflags, DIO);
27653+ au_dy_arefresh(do_dx);
27654+ }
27655+
076b876e 27656+ au_fhsm_wrote_all(sb, /*force*/1); /* ?? */
1facf9fc 27657+ aufs_write_unlock(root);
953406b4 27658+
e49829fe
JR
27659+out_mtx:
27660+ mutex_unlock(&inode->i_mutex);
4f0767ce 27661+out_opts:
1facf9fc 27662+ free_page((unsigned long)opts.opt);
4f0767ce 27663+out:
1facf9fc 27664+ err = cvt_err(err);
27665+ AuTraceErr(err);
27666+ return err;
27667+}
27668+
4a4d8108 27669+static const struct super_operations aufs_sop = {
1facf9fc 27670+ .alloc_inode = aufs_alloc_inode,
27671+ .destroy_inode = aufs_destroy_inode,
b752ccd1 27672+ /* always deleting, no clearing */
1facf9fc 27673+ .drop_inode = generic_delete_inode,
27674+ .show_options = aufs_show_options,
27675+ .statfs = aufs_statfs,
27676+ .put_super = aufs_put_super,
537831f9 27677+ .sync_fs = aufs_sync_fs,
1facf9fc 27678+ .remount_fs = aufs_remount_fs
27679+};
27680+
27681+/* ---------------------------------------------------------------------- */
27682+
27683+static int alloc_root(struct super_block *sb)
27684+{
27685+ int err;
27686+ struct inode *inode;
27687+ struct dentry *root;
27688+
27689+ err = -ENOMEM;
27690+ inode = au_iget_locked(sb, AUFS_ROOT_INO);
27691+ err = PTR_ERR(inode);
27692+ if (IS_ERR(inode))
27693+ goto out;
27694+
7f2ca4b1 27695+ inode->i_op = aufs_iop + AuIop_DIR; /* with getattr by default */
1facf9fc 27696+ inode->i_fop = &aufs_dir_fop;
27697+ inode->i_mode = S_IFDIR;
9dbd164d 27698+ set_nlink(inode, 2);
1facf9fc 27699+ unlock_new_inode(inode);
27700+
92d182d2 27701+ root = d_make_root(inode);
1facf9fc 27702+ if (unlikely(!root))
92d182d2 27703+ goto out;
1facf9fc 27704+ err = PTR_ERR(root);
27705+ if (IS_ERR(root))
92d182d2 27706+ goto out;
1facf9fc 27707+
4a4d8108 27708+ err = au_di_init(root);
1facf9fc 27709+ if (!err) {
27710+ sb->s_root = root;
27711+ return 0; /* success */
27712+ }
27713+ dput(root);
1facf9fc 27714+
4f0767ce 27715+out:
1facf9fc 27716+ return err;
1facf9fc 27717+}
27718+
27719+static int aufs_fill_super(struct super_block *sb, void *raw_data,
27720+ int silent __maybe_unused)
27721+{
27722+ int err;
7f2ca4b1
JR
27723+ struct au_opts opts = {
27724+ .opt = NULL
27725+ };
27726+ struct au_sbinfo *sbinfo;
1facf9fc 27727+ struct dentry *root;
27728+ struct inode *inode;
27729+ char *arg = raw_data;
27730+
27731+ if (unlikely(!arg || !*arg)) {
27732+ err = -EINVAL;
4a4d8108 27733+ pr_err("no arg\n");
1facf9fc 27734+ goto out;
27735+ }
27736+
27737+ err = -ENOMEM;
1facf9fc 27738+ opts.opt = (void *)__get_free_page(GFP_NOFS);
27739+ if (unlikely(!opts.opt))
27740+ goto out;
27741+ opts.max_opt = PAGE_SIZE / sizeof(*opts.opt);
27742+ opts.sb_flags = sb->s_flags;
27743+
27744+ err = au_si_alloc(sb);
27745+ if (unlikely(err))
27746+ goto out_opts;
7f2ca4b1 27747+ sbinfo = au_sbi(sb);
1facf9fc 27748+
27749+ /* all timestamps always follow the ones on the branch */
27750+ sb->s_flags |= MS_NOATIME | MS_NODIRATIME;
27751+ sb->s_op = &aufs_sop;
027c5e7a 27752+ sb->s_d_op = &aufs_dop;
1facf9fc 27753+ sb->s_magic = AUFS_SUPER_MAGIC;
27754+ sb->s_maxbytes = 0;
c1595e42 27755+ sb->s_stack_depth = 1;
1facf9fc 27756+ au_export_init(sb);
c1595e42 27757+ /* au_xattr_init(sb); */
1facf9fc 27758+
27759+ err = alloc_root(sb);
27760+ if (unlikely(err)) {
27761+ si_write_unlock(sb);
27762+ goto out_info;
27763+ }
27764+ root = sb->s_root;
27765+ inode = root->d_inode;
27766+
27767+ /*
27768+ * actually we can parse options regardless aufs lock here.
27769+ * but at remount time, parsing must be done before aufs lock.
27770+ * so we follow the same rule.
27771+ */
27772+ ii_write_lock_parent(inode);
27773+ aufs_write_unlock(root);
27774+ err = au_opts_parse(sb, arg, &opts);
27775+ if (unlikely(err))
27776+ goto out_root;
27777+
27778+ /* lock vfs_inode first, then aufs. */
27779+ mutex_lock(&inode->i_mutex);
1facf9fc 27780+ aufs_write_lock(root);
27781+ err = au_opts_mount(sb, &opts);
27782+ au_opts_free(&opts);
7f2ca4b1
JR
27783+ if (!err && au_ftest_si(sbinfo, NO_DREVAL)) {
27784+ sb->s_d_op = &aufs_dop_noreval;
27785+ pr_info("%pf\n", sb->s_d_op);
27786+ au_refresh_dop(root, /*force_reval*/0);
27787+ sbinfo->si_iop_array = aufs_iop_nogetattr;
27788+ au_refresh_iop(inode, /*force_getattr*/0);
27789+ }
1facf9fc 27790+ aufs_write_unlock(root);
27791+ mutex_unlock(&inode->i_mutex);
4a4d8108
AM
27792+ if (!err)
27793+ goto out_opts; /* success */
1facf9fc 27794+
4f0767ce 27795+out_root:
1facf9fc 27796+ dput(root);
27797+ sb->s_root = NULL;
4f0767ce 27798+out_info:
7f2ca4b1
JR
27799+ dbgaufs_si_fin(sbinfo);
27800+ kobject_put(&sbinfo->si_kobj);
1facf9fc 27801+ sb->s_fs_info = NULL;
4f0767ce 27802+out_opts:
1facf9fc 27803+ free_page((unsigned long)opts.opt);
4f0767ce 27804+out:
1facf9fc 27805+ AuTraceErr(err);
27806+ err = cvt_err(err);
27807+ AuTraceErr(err);
27808+ return err;
27809+}
27810+
27811+/* ---------------------------------------------------------------------- */
27812+
027c5e7a
AM
27813+static struct dentry *aufs_mount(struct file_system_type *fs_type, int flags,
27814+ const char *dev_name __maybe_unused,
27815+ void *raw_data)
1facf9fc 27816+{
027c5e7a 27817+ struct dentry *root;
1facf9fc 27818+ struct super_block *sb;
27819+
27820+ /* all timestamps always follow the ones on the branch */
27821+ /* mnt->mnt_flags |= MNT_NOATIME | MNT_NODIRATIME; */
027c5e7a
AM
27822+ root = mount_nodev(fs_type, flags, raw_data, aufs_fill_super);
27823+ if (IS_ERR(root))
27824+ goto out;
27825+
27826+ sb = root->d_sb;
27827+ si_write_lock(sb, !AuLock_FLUSH);
27828+ sysaufs_brs_add(sb, 0);
27829+ si_write_unlock(sb);
27830+ au_sbilist_add(sb);
27831+
27832+out:
27833+ return root;
1facf9fc 27834+}
27835+
e49829fe
JR
27836+static void aufs_kill_sb(struct super_block *sb)
27837+{
27838+ struct au_sbinfo *sbinfo;
27839+
27840+ sbinfo = au_sbi(sb);
27841+ if (sbinfo) {
27842+ au_sbilist_del(sb);
27843+ aufs_write_lock(sb->s_root);
076b876e 27844+ au_fhsm_fin(sb);
e49829fe
JR
27845+ if (sbinfo->si_wbr_create_ops->fin)
27846+ sbinfo->si_wbr_create_ops->fin(sb);
27847+ if (au_opt_test(sbinfo->si_mntflags, UDBA_HNOTIFY)) {
27848+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_NONE);
7f2ca4b1 27849+ au_remount_refresh(sb, /*do_idop*/0);
e49829fe
JR
27850+ }
27851+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
27852+ au_plink_put(sb, /*verbose*/1);
27853+ au_xino_clr(sb);
1e00d052 27854+ sbinfo->si_sb = NULL;
e49829fe 27855+ aufs_write_unlock(sb->s_root);
e49829fe
JR
27856+ au_nwt_flush(&sbinfo->si_nowait);
27857+ }
98d9a5b1 27858+ kill_anon_super(sb);
e49829fe
JR
27859+}
27860+
1facf9fc 27861+struct file_system_type aufs_fs_type = {
27862+ .name = AUFS_FSTYPE,
c06a8ce3
AM
27863+ /* a race between rename and others */
27864+ .fs_flags = FS_RENAME_DOES_D_MOVE,
027c5e7a 27865+ .mount = aufs_mount,
e49829fe 27866+ .kill_sb = aufs_kill_sb,
1facf9fc 27867+ /* no need to __module_get() and module_put(). */
27868+ .owner = THIS_MODULE,
27869+};
7f207e10
AM
27870diff -urN /usr/share/empty/fs/aufs/super.h linux/fs/aufs/super.h
27871--- /usr/share/empty/fs/aufs/super.h 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
27872+++ linux/fs/aufs/super.h 2016-02-28 11:27:01.280579017 +0100
27873@@ -0,0 +1,640 @@
1facf9fc 27874+/*
7f2ca4b1 27875+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 27876+ *
27877+ * This program, aufs is free software; you can redistribute it and/or modify
27878+ * it under the terms of the GNU General Public License as published by
27879+ * the Free Software Foundation; either version 2 of the License, or
27880+ * (at your option) any later version.
dece6358
AM
27881+ *
27882+ * This program is distributed in the hope that it will be useful,
27883+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
27884+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27885+ * GNU General Public License for more details.
27886+ *
27887+ * You should have received a copy of the GNU General Public License
523b37e3 27888+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 27889+ */
27890+
27891+/*
27892+ * super_block operations
27893+ */
27894+
27895+#ifndef __AUFS_SUPER_H__
27896+#define __AUFS_SUPER_H__
27897+
27898+#ifdef __KERNEL__
27899+
27900+#include <linux/fs.h>
1facf9fc 27901+#include "rwsem.h"
27902+#include "spl.h"
27903+#include "wkq.h"
27904+
27905+typedef ssize_t (*au_readf_t)(struct file *, char __user *, size_t, loff_t *);
27906+typedef ssize_t (*au_writef_t)(struct file *, const char __user *, size_t,
27907+ loff_t *);
27908+
27909+/* policies to select one among multiple writable branches */
27910+struct au_wbr_copyup_operations {
27911+ int (*copyup)(struct dentry *dentry);
27912+};
27913+
392086de
AM
27914+#define AuWbr_DIR 1 /* target is a dir */
27915+#define AuWbr_PARENT (1 << 1) /* always require a parent */
27916+
27917+#define au_ftest_wbr(flags, name) ((flags) & AuWbr_##name)
27918+#define au_fset_wbr(flags, name) { (flags) |= AuWbr_##name; }
27919+#define au_fclr_wbr(flags, name) { (flags) &= ~AuWbr_##name; }
27920+
1facf9fc 27921+struct au_wbr_create_operations {
392086de 27922+ int (*create)(struct dentry *dentry, unsigned int flags);
1facf9fc 27923+ int (*init)(struct super_block *sb);
27924+ int (*fin)(struct super_block *sb);
27925+};
27926+
27927+struct au_wbr_mfs {
27928+ struct mutex mfs_lock; /* protect this structure */
27929+ unsigned long mfs_jiffy;
27930+ unsigned long mfs_expire;
27931+ aufs_bindex_t mfs_bindex;
27932+
27933+ unsigned long long mfsrr_bytes;
27934+ unsigned long long mfsrr_watermark;
27935+};
27936+
86dc4139
AM
27937+struct pseudo_link {
27938+ union {
27939+ struct hlist_node hlist;
27940+ struct rcu_head rcu;
27941+ };
27942+ struct inode *inode;
27943+};
27944+
27945+#define AuPlink_NHASH 100
27946+static inline int au_plink_hash(ino_t ino)
27947+{
27948+ return ino % AuPlink_NHASH;
27949+}
27950+
076b876e
AM
27951+/* File-based Hierarchical Storage Management */
27952+struct au_fhsm {
27953+#ifdef CONFIG_AUFS_FHSM
27954+ /* allow only one process who can receive the notification */
27955+ spinlock_t fhsm_spin;
27956+ pid_t fhsm_pid;
27957+ wait_queue_head_t fhsm_wqh;
27958+ atomic_t fhsm_readable;
27959+
c1595e42 27960+ /* these are protected by si_rwsem */
076b876e 27961+ unsigned long fhsm_expire;
c1595e42 27962+ aufs_bindex_t fhsm_bottom;
076b876e
AM
27963+#endif
27964+};
27965+
1facf9fc 27966+struct au_branch;
27967+struct au_sbinfo {
27968+ /* nowait tasks in the system-wide workqueue */
27969+ struct au_nowait_tasks si_nowait;
27970+
b752ccd1
AM
27971+ /*
27972+ * tried sb->s_umount, but failed due to the dependency between i_mutex.
27973+ * rwsem for au_sbinfo is necessary.
27974+ */
dece6358 27975+ struct au_rwsem si_rwsem;
1facf9fc 27976+
b752ccd1
AM
27977+ /* prevent recursive locking in deleting inode */
27978+ struct {
27979+ unsigned long *bitmap;
27980+ spinlock_t tree_lock;
27981+ struct radix_tree_root tree;
27982+ } au_si_pid;
27983+
7f207e10 27984+ /*
523b37e3
AM
27985+ * dirty approach to protect sb->s_inodes and ->s_files (gone) from
27986+ * remount.
7f207e10
AM
27987+ */
27988+ atomic_long_t si_ninodes, si_nfiles;
27989+
1facf9fc 27990+ /* branch management */
27991+ unsigned int si_generation;
27992+
7f2ca4b1 27993+ /* see AuSi_ flags */
1facf9fc 27994+ unsigned char au_si_status;
27995+
27996+ aufs_bindex_t si_bend;
7f207e10
AM
27997+
27998+ /* dirty trick to keep br_id plus */
27999+ unsigned int si_last_br_id :
28000+ sizeof(aufs_bindex_t) * BITS_PER_BYTE - 1;
1facf9fc 28001+ struct au_branch **si_branch;
28002+
28003+ /* policy to select a writable branch */
28004+ unsigned char si_wbr_copyup;
28005+ unsigned char si_wbr_create;
28006+ struct au_wbr_copyup_operations *si_wbr_copyup_ops;
28007+ struct au_wbr_create_operations *si_wbr_create_ops;
28008+
28009+ /* round robin */
28010+ atomic_t si_wbr_rr_next;
28011+
28012+ /* most free space */
28013+ struct au_wbr_mfs si_wbr_mfs;
28014+
076b876e
AM
28015+ /* File-based Hierarchical Storage Management */
28016+ struct au_fhsm si_fhsm;
28017+
1facf9fc 28018+ /* mount flags */
28019+ /* include/asm-ia64/siginfo.h defines a macro named si_flags */
28020+ unsigned int si_mntflags;
28021+
28022+ /* external inode number (bitmap and translation table) */
28023+ au_readf_t si_xread;
28024+ au_writef_t si_xwrite;
28025+ struct file *si_xib;
28026+ struct mutex si_xib_mtx; /* protect xib members */
28027+ unsigned long *si_xib_buf;
28028+ unsigned long si_xib_last_pindex;
28029+ int si_xib_next_bit;
28030+ aufs_bindex_t si_xino_brid;
392086de
AM
28031+ unsigned long si_xino_jiffy;
28032+ unsigned long si_xino_expire;
1facf9fc 28033+ /* reserved for future use */
28034+ /* unsigned long long si_xib_limit; */ /* Max xib file size */
28035+
28036+#ifdef CONFIG_AUFS_EXPORT
28037+ /* i_generation */
28038+ struct file *si_xigen;
28039+ atomic_t si_xigen_next;
28040+#endif
28041+
7f2ca4b1
JR
28042+ /* dirty trick to support atomic_open */
28043+ struct au_sphlhead si_aopen;
28044+
1facf9fc 28045+ /* vdir parameters */
e49829fe 28046+ unsigned long si_rdcache; /* max cache time in jiffies */
1facf9fc 28047+ unsigned int si_rdblk; /* deblk size */
28048+ unsigned int si_rdhash; /* hash size */
28049+
28050+ /*
28051+ * If the number of whiteouts are larger than si_dirwh, leave all of
28052+ * them after au_whtmp_ren to reduce the cost of rmdir(2).
28053+ * future fsck.aufs or kernel thread will remove them later.
28054+ * Otherwise, remove all whiteouts and the dir in rmdir(2).
28055+ */
28056+ unsigned int si_dirwh;
28057+
1facf9fc 28058+ /* pseudo_link list */
86dc4139 28059+ struct au_sphlhead si_plink[AuPlink_NHASH];
1facf9fc 28060+ wait_queue_head_t si_plink_wq;
4a4d8108 28061+ spinlock_t si_plink_maint_lock;
e49829fe 28062+ pid_t si_plink_maint_pid;
1facf9fc 28063+
523b37e3
AM
28064+ /* file list */
28065+ struct au_sphlhead si_files;
28066+
7f2ca4b1
JR
28067+ /* with/without getattr, brother of sb->s_d_op */
28068+ struct inode_operations *si_iop_array;
28069+
1facf9fc 28070+ /*
28071+ * sysfs and lifetime management.
28072+ * this is not a small structure and it may be a waste of memory in case
28073+ * of sysfs is disabled, particularly when many aufs-es are mounted.
28074+ * but using sysfs is majority.
28075+ */
28076+ struct kobject si_kobj;
28077+#ifdef CONFIG_DEBUG_FS
86dc4139
AM
28078+ struct dentry *si_dbgaufs;
28079+ struct dentry *si_dbgaufs_plink;
28080+ struct dentry *si_dbgaufs_xib;
1facf9fc 28081+#ifdef CONFIG_AUFS_EXPORT
28082+ struct dentry *si_dbgaufs_xigen;
28083+#endif
28084+#endif
28085+
e49829fe
JR
28086+#ifdef CONFIG_AUFS_SBILIST
28087+ struct list_head si_list;
28088+#endif
28089+
1facf9fc 28090+ /* dirty, necessary for unmounting, sysfs and sysrq */
28091+ struct super_block *si_sb;
28092+};
28093+
dece6358
AM
28094+/* sbinfo status flags */
28095+/*
28096+ * set true when refresh_dirs() failed at remount time.
28097+ * then try refreshing dirs at access time again.
28098+ * if it is false, refreshing dirs at access time is unnecessary
28099+ */
027c5e7a 28100+#define AuSi_FAILED_REFRESH_DIR 1
076b876e 28101+#define AuSi_FHSM (1 << 1) /* fhsm is active now */
7f2ca4b1 28102+#define AuSi_NO_DREVAL (1 << 2) /* disable all d_revalidate */
076b876e
AM
28103+
28104+#ifndef CONFIG_AUFS_FHSM
28105+#undef AuSi_FHSM
28106+#define AuSi_FHSM 0
28107+#endif
28108+
dece6358
AM
28109+static inline unsigned char au_do_ftest_si(struct au_sbinfo *sbi,
28110+ unsigned int flag)
28111+{
28112+ AuRwMustAnyLock(&sbi->si_rwsem);
28113+ return sbi->au_si_status & flag;
28114+}
28115+#define au_ftest_si(sbinfo, name) au_do_ftest_si(sbinfo, AuSi_##name)
28116+#define au_fset_si(sbinfo, name) do { \
28117+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
28118+ (sbinfo)->au_si_status |= AuSi_##name; \
28119+} while (0)
28120+#define au_fclr_si(sbinfo, name) do { \
28121+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
28122+ (sbinfo)->au_si_status &= ~AuSi_##name; \
28123+} while (0)
28124+
1facf9fc 28125+/* ---------------------------------------------------------------------- */
28126+
28127+/* policy to select one among writable branches */
4a4d8108
AM
28128+#define AuWbrCopyup(sbinfo, ...) \
28129+ ((sbinfo)->si_wbr_copyup_ops->copyup(__VA_ARGS__))
28130+#define AuWbrCreate(sbinfo, ...) \
28131+ ((sbinfo)->si_wbr_create_ops->create(__VA_ARGS__))
1facf9fc 28132+
28133+/* flags for si_read_lock()/aufs_read_lock()/di_read_lock() */
28134+#define AuLock_DW 1 /* write-lock dentry */
28135+#define AuLock_IR (1 << 1) /* read-lock inode */
28136+#define AuLock_IW (1 << 2) /* write-lock inode */
28137+#define AuLock_FLUSH (1 << 3) /* wait for 'nowait' tasks */
7f2ca4b1 28138+#define AuLock_DIRS (1 << 4) /* target is a pair of dirs */
e49829fe
JR
28139+#define AuLock_NOPLM (1 << 5) /* return err in plm mode */
28140+#define AuLock_NOPLMW (1 << 6) /* wait for plm mode ends */
027c5e7a 28141+#define AuLock_GEN (1 << 7) /* test digen/iigen */
1facf9fc 28142+#define au_ftest_lock(flags, name) ((flags) & AuLock_##name)
7f207e10
AM
28143+#define au_fset_lock(flags, name) \
28144+ do { (flags) |= AuLock_##name; } while (0)
28145+#define au_fclr_lock(flags, name) \
28146+ do { (flags) &= ~AuLock_##name; } while (0)
1facf9fc 28147+
28148+/* ---------------------------------------------------------------------- */
28149+
28150+/* super.c */
28151+extern struct file_system_type aufs_fs_type;
28152+struct inode *au_iget_locked(struct super_block *sb, ino_t ino);
7f207e10
AM
28153+typedef unsigned long long (*au_arraycb_t)(void *array, unsigned long long max,
28154+ void *arg);
7f207e10
AM
28155+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb, void *arg);
28156+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max);
28157+void au_iarray_free(struct inode **a, unsigned long long max);
1facf9fc 28158+
28159+/* sbinfo.c */
28160+void au_si_free(struct kobject *kobj);
28161+int au_si_alloc(struct super_block *sb);
28162+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr);
28163+
28164+unsigned int au_sigen_inc(struct super_block *sb);
28165+aufs_bindex_t au_new_br_id(struct super_block *sb);
28166+
e49829fe
JR
28167+int si_read_lock(struct super_block *sb, int flags);
28168+int si_write_lock(struct super_block *sb, int flags);
28169+int aufs_read_lock(struct dentry *dentry, int flags);
1facf9fc 28170+void aufs_read_unlock(struct dentry *dentry, int flags);
28171+void aufs_write_lock(struct dentry *dentry);
28172+void aufs_write_unlock(struct dentry *dentry);
e49829fe 28173+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags);
1facf9fc 28174+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2);
28175+
b752ccd1
AM
28176+int si_pid_test_slow(struct super_block *sb);
28177+void si_pid_set_slow(struct super_block *sb);
28178+void si_pid_clr_slow(struct super_block *sb);
28179+
1facf9fc 28180+/* wbr_policy.c */
28181+extern struct au_wbr_copyup_operations au_wbr_copyup_ops[];
28182+extern struct au_wbr_create_operations au_wbr_create_ops[];
28183+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst);
c2b27bf2 28184+int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex);
076b876e 28185+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t bstart);
c2b27bf2
AM
28186+
28187+/* mvdown.c */
28188+int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *arg);
1facf9fc 28189+
076b876e
AM
28190+#ifdef CONFIG_AUFS_FHSM
28191+/* fhsm.c */
28192+
28193+static inline pid_t au_fhsm_pid(struct au_fhsm *fhsm)
28194+{
28195+ pid_t pid;
28196+
28197+ spin_lock(&fhsm->fhsm_spin);
28198+ pid = fhsm->fhsm_pid;
28199+ spin_unlock(&fhsm->fhsm_spin);
28200+
28201+ return pid;
28202+}
28203+
28204+void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force);
28205+void au_fhsm_wrote_all(struct super_block *sb, int force);
28206+int au_fhsm_fd(struct super_block *sb, int oflags);
28207+int au_fhsm_br_alloc(struct au_branch *br);
c1595e42 28208+void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex);
076b876e
AM
28209+void au_fhsm_fin(struct super_block *sb);
28210+void au_fhsm_init(struct au_sbinfo *sbinfo);
28211+void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec);
28212+void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo);
28213+#else
28214+AuStubVoid(au_fhsm_wrote, struct super_block *sb, aufs_bindex_t bindex,
28215+ int force)
28216+AuStubVoid(au_fhsm_wrote_all, struct super_block *sb, int force)
28217+AuStub(int, au_fhsm_fd, return -EOPNOTSUPP, struct super_block *sb, int oflags)
c1595e42
JR
28218+AuStub(pid_t, au_fhsm_pid, return 0, struct au_fhsm *fhsm)
28219+AuStubInt0(au_fhsm_br_alloc, struct au_branch *br)
28220+AuStubVoid(au_fhsm_set_bottom, struct super_block *sb, aufs_bindex_t bindex)
076b876e
AM
28221+AuStubVoid(au_fhsm_fin, struct super_block *sb)
28222+AuStubVoid(au_fhsm_init, struct au_sbinfo *sbinfo)
28223+AuStubVoid(au_fhsm_set, struct au_sbinfo *sbinfo, unsigned int sec)
28224+AuStubVoid(au_fhsm_show, struct seq_file *seq, struct au_sbinfo *sbinfo)
28225+#endif
28226+
1facf9fc 28227+/* ---------------------------------------------------------------------- */
28228+
28229+static inline struct au_sbinfo *au_sbi(struct super_block *sb)
28230+{
28231+ return sb->s_fs_info;
28232+}
28233+
28234+/* ---------------------------------------------------------------------- */
28235+
28236+#ifdef CONFIG_AUFS_EXPORT
a2a7ad62 28237+int au_test_nfsd(void);
1facf9fc 28238+void au_export_init(struct super_block *sb);
b752ccd1 28239+void au_xigen_inc(struct inode *inode);
1facf9fc 28240+int au_xigen_new(struct inode *inode);
28241+int au_xigen_set(struct super_block *sb, struct file *base);
28242+void au_xigen_clr(struct super_block *sb);
28243+
28244+static inline int au_busy_or_stale(void)
28245+{
b752ccd1 28246+ if (!au_test_nfsd())
1facf9fc 28247+ return -EBUSY;
28248+ return -ESTALE;
28249+}
28250+#else
b752ccd1 28251+AuStubInt0(au_test_nfsd, void)
a2a7ad62 28252+AuStubVoid(au_export_init, struct super_block *sb)
b752ccd1 28253+AuStubVoid(au_xigen_inc, struct inode *inode)
4a4d8108
AM
28254+AuStubInt0(au_xigen_new, struct inode *inode)
28255+AuStubInt0(au_xigen_set, struct super_block *sb, struct file *base)
28256+AuStubVoid(au_xigen_clr, struct super_block *sb)
c1595e42 28257+AuStub(int, au_busy_or_stale, return -EBUSY, void)
1facf9fc 28258+#endif /* CONFIG_AUFS_EXPORT */
28259+
28260+/* ---------------------------------------------------------------------- */
28261+
e49829fe
JR
28262+#ifdef CONFIG_AUFS_SBILIST
28263+/* module.c */
28264+extern struct au_splhead au_sbilist;
28265+
28266+static inline void au_sbilist_init(void)
28267+{
28268+ au_spl_init(&au_sbilist);
28269+}
28270+
28271+static inline void au_sbilist_add(struct super_block *sb)
28272+{
28273+ au_spl_add(&au_sbi(sb)->si_list, &au_sbilist);
28274+}
28275+
28276+static inline void au_sbilist_del(struct super_block *sb)
28277+{
28278+ au_spl_del(&au_sbi(sb)->si_list, &au_sbilist);
28279+}
53392da6
AM
28280+
28281+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
28282+static inline void au_sbilist_lock(void)
28283+{
28284+ spin_lock(&au_sbilist.spin);
28285+}
28286+
28287+static inline void au_sbilist_unlock(void)
28288+{
28289+ spin_unlock(&au_sbilist.spin);
28290+}
28291+#define AuGFP_SBILIST GFP_ATOMIC
28292+#else
28293+AuStubVoid(au_sbilist_lock, void)
28294+AuStubVoid(au_sbilist_unlock, void)
28295+#define AuGFP_SBILIST GFP_NOFS
28296+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
e49829fe
JR
28297+#else
28298+AuStubVoid(au_sbilist_init, void)
c1595e42
JR
28299+AuStubVoid(au_sbilist_add, struct super_block *sb)
28300+AuStubVoid(au_sbilist_del, struct super_block *sb)
53392da6
AM
28301+AuStubVoid(au_sbilist_lock, void)
28302+AuStubVoid(au_sbilist_unlock, void)
28303+#define AuGFP_SBILIST GFP_NOFS
e49829fe
JR
28304+#endif
28305+
28306+/* ---------------------------------------------------------------------- */
28307+
1facf9fc 28308+static inline void dbgaufs_si_null(struct au_sbinfo *sbinfo)
28309+{
dece6358 28310+ /*
c1595e42 28311+ * This function is a dynamic '__init' function actually,
dece6358
AM
28312+ * so the tiny check for si_rwsem is unnecessary.
28313+ */
28314+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
1facf9fc 28315+#ifdef CONFIG_DEBUG_FS
28316+ sbinfo->si_dbgaufs = NULL;
86dc4139 28317+ sbinfo->si_dbgaufs_plink = NULL;
1facf9fc 28318+ sbinfo->si_dbgaufs_xib = NULL;
28319+#ifdef CONFIG_AUFS_EXPORT
28320+ sbinfo->si_dbgaufs_xigen = NULL;
28321+#endif
28322+#endif
28323+}
28324+
28325+/* ---------------------------------------------------------------------- */
28326+
b752ccd1
AM
28327+static inline pid_t si_pid_bit(void)
28328+{
28329+ /* the origin of pid is 1, but the bitmap's is 0 */
28330+ return current->pid - 1;
28331+}
28332+
28333+static inline int si_pid_test(struct super_block *sb)
28334+{
076b876e
AM
28335+ pid_t bit;
28336+
28337+ bit = si_pid_bit();
b752ccd1
AM
28338+ if (bit < PID_MAX_DEFAULT)
28339+ return test_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
c1595e42 28340+ return si_pid_test_slow(sb);
b752ccd1
AM
28341+}
28342+
28343+static inline void si_pid_set(struct super_block *sb)
28344+{
076b876e
AM
28345+ pid_t bit;
28346+
28347+ bit = si_pid_bit();
b752ccd1
AM
28348+ if (bit < PID_MAX_DEFAULT) {
28349+ AuDebugOn(test_bit(bit, au_sbi(sb)->au_si_pid.bitmap));
28350+ set_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
28351+ /* smp_mb(); */
28352+ } else
28353+ si_pid_set_slow(sb);
28354+}
28355+
28356+static inline void si_pid_clr(struct super_block *sb)
28357+{
076b876e
AM
28358+ pid_t bit;
28359+
28360+ bit = si_pid_bit();
b752ccd1
AM
28361+ if (bit < PID_MAX_DEFAULT) {
28362+ AuDebugOn(!test_bit(bit, au_sbi(sb)->au_si_pid.bitmap));
28363+ clear_bit(bit, au_sbi(sb)->au_si_pid.bitmap);
28364+ /* smp_mb(); */
28365+ } else
28366+ si_pid_clr_slow(sb);
28367+}
28368+
28369+/* ---------------------------------------------------------------------- */
28370+
1facf9fc 28371+/* lock superblock. mainly for entry point functions */
28372+/*
b752ccd1
AM
28373+ * __si_read_lock, __si_write_lock,
28374+ * __si_read_unlock, __si_write_unlock, __si_downgrade_lock
1facf9fc 28375+ */
b752ccd1 28376+AuSimpleRwsemFuncs(__si, struct super_block *sb, &au_sbi(sb)->si_rwsem);
1facf9fc 28377+
dece6358
AM
28378+#define SiMustNoWaiters(sb) AuRwMustNoWaiters(&au_sbi(sb)->si_rwsem)
28379+#define SiMustAnyLock(sb) AuRwMustAnyLock(&au_sbi(sb)->si_rwsem)
28380+#define SiMustWriteLock(sb) AuRwMustWriteLock(&au_sbi(sb)->si_rwsem)
28381+
b752ccd1
AM
28382+static inline void si_noflush_read_lock(struct super_block *sb)
28383+{
28384+ __si_read_lock(sb);
28385+ si_pid_set(sb);
28386+}
28387+
28388+static inline int si_noflush_read_trylock(struct super_block *sb)
28389+{
076b876e
AM
28390+ int locked;
28391+
28392+ locked = __si_read_trylock(sb);
b752ccd1
AM
28393+ if (locked)
28394+ si_pid_set(sb);
28395+ return locked;
28396+}
28397+
28398+static inline void si_noflush_write_lock(struct super_block *sb)
28399+{
28400+ __si_write_lock(sb);
28401+ si_pid_set(sb);
28402+}
28403+
28404+static inline int si_noflush_write_trylock(struct super_block *sb)
28405+{
076b876e
AM
28406+ int locked;
28407+
28408+ locked = __si_write_trylock(sb);
b752ccd1
AM
28409+ if (locked)
28410+ si_pid_set(sb);
28411+ return locked;
28412+}
28413+
7f2ca4b1 28414+#if 0 /* reserved */
1facf9fc 28415+static inline int si_read_trylock(struct super_block *sb, int flags)
28416+{
28417+ if (au_ftest_lock(flags, FLUSH))
28418+ au_nwt_flush(&au_sbi(sb)->si_nowait);
28419+ return si_noflush_read_trylock(sb);
28420+}
e49829fe 28421+#endif
1facf9fc 28422+
b752ccd1
AM
28423+static inline void si_read_unlock(struct super_block *sb)
28424+{
28425+ si_pid_clr(sb);
28426+ __si_read_unlock(sb);
28427+}
28428+
7f2ca4b1 28429+#if 0 /* reserved */
1facf9fc 28430+static inline int si_write_trylock(struct super_block *sb, int flags)
28431+{
28432+ if (au_ftest_lock(flags, FLUSH))
28433+ au_nwt_flush(&au_sbi(sb)->si_nowait);
28434+ return si_noflush_write_trylock(sb);
28435+}
b752ccd1
AM
28436+#endif
28437+
28438+static inline void si_write_unlock(struct super_block *sb)
28439+{
28440+ si_pid_clr(sb);
28441+ __si_write_unlock(sb);
28442+}
28443+
7f2ca4b1 28444+#if 0 /* reserved */
b752ccd1
AM
28445+static inline void si_downgrade_lock(struct super_block *sb)
28446+{
28447+ __si_downgrade_lock(sb);
28448+}
28449+#endif
1facf9fc 28450+
28451+/* ---------------------------------------------------------------------- */
28452+
28453+static inline aufs_bindex_t au_sbend(struct super_block *sb)
28454+{
dece6358 28455+ SiMustAnyLock(sb);
1facf9fc 28456+ return au_sbi(sb)->si_bend;
28457+}
28458+
28459+static inline unsigned int au_mntflags(struct super_block *sb)
28460+{
dece6358 28461+ SiMustAnyLock(sb);
1facf9fc 28462+ return au_sbi(sb)->si_mntflags;
28463+}
28464+
28465+static inline unsigned int au_sigen(struct super_block *sb)
28466+{
dece6358 28467+ SiMustAnyLock(sb);
1facf9fc 28468+ return au_sbi(sb)->si_generation;
28469+}
28470+
7f207e10
AM
28471+static inline void au_ninodes_inc(struct super_block *sb)
28472+{
28473+ atomic_long_inc(&au_sbi(sb)->si_ninodes);
28474+}
28475+
28476+static inline void au_ninodes_dec(struct super_block *sb)
28477+{
28478+ AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_ninodes));
28479+ atomic_long_dec(&au_sbi(sb)->si_ninodes);
28480+}
28481+
28482+static inline void au_nfiles_inc(struct super_block *sb)
28483+{
28484+ atomic_long_inc(&au_sbi(sb)->si_nfiles);
28485+}
28486+
28487+static inline void au_nfiles_dec(struct super_block *sb)
28488+{
28489+ AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_nfiles));
28490+ atomic_long_dec(&au_sbi(sb)->si_nfiles);
28491+}
28492+
1facf9fc 28493+static inline struct au_branch *au_sbr(struct super_block *sb,
28494+ aufs_bindex_t bindex)
28495+{
dece6358 28496+ SiMustAnyLock(sb);
1facf9fc 28497+ return au_sbi(sb)->si_branch[0 + bindex];
28498+}
28499+
28500+static inline void au_xino_brid_set(struct super_block *sb, aufs_bindex_t brid)
28501+{
dece6358 28502+ SiMustWriteLock(sb);
1facf9fc 28503+ au_sbi(sb)->si_xino_brid = brid;
28504+}
28505+
28506+static inline aufs_bindex_t au_xino_brid(struct super_block *sb)
28507+{
dece6358 28508+ SiMustAnyLock(sb);
1facf9fc 28509+ return au_sbi(sb)->si_xino_brid;
28510+}
28511+
28512+#endif /* __KERNEL__ */
28513+#endif /* __AUFS_SUPER_H__ */
7f207e10
AM
28514diff -urN /usr/share/empty/fs/aufs/sysaufs.c linux/fs/aufs/sysaufs.c
28515--- /usr/share/empty/fs/aufs/sysaufs.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 28516+++ linux/fs/aufs/sysaufs.c 2016-02-28 11:27:01.280579017 +0100
523b37e3 28517@@ -0,0 +1,104 @@
1facf9fc 28518+/*
7f2ca4b1 28519+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 28520+ *
28521+ * This program, aufs is free software; you can redistribute it and/or modify
28522+ * it under the terms of the GNU General Public License as published by
28523+ * the Free Software Foundation; either version 2 of the License, or
28524+ * (at your option) any later version.
dece6358
AM
28525+ *
28526+ * This program is distributed in the hope that it will be useful,
28527+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28528+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28529+ * GNU General Public License for more details.
28530+ *
28531+ * You should have received a copy of the GNU General Public License
523b37e3 28532+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 28533+ */
28534+
28535+/*
28536+ * sysfs interface and lifetime management
28537+ * they are necessary regardless of whether sysfs is disabled.
28538+ */
28539+
1facf9fc 28540+#include <linux/random.h>
1facf9fc 28541+#include "aufs.h"
28542+
28543+unsigned long sysaufs_si_mask;
e49829fe 28544+struct kset *sysaufs_kset;
1facf9fc 28545+
28546+#define AuSiAttr(_name) { \
28547+ .attr = { .name = __stringify(_name), .mode = 0444 }, \
28548+ .show = sysaufs_si_##_name, \
28549+}
28550+
28551+static struct sysaufs_si_attr sysaufs_si_attr_xi_path = AuSiAttr(xi_path);
28552+struct attribute *sysaufs_si_attrs[] = {
28553+ &sysaufs_si_attr_xi_path.attr,
28554+ NULL,
28555+};
28556+
4a4d8108 28557+static const struct sysfs_ops au_sbi_ops = {
1facf9fc 28558+ .show = sysaufs_si_show
28559+};
28560+
28561+static struct kobj_type au_sbi_ktype = {
28562+ .release = au_si_free,
28563+ .sysfs_ops = &au_sbi_ops,
28564+ .default_attrs = sysaufs_si_attrs
28565+};
28566+
28567+/* ---------------------------------------------------------------------- */
28568+
28569+int sysaufs_si_init(struct au_sbinfo *sbinfo)
28570+{
28571+ int err;
28572+
e49829fe 28573+ sbinfo->si_kobj.kset = sysaufs_kset;
1facf9fc 28574+ /* cf. sysaufs_name() */
28575+ err = kobject_init_and_add
e49829fe 28576+ (&sbinfo->si_kobj, &au_sbi_ktype, /*&sysaufs_kset->kobj*/NULL,
1facf9fc 28577+ SysaufsSiNamePrefix "%lx", sysaufs_si_id(sbinfo));
28578+
28579+ dbgaufs_si_null(sbinfo);
28580+ if (!err) {
28581+ err = dbgaufs_si_init(sbinfo);
28582+ if (unlikely(err))
28583+ kobject_put(&sbinfo->si_kobj);
28584+ }
28585+ return err;
28586+}
28587+
28588+void sysaufs_fin(void)
28589+{
28590+ dbgaufs_fin();
e49829fe
JR
28591+ sysfs_remove_group(&sysaufs_kset->kobj, sysaufs_attr_group);
28592+ kset_unregister(sysaufs_kset);
1facf9fc 28593+}
28594+
28595+int __init sysaufs_init(void)
28596+{
28597+ int err;
28598+
28599+ do {
28600+ get_random_bytes(&sysaufs_si_mask, sizeof(sysaufs_si_mask));
28601+ } while (!sysaufs_si_mask);
28602+
4a4d8108 28603+ err = -EINVAL;
e49829fe
JR
28604+ sysaufs_kset = kset_create_and_add(AUFS_NAME, NULL, fs_kobj);
28605+ if (unlikely(!sysaufs_kset))
4a4d8108 28606+ goto out;
e49829fe
JR
28607+ err = PTR_ERR(sysaufs_kset);
28608+ if (IS_ERR(sysaufs_kset))
1facf9fc 28609+ goto out;
e49829fe 28610+ err = sysfs_create_group(&sysaufs_kset->kobj, sysaufs_attr_group);
1facf9fc 28611+ if (unlikely(err)) {
e49829fe 28612+ kset_unregister(sysaufs_kset);
1facf9fc 28613+ goto out;
28614+ }
28615+
28616+ err = dbgaufs_init();
28617+ if (unlikely(err))
28618+ sysaufs_fin();
4f0767ce 28619+out:
1facf9fc 28620+ return err;
28621+}
7f207e10
AM
28622diff -urN /usr/share/empty/fs/aufs/sysaufs.h linux/fs/aufs/sysaufs.h
28623--- /usr/share/empty/fs/aufs/sysaufs.h 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 28624+++ linux/fs/aufs/sysaufs.h 2016-02-28 11:27:01.280579017 +0100
c1595e42 28625@@ -0,0 +1,101 @@
1facf9fc 28626+/*
7f2ca4b1 28627+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 28628+ *
28629+ * This program, aufs is free software; you can redistribute it and/or modify
28630+ * it under the terms of the GNU General Public License as published by
28631+ * the Free Software Foundation; either version 2 of the License, or
28632+ * (at your option) any later version.
dece6358
AM
28633+ *
28634+ * This program is distributed in the hope that it will be useful,
28635+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28636+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28637+ * GNU General Public License for more details.
28638+ *
28639+ * You should have received a copy of the GNU General Public License
523b37e3 28640+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 28641+ */
28642+
28643+/*
28644+ * sysfs interface and mount lifetime management
28645+ */
28646+
28647+#ifndef __SYSAUFS_H__
28648+#define __SYSAUFS_H__
28649+
28650+#ifdef __KERNEL__
28651+
1facf9fc 28652+#include <linux/sysfs.h>
1facf9fc 28653+#include "module.h"
28654+
dece6358
AM
28655+struct super_block;
28656+struct au_sbinfo;
28657+
1facf9fc 28658+struct sysaufs_si_attr {
28659+ struct attribute attr;
28660+ int (*show)(struct seq_file *seq, struct super_block *sb);
28661+};
28662+
28663+/* ---------------------------------------------------------------------- */
28664+
28665+/* sysaufs.c */
28666+extern unsigned long sysaufs_si_mask;
e49829fe 28667+extern struct kset *sysaufs_kset;
1facf9fc 28668+extern struct attribute *sysaufs_si_attrs[];
28669+int sysaufs_si_init(struct au_sbinfo *sbinfo);
28670+int __init sysaufs_init(void);
28671+void sysaufs_fin(void);
28672+
28673+/* ---------------------------------------------------------------------- */
28674+
28675+/* some people don't like to show a pointer in the kernel */
28676+static inline unsigned long sysaufs_si_id(struct au_sbinfo *sbinfo)
28677+{
28678+ return sysaufs_si_mask ^ (unsigned long)sbinfo;
28679+}
28680+
28681+#define SysaufsSiNamePrefix "si_"
28682+#define SysaufsSiNameLen (sizeof(SysaufsSiNamePrefix) + 16)
28683+static inline void sysaufs_name(struct au_sbinfo *sbinfo, char *name)
28684+{
28685+ snprintf(name, SysaufsSiNameLen, SysaufsSiNamePrefix "%lx",
28686+ sysaufs_si_id(sbinfo));
28687+}
28688+
28689+struct au_branch;
28690+#ifdef CONFIG_SYSFS
28691+/* sysfs.c */
28692+extern struct attribute_group *sysaufs_attr_group;
28693+
28694+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb);
28695+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
28696+ char *buf);
076b876e
AM
28697+long au_brinfo_ioctl(struct file *file, unsigned long arg);
28698+#ifdef CONFIG_COMPAT
28699+long au_brinfo_compat_ioctl(struct file *file, unsigned long arg);
28700+#endif
1facf9fc 28701+
28702+void sysaufs_br_init(struct au_branch *br);
28703+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
28704+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
28705+
28706+#define sysaufs_brs_init() do {} while (0)
28707+
28708+#else
28709+#define sysaufs_attr_group NULL
28710+
4a4d8108 28711+AuStubInt0(sysaufs_si_xi_path, struct seq_file *seq, struct super_block *sb)
c1595e42
JR
28712+AuStub(ssize_t, sysaufs_si_show, return 0, struct kobject *kobj,
28713+ struct attribute *attr, char *buf)
4a4d8108
AM
28714+AuStubVoid(sysaufs_br_init, struct au_branch *br)
28715+AuStubVoid(sysaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
28716+AuStubVoid(sysaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
1facf9fc 28717+
28718+static inline void sysaufs_brs_init(void)
28719+{
28720+ sysaufs_brs = 0;
28721+}
28722+
28723+#endif /* CONFIG_SYSFS */
28724+
28725+#endif /* __KERNEL__ */
28726+#endif /* __SYSAUFS_H__ */
7f207e10
AM
28727diff -urN /usr/share/empty/fs/aufs/sysfs.c linux/fs/aufs/sysfs.c
28728--- /usr/share/empty/fs/aufs/sysfs.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
28729+++ linux/fs/aufs/sysfs.c 2016-02-28 11:27:01.280579017 +0100
28730@@ -0,0 +1,376 @@
1facf9fc 28731+/*
7f2ca4b1 28732+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 28733+ *
28734+ * This program, aufs is free software; you can redistribute it and/or modify
28735+ * it under the terms of the GNU General Public License as published by
28736+ * the Free Software Foundation; either version 2 of the License, or
28737+ * (at your option) any later version.
dece6358
AM
28738+ *
28739+ * This program is distributed in the hope that it will be useful,
28740+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28741+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28742+ * GNU General Public License for more details.
28743+ *
28744+ * You should have received a copy of the GNU General Public License
523b37e3 28745+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 28746+ */
28747+
28748+/*
28749+ * sysfs interface
28750+ */
28751+
076b876e 28752+#include <linux/compat.h>
1facf9fc 28753+#include <linux/seq_file.h>
1facf9fc 28754+#include "aufs.h"
28755+
4a4d8108
AM
28756+#ifdef CONFIG_AUFS_FS_MODULE
28757+/* this entry violates the "one line per file" policy of sysfs */
28758+static ssize_t config_show(struct kobject *kobj, struct kobj_attribute *attr,
28759+ char *buf)
28760+{
28761+ ssize_t err;
28762+ static char *conf =
28763+/* this file is generated at compile time */
28764+#include "conf.str"
28765+ ;
28766+
28767+ err = snprintf(buf, PAGE_SIZE, conf);
28768+ if (unlikely(err >= PAGE_SIZE))
28769+ err = -EFBIG;
28770+ return err;
28771+}
28772+
28773+static struct kobj_attribute au_config_attr = __ATTR_RO(config);
28774+#endif
28775+
1facf9fc 28776+static struct attribute *au_attr[] = {
4a4d8108
AM
28777+#ifdef CONFIG_AUFS_FS_MODULE
28778+ &au_config_attr.attr,
28779+#endif
1facf9fc 28780+ NULL, /* need to NULL terminate the list of attributes */
28781+};
28782+
28783+static struct attribute_group sysaufs_attr_group_body = {
28784+ .attrs = au_attr
28785+};
28786+
28787+struct attribute_group *sysaufs_attr_group = &sysaufs_attr_group_body;
28788+
28789+/* ---------------------------------------------------------------------- */
28790+
28791+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb)
28792+{
28793+ int err;
28794+
dece6358
AM
28795+ SiMustAnyLock(sb);
28796+
1facf9fc 28797+ err = 0;
28798+ if (au_opt_test(au_mntflags(sb), XINO)) {
28799+ err = au_xino_path(seq, au_sbi(sb)->si_xib);
28800+ seq_putc(seq, '\n');
28801+ }
28802+ return err;
28803+}
28804+
28805+/*
28806+ * the lifetime of a branch is independent of its entry under sysfs.
28807+ * sysfs handles the lifetime of the entry, and never calls ->show() after
28808+ * it is unlinked.
28809+ */
28810+static int sysaufs_si_br(struct seq_file *seq, struct super_block *sb,
392086de 28811+ aufs_bindex_t bindex, int idx)
1facf9fc 28812+{
1e00d052 28813+ int err;
1facf9fc 28814+ struct path path;
28815+ struct dentry *root;
28816+ struct au_branch *br;
076b876e 28817+ au_br_perm_str_t perm;
1facf9fc 28818+
28819+ AuDbg("b%d\n", bindex);
28820+
1e00d052 28821+ err = 0;
1facf9fc 28822+ root = sb->s_root;
28823+ di_read_lock_parent(root, !AuLock_IR);
28824+ br = au_sbr(sb, bindex);
392086de
AM
28825+
28826+ switch (idx) {
28827+ case AuBrSysfs_BR:
28828+ path.mnt = au_br_mnt(br);
28829+ path.dentry = au_h_dptr(root, bindex);
7f2ca4b1
JR
28830+ err = au_seq_path(seq, &path);
28831+ if (!err) {
28832+ au_optstr_br_perm(&perm, br->br_perm);
28833+ err = seq_printf(seq, "=%s\n", perm.a);
28834+ }
392086de
AM
28835+ break;
28836+ case AuBrSysfs_BRID:
28837+ err = seq_printf(seq, "%d\n", br->br_id);
392086de
AM
28838+ break;
28839+ }
076b876e
AM
28840+ di_read_unlock(root, !AuLock_IR);
28841+ if (err == -1)
28842+ err = -E2BIG;
392086de 28843+
1e00d052 28844+ return err;
1facf9fc 28845+}
28846+
28847+/* ---------------------------------------------------------------------- */
28848+
28849+static struct seq_file *au_seq(char *p, ssize_t len)
28850+{
28851+ struct seq_file *seq;
28852+
28853+ seq = kzalloc(sizeof(*seq), GFP_NOFS);
28854+ if (seq) {
28855+ /* mutex_init(&seq.lock); */
28856+ seq->buf = p;
28857+ seq->size = len;
28858+ return seq; /* success */
28859+ }
28860+
28861+ seq = ERR_PTR(-ENOMEM);
28862+ return seq;
28863+}
28864+
392086de
AM
28865+#define SysaufsBr_PREFIX "br"
28866+#define SysaufsBrid_PREFIX "brid"
1facf9fc 28867+
28868+/* todo: file size may exceed PAGE_SIZE */
28869+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
1308ab2a 28870+ char *buf)
1facf9fc 28871+{
28872+ ssize_t err;
392086de 28873+ int idx;
1facf9fc 28874+ long l;
28875+ aufs_bindex_t bend;
28876+ struct au_sbinfo *sbinfo;
28877+ struct super_block *sb;
28878+ struct seq_file *seq;
28879+ char *name;
28880+ struct attribute **cattr;
28881+
28882+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
28883+ sb = sbinfo->si_sb;
1308ab2a 28884+
28885+ /*
28886+ * prevent a race condition between sysfs and aufs.
28887+ * for instance, sysfs_file_read() calls sysfs_get_active_two() which
28888+ * prohibits maintaining the sysfs entries.
28889+ * here we acquire the read lock after sysfs_get_active_two().
28890+ * on the other hand, the remount process may maintain the sysfs/aufs
28891+ * entries after acquiring the write lock.
28892+ * this can cause a deadlock.
28893+ * so we simply give up processing the read here.
28894+ */
28895+ err = -EBUSY;
28896+ if (unlikely(!si_noflush_read_trylock(sb)))
28897+ goto out;
1facf9fc 28898+
28899+ seq = au_seq(buf, PAGE_SIZE);
28900+ err = PTR_ERR(seq);
28901+ if (IS_ERR(seq))
1308ab2a 28902+ goto out_unlock;
1facf9fc 28903+
28904+ name = (void *)attr->name;
28905+ cattr = sysaufs_si_attrs;
28906+ while (*cattr) {
28907+ if (!strcmp(name, (*cattr)->name)) {
28908+ err = container_of(*cattr, struct sysaufs_si_attr, attr)
28909+ ->show(seq, sb);
28910+ goto out_seq;
28911+ }
28912+ cattr++;
28913+ }
28914+
392086de
AM
28915+ if (!strncmp(name, SysaufsBrid_PREFIX,
28916+ sizeof(SysaufsBrid_PREFIX) - 1)) {
28917+ idx = AuBrSysfs_BRID;
28918+ name += sizeof(SysaufsBrid_PREFIX) - 1;
28919+ } else if (!strncmp(name, SysaufsBr_PREFIX,
28920+ sizeof(SysaufsBr_PREFIX) - 1)) {
28921+ idx = AuBrSysfs_BR;
1facf9fc 28922+ name += sizeof(SysaufsBr_PREFIX) - 1;
392086de
AM
28923+ } else
28924+ BUG();
28925+
28926+ err = kstrtol(name, 10, &l);
28927+ if (!err) {
28928+ bend = au_sbend(sb);
28929+ if (l <= bend)
28930+ err = sysaufs_si_br(seq, sb, (aufs_bindex_t)l, idx);
28931+ else
28932+ err = -ENOENT;
1facf9fc 28933+ }
1facf9fc 28934+
4f0767ce 28935+out_seq:
1facf9fc 28936+ if (!err) {
28937+ err = seq->count;
28938+ /* sysfs limit */
28939+ if (unlikely(err == PAGE_SIZE))
28940+ err = -EFBIG;
28941+ }
28942+ kfree(seq);
4f0767ce 28943+out_unlock:
1facf9fc 28944+ si_read_unlock(sb);
4f0767ce 28945+out:
1facf9fc 28946+ return err;
28947+}
28948+
28949+/* ---------------------------------------------------------------------- */
28950+
076b876e
AM
28951+static int au_brinfo(struct super_block *sb, union aufs_brinfo __user *arg)
28952+{
28953+ int err;
28954+ int16_t brid;
28955+ aufs_bindex_t bindex, bend;
28956+ size_t sz;
28957+ char *buf;
28958+ struct seq_file *seq;
28959+ struct au_branch *br;
28960+
28961+ si_read_lock(sb, AuLock_FLUSH);
28962+ bend = au_sbend(sb);
28963+ err = bend + 1;
28964+ if (!arg)
28965+ goto out;
28966+
28967+ err = -ENOMEM;
28968+ buf = (void *)__get_free_page(GFP_NOFS);
28969+ if (unlikely(!buf))
28970+ goto out;
28971+
28972+ seq = au_seq(buf, PAGE_SIZE);
28973+ err = PTR_ERR(seq);
28974+ if (IS_ERR(seq))
28975+ goto out_buf;
28976+
28977+ sz = sizeof(*arg) - offsetof(union aufs_brinfo, path);
28978+ for (bindex = 0; bindex <= bend; bindex++, arg++) {
28979+ err = !access_ok(VERIFY_WRITE, arg, sizeof(*arg));
28980+ if (unlikely(err))
28981+ break;
28982+
28983+ br = au_sbr(sb, bindex);
28984+ brid = br->br_id;
28985+ BUILD_BUG_ON(sizeof(brid) != sizeof(arg->id));
28986+ err = __put_user(brid, &arg->id);
28987+ if (unlikely(err))
28988+ break;
28989+
28990+ BUILD_BUG_ON(sizeof(br->br_perm) != sizeof(arg->perm));
28991+ err = __put_user(br->br_perm, &arg->perm);
28992+ if (unlikely(err))
28993+ break;
28994+
7f2ca4b1
JR
28995+ err = au_seq_path(seq, &br->br_path);
28996+ if (unlikely(err))
28997+ break;
076b876e
AM
28998+ err = seq_putc(seq, '\0');
28999+ if (!err && seq->count <= sz) {
29000+ err = copy_to_user(arg->path, seq->buf, seq->count);
29001+ seq->count = 0;
29002+ if (unlikely(err))
29003+ break;
29004+ } else {
29005+ err = -E2BIG;
29006+ goto out_seq;
29007+ }
29008+ }
29009+ if (unlikely(err))
29010+ err = -EFAULT;
29011+
29012+out_seq:
29013+ kfree(seq);
29014+out_buf:
29015+ free_page((unsigned long)buf);
29016+out:
29017+ si_read_unlock(sb);
29018+ return err;
29019+}
29020+
29021+long au_brinfo_ioctl(struct file *file, unsigned long arg)
29022+{
29023+ return au_brinfo(file->f_dentry->d_sb, (void __user *)arg);
29024+}
29025+
29026+#ifdef CONFIG_COMPAT
29027+long au_brinfo_compat_ioctl(struct file *file, unsigned long arg)
29028+{
29029+ return au_brinfo(file->f_dentry->d_sb, compat_ptr(arg));
29030+}
29031+#endif
29032+
29033+/* ---------------------------------------------------------------------- */
29034+
1facf9fc 29035+void sysaufs_br_init(struct au_branch *br)
29036+{
392086de
AM
29037+ int i;
29038+ struct au_brsysfs *br_sysfs;
29039+ struct attribute *attr;
4a4d8108 29040+
392086de
AM
29041+ br_sysfs = br->br_sysfs;
29042+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
29043+ attr = &br_sysfs->attr;
29044+ sysfs_attr_init(attr);
29045+ attr->name = br_sysfs->name;
29046+ attr->mode = S_IRUGO;
29047+ br_sysfs++;
29048+ }
1facf9fc 29049+}
29050+
29051+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
29052+{
29053+ struct au_branch *br;
29054+ struct kobject *kobj;
392086de
AM
29055+ struct au_brsysfs *br_sysfs;
29056+ int i;
1facf9fc 29057+ aufs_bindex_t bend;
29058+
29059+ dbgaufs_brs_del(sb, bindex);
29060+
29061+ if (!sysaufs_brs)
29062+ return;
29063+
29064+ kobj = &au_sbi(sb)->si_kobj;
29065+ bend = au_sbend(sb);
29066+ for (; bindex <= bend; bindex++) {
29067+ br = au_sbr(sb, bindex);
392086de
AM
29068+ br_sysfs = br->br_sysfs;
29069+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
29070+ sysfs_remove_file(kobj, &br_sysfs->attr);
29071+ br_sysfs++;
29072+ }
1facf9fc 29073+ }
29074+}
29075+
29076+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
29077+{
392086de 29078+ int err, i;
1facf9fc 29079+ aufs_bindex_t bend;
29080+ struct kobject *kobj;
29081+ struct au_branch *br;
392086de 29082+ struct au_brsysfs *br_sysfs;
1facf9fc 29083+
29084+ dbgaufs_brs_add(sb, bindex);
29085+
29086+ if (!sysaufs_brs)
29087+ return;
29088+
29089+ kobj = &au_sbi(sb)->si_kobj;
29090+ bend = au_sbend(sb);
29091+ for (; bindex <= bend; bindex++) {
29092+ br = au_sbr(sb, bindex);
392086de
AM
29093+ br_sysfs = br->br_sysfs;
29094+ snprintf(br_sysfs[AuBrSysfs_BR].name, sizeof(br_sysfs->name),
29095+ SysaufsBr_PREFIX "%d", bindex);
29096+ snprintf(br_sysfs[AuBrSysfs_BRID].name, sizeof(br_sysfs->name),
29097+ SysaufsBrid_PREFIX "%d", bindex);
29098+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
29099+ err = sysfs_create_file(kobj, &br_sysfs->attr);
29100+ if (unlikely(err))
29101+ pr_warn("failed %s under sysfs(%d)\n",
29102+ br_sysfs->name, err);
29103+ br_sysfs++;
29104+ }
1facf9fc 29105+ }
29106+}
7f207e10
AM
29107diff -urN /usr/share/empty/fs/aufs/sysrq.c linux/fs/aufs/sysrq.c
29108--- /usr/share/empty/fs/aufs/sysrq.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 29109+++ linux/fs/aufs/sysrq.c 2016-02-28 11:27:01.280579017 +0100
076b876e 29110@@ -0,0 +1,157 @@
1facf9fc 29111+/*
7f2ca4b1 29112+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 29113+ *
29114+ * This program, aufs is free software; you can redistribute it and/or modify
29115+ * it under the terms of the GNU General Public License as published by
29116+ * the Free Software Foundation; either version 2 of the License, or
29117+ * (at your option) any later version.
dece6358
AM
29118+ *
29119+ * This program is distributed in the hope that it will be useful,
29120+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29121+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29122+ * GNU General Public License for more details.
29123+ *
29124+ * You should have received a copy of the GNU General Public License
523b37e3 29125+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 29126+ */
29127+
29128+/*
29129+ * magic sysrq handler
29130+ */
29131+
1facf9fc 29132+/* #include <linux/sysrq.h> */
027c5e7a 29133+#include <linux/writeback.h>
1facf9fc 29134+#include "aufs.h"
29135+
29136+/* ---------------------------------------------------------------------- */
29137+
29138+static void sysrq_sb(struct super_block *sb)
29139+{
29140+ char *plevel;
29141+ struct au_sbinfo *sbinfo;
29142+ struct file *file;
523b37e3
AM
29143+ struct au_sphlhead *files;
29144+ struct au_finfo *finfo;
1facf9fc 29145+
29146+ plevel = au_plevel;
29147+ au_plevel = KERN_WARNING;
1facf9fc 29148+
4a4d8108 29149+ /* since we define pr_fmt, call printk directly */
c06a8ce3
AM
29150+#define pr(str) printk(KERN_WARNING AUFS_NAME ": " str)
29151+
29152+ sbinfo = au_sbi(sb);
4a4d8108 29153+ printk(KERN_WARNING "si=%lx\n", sysaufs_si_id(sbinfo));
c06a8ce3 29154+ pr("superblock\n");
1facf9fc 29155+ au_dpri_sb(sb);
027c5e7a
AM
29156+
29157+#if 0
c06a8ce3 29158+ pr("root dentry\n");
1facf9fc 29159+ au_dpri_dentry(sb->s_root);
c06a8ce3 29160+ pr("root inode\n");
1facf9fc 29161+ au_dpri_inode(sb->s_root->d_inode);
027c5e7a
AM
29162+#endif
29163+
1facf9fc 29164+#if 0
027c5e7a
AM
29165+ do {
29166+ int err, i, j, ndentry;
29167+ struct au_dcsub_pages dpages;
29168+ struct au_dpage *dpage;
29169+
29170+ err = au_dpages_init(&dpages, GFP_ATOMIC);
29171+ if (unlikely(err))
29172+ break;
29173+ err = au_dcsub_pages(&dpages, sb->s_root, NULL, NULL);
29174+ if (!err)
29175+ for (i = 0; i < dpages.ndpage; i++) {
29176+ dpage = dpages.dpages + i;
29177+ ndentry = dpage->ndentry;
29178+ for (j = 0; j < ndentry; j++)
29179+ au_dpri_dentry(dpage->dentries[j]);
29180+ }
29181+ au_dpages_free(&dpages);
29182+ } while (0);
29183+#endif
29184+
29185+#if 1
29186+ {
29187+ struct inode *i;
076b876e 29188+
c06a8ce3 29189+ pr("isolated inode\n");
2cbb1c4b
JR
29190+ spin_lock(&inode_sb_list_lock);
29191+ list_for_each_entry(i, &sb->s_inodes, i_sb_list) {
29192+ spin_lock(&i->i_lock);
b4510431 29193+ if (1 || hlist_empty(&i->i_dentry))
027c5e7a 29194+ au_dpri_inode(i);
2cbb1c4b
JR
29195+ spin_unlock(&i->i_lock);
29196+ }
29197+ spin_unlock(&inode_sb_list_lock);
027c5e7a 29198+ }
1facf9fc 29199+#endif
c06a8ce3 29200+ pr("files\n");
523b37e3
AM
29201+ files = &au_sbi(sb)->si_files;
29202+ spin_lock(&files->spin);
29203+ hlist_for_each_entry(finfo, &files->head, fi_hlist) {
4a4d8108 29204+ umode_t mode;
076b876e 29205+
523b37e3 29206+ file = finfo->fi_file;
c06a8ce3 29207+ mode = file_inode(file)->i_mode;
38d290e6 29208+ if (!special_file(mode))
1facf9fc 29209+ au_dpri_file(file);
523b37e3
AM
29210+ }
29211+ spin_unlock(&files->spin);
c06a8ce3 29212+ pr("done\n");
1facf9fc 29213+
c06a8ce3 29214+#undef pr
1facf9fc 29215+ au_plevel = plevel;
1facf9fc 29216+}
29217+
29218+/* ---------------------------------------------------------------------- */
29219+
29220+/* module parameter */
29221+static char *aufs_sysrq_key = "a";
29222+module_param_named(sysrq, aufs_sysrq_key, charp, S_IRUGO);
29223+MODULE_PARM_DESC(sysrq, "MagicSysRq key for " AUFS_NAME);
29224+
0c5527e5 29225+static void au_sysrq(int key __maybe_unused)
1facf9fc 29226+{
1facf9fc 29227+ struct au_sbinfo *sbinfo;
29228+
027c5e7a 29229+ lockdep_off();
53392da6 29230+ au_sbilist_lock();
e49829fe 29231+ list_for_each_entry(sbinfo, &au_sbilist.head, si_list)
1facf9fc 29232+ sysrq_sb(sbinfo->si_sb);
53392da6 29233+ au_sbilist_unlock();
027c5e7a 29234+ lockdep_on();
1facf9fc 29235+}
29236+
29237+static struct sysrq_key_op au_sysrq_op = {
29238+ .handler = au_sysrq,
29239+ .help_msg = "Aufs",
29240+ .action_msg = "Aufs",
29241+ .enable_mask = SYSRQ_ENABLE_DUMP
29242+};
29243+
29244+/* ---------------------------------------------------------------------- */
29245+
29246+int __init au_sysrq_init(void)
29247+{
29248+ int err;
29249+ char key;
29250+
29251+ err = -1;
29252+ key = *aufs_sysrq_key;
29253+ if ('a' <= key && key <= 'z')
29254+ err = register_sysrq_key(key, &au_sysrq_op);
29255+ if (unlikely(err))
4a4d8108 29256+ pr_err("err %d, sysrq=%c\n", err, key);
1facf9fc 29257+ return err;
29258+}
29259+
29260+void au_sysrq_fin(void)
29261+{
29262+ int err;
076b876e 29263+
1facf9fc 29264+ err = unregister_sysrq_key(*aufs_sysrq_key, &au_sysrq_op);
29265+ if (unlikely(err))
4a4d8108 29266+ pr_err("err %d (ignored)\n", err);
1facf9fc 29267+}
7f207e10
AM
29268diff -urN /usr/share/empty/fs/aufs/vdir.c linux/fs/aufs/vdir.c
29269--- /usr/share/empty/fs/aufs/vdir.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
29270+++ linux/fs/aufs/vdir.c 2016-02-28 11:27:01.283912420 +0100
29271@@ -0,0 +1,888 @@
1facf9fc 29272+/*
7f2ca4b1 29273+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 29274+ *
29275+ * This program, aufs is free software; you can redistribute it and/or modify
29276+ * it under the terms of the GNU General Public License as published by
29277+ * the Free Software Foundation; either version 2 of the License, or
29278+ * (at your option) any later version.
dece6358
AM
29279+ *
29280+ * This program is distributed in the hope that it will be useful,
29281+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29282+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29283+ * GNU General Public License for more details.
29284+ *
29285+ * You should have received a copy of the GNU General Public License
523b37e3 29286+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 29287+ */
29288+
29289+/*
29290+ * virtual or vertical directory
29291+ */
29292+
29293+#include "aufs.h"
29294+
dece6358 29295+static unsigned int calc_size(int nlen)
1facf9fc 29296+{
dece6358 29297+ return ALIGN(sizeof(struct au_vdir_de) + nlen, sizeof(ino_t));
1facf9fc 29298+}
29299+
29300+static int set_deblk_end(union au_vdir_deblk_p *p,
29301+ union au_vdir_deblk_p *deblk_end)
29302+{
29303+ if (calc_size(0) <= deblk_end->deblk - p->deblk) {
29304+ p->de->de_str.len = 0;
29305+ /* smp_mb(); */
29306+ return 0;
29307+ }
29308+ return -1; /* error */
29309+}
29310+
29311+/* returns true or false */
29312+static int is_deblk_end(union au_vdir_deblk_p *p,
29313+ union au_vdir_deblk_p *deblk_end)
29314+{
29315+ if (calc_size(0) <= deblk_end->deblk - p->deblk)
29316+ return !p->de->de_str.len;
29317+ return 1;
29318+}
29319+
29320+static unsigned char *last_deblk(struct au_vdir *vdir)
29321+{
29322+ return vdir->vd_deblk[vdir->vd_nblk - 1];
29323+}
29324+
29325+/* ---------------------------------------------------------------------- */
29326+
7f2ca4b1 29327+/* estimate the appropriate size for name hash table */
1308ab2a 29328+unsigned int au_rdhash_est(loff_t sz)
29329+{
29330+ unsigned int n;
29331+
29332+ n = UINT_MAX;
29333+ sz >>= 10;
29334+ if (sz < n)
29335+ n = sz;
29336+ if (sz < AUFS_RDHASH_DEF)
29337+ n = AUFS_RDHASH_DEF;
4a4d8108 29338+ /* pr_info("n %u\n", n); */
1308ab2a 29339+ return n;
29340+}
29341+
1facf9fc 29342+/*
29343+ * the allocated memory has to be freed by
dece6358 29344+ * au_nhash_wh_free() or au_nhash_de_free().
1facf9fc 29345+ */
dece6358 29346+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp)
1facf9fc 29347+{
1facf9fc 29348+ struct hlist_head *head;
dece6358 29349+ unsigned int u;
076b876e 29350+ size_t sz;
1facf9fc 29351+
076b876e
AM
29352+ sz = sizeof(*nhash->nh_head) * num_hash;
29353+ head = kmalloc(sz, gfp);
dece6358
AM
29354+ if (head) {
29355+ nhash->nh_num = num_hash;
29356+ nhash->nh_head = head;
29357+ for (u = 0; u < num_hash; u++)
1facf9fc 29358+ INIT_HLIST_HEAD(head++);
dece6358 29359+ return 0; /* success */
1facf9fc 29360+ }
1facf9fc 29361+
dece6358 29362+ return -ENOMEM;
1facf9fc 29363+}
29364+
dece6358
AM
29365+static void nhash_count(struct hlist_head *head)
29366+{
29367+#if 0
29368+ unsigned long n;
29369+ struct hlist_node *pos;
29370+
29371+ n = 0;
29372+ hlist_for_each(pos, head)
29373+ n++;
4a4d8108 29374+ pr_info("%lu\n", n);
dece6358
AM
29375+#endif
29376+}
29377+
29378+static void au_nhash_wh_do_free(struct hlist_head *head)
1facf9fc 29379+{
c06a8ce3
AM
29380+ struct au_vdir_wh *pos;
29381+ struct hlist_node *node;
1facf9fc 29382+
c06a8ce3
AM
29383+ hlist_for_each_entry_safe(pos, node, head, wh_hash)
29384+ kfree(pos);
1facf9fc 29385+}
29386+
dece6358 29387+static void au_nhash_de_do_free(struct hlist_head *head)
1facf9fc 29388+{
c06a8ce3
AM
29389+ struct au_vdir_dehstr *pos;
29390+ struct hlist_node *node;
1facf9fc 29391+
c06a8ce3
AM
29392+ hlist_for_each_entry_safe(pos, node, head, hash)
29393+ au_cache_free_vdir_dehstr(pos);
1facf9fc 29394+}
29395+
dece6358
AM
29396+static void au_nhash_do_free(struct au_nhash *nhash,
29397+ void (*free)(struct hlist_head *head))
1facf9fc 29398+{
1308ab2a 29399+ unsigned int n;
1facf9fc 29400+ struct hlist_head *head;
1facf9fc 29401+
dece6358 29402+ n = nhash->nh_num;
1308ab2a 29403+ if (!n)
29404+ return;
29405+
dece6358 29406+ head = nhash->nh_head;
1308ab2a 29407+ while (n-- > 0) {
dece6358
AM
29408+ nhash_count(head);
29409+ free(head++);
1facf9fc 29410+ }
dece6358 29411+ kfree(nhash->nh_head);
1facf9fc 29412+}
29413+
dece6358 29414+void au_nhash_wh_free(struct au_nhash *whlist)
1facf9fc 29415+{
dece6358
AM
29416+ au_nhash_do_free(whlist, au_nhash_wh_do_free);
29417+}
1facf9fc 29418+
dece6358
AM
29419+static void au_nhash_de_free(struct au_nhash *delist)
29420+{
29421+ au_nhash_do_free(delist, au_nhash_de_do_free);
1facf9fc 29422+}
29423+
29424+/* ---------------------------------------------------------------------- */
29425+
29426+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
29427+ int limit)
29428+{
29429+ int num;
29430+ unsigned int u, n;
29431+ struct hlist_head *head;
c06a8ce3 29432+ struct au_vdir_wh *pos;
1facf9fc 29433+
29434+ num = 0;
29435+ n = whlist->nh_num;
29436+ head = whlist->nh_head;
1308ab2a 29437+ for (u = 0; u < n; u++, head++)
c06a8ce3
AM
29438+ hlist_for_each_entry(pos, head, wh_hash)
29439+ if (pos->wh_bindex == btgt && ++num > limit)
1facf9fc 29440+ return 1;
1facf9fc 29441+ return 0;
29442+}
29443+
29444+static struct hlist_head *au_name_hash(struct au_nhash *nhash,
dece6358 29445+ unsigned char *name,
1facf9fc 29446+ unsigned int len)
29447+{
dece6358
AM
29448+ unsigned int v;
29449+ /* const unsigned int magic_bit = 12; */
29450+
1308ab2a 29451+ AuDebugOn(!nhash->nh_num || !nhash->nh_head);
29452+
dece6358
AM
29453+ v = 0;
29454+ while (len--)
29455+ v += *name++;
29456+ /* v = hash_long(v, magic_bit); */
29457+ v %= nhash->nh_num;
29458+ return nhash->nh_head + v;
29459+}
29460+
29461+static int au_nhash_test_name(struct au_vdir_destr *str, const char *name,
29462+ int nlen)
29463+{
29464+ return str->len == nlen && !memcmp(str->name, name, nlen);
1facf9fc 29465+}
29466+
29467+/* returns 1 when 'name' is found in its bucket of 'whlist', otherwise 0 */
dece6358 29468+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen)
1facf9fc 29469+{
29470+ struct hlist_head *head;
c06a8ce3 29471+ struct au_vdir_wh *pos;
1facf9fc 29472+ struct au_vdir_destr *str;
29473+
dece6358 29474+ head = au_name_hash(whlist, name, nlen);
c06a8ce3
AM
29475+ hlist_for_each_entry(pos, head, wh_hash) {
29476+ str = &pos->wh_str;
1facf9fc 29477+ AuDbg("%.*s\n", str->len, str->name);
dece6358
AM
29478+ if (au_nhash_test_name(str, name, nlen))
29479+ return 1;
29480+ }
29481+ return 0;
29482+}
29483+
29484+/* returns 1 when 'name' is found in its bucket of 'delist', otherwise 0 */
29485+static int test_known(struct au_nhash *delist, char *name, int nlen)
29486+{
29487+ struct hlist_head *head;
c06a8ce3 29488+ struct au_vdir_dehstr *pos;
dece6358
AM
29489+ struct au_vdir_destr *str;
29490+
29491+ head = au_name_hash(delist, name, nlen);
c06a8ce3
AM
29492+ hlist_for_each_entry(pos, head, hash) {
29493+ str = pos->str; /* here the hash node holds a pointer to the name */
dece6358
AM
29494+ AuDbg("%.*s\n", str->len, str->name);
29495+ if (au_nhash_test_name(str, name, nlen))
1facf9fc 29496+ return 1;
29497+ }
29498+ return 0;
29499+}
29500+/* store ino and d_type in 'wh'; empty body without CONFIG_AUFS_SHWH */
dece6358
AM
29501+static void au_shwh_init_wh(struct au_vdir_wh *wh, ino_t ino,
29502+ unsigned char d_type)
29503+{
29504+#ifdef CONFIG_AUFS_SHWH
29505+ wh->wh_ino = ino;
29506+ wh->wh_type = d_type;
29507+#endif
29508+}
29509+
29510+/* ---------------------------------------------------------------------- */
29511+/* allocate an entry for 'name' and link it into its bucket of 'whlist' */
29512+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
29513+ unsigned int d_type, aufs_bindex_t bindex,
29514+ unsigned char shwh)
1facf9fc 29515+{
29516+ int err;
29517+ struct au_vdir_destr *str;
29518+ struct au_vdir_wh *wh;
29519+
dece6358 29520+ AuDbg("%.*s\n", nlen, name);
1308ab2a 29521+ AuDebugOn(!whlist->nh_num || !whlist->nh_head);
29522+
1facf9fc 29523+ err = -ENOMEM;
dece6358 29524+ wh = kmalloc(sizeof(*wh) + nlen, GFP_NOFS); /* + room for the name */
1facf9fc 29525+ if (unlikely(!wh))
29526+ goto out;
29527+
29528+ err = 0;
29529+ wh->wh_bindex = bindex;
dece6358
AM
29530+ if (shwh) /* ino and d_type are consumed only in this case */
29531+ au_shwh_init_wh(wh, ino, d_type);
1facf9fc 29532+ str = &wh->wh_str;
dece6358
AM
29533+ str->len = nlen;
29534+ memcpy(str->name, name, nlen);
29535+ hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, nlen));
1facf9fc 29536+ /* smp_mb(); */
29537+
4f0767ce 29538+out:
1facf9fc 29539+ return err;
29540+}
29541+/* grow vd_deblk[] by one slot and add a new empty block of vd_deblk_sz bytes */
1facf9fc 29542+static int append_deblk(struct au_vdir *vdir)
29543+{
29544+ int err;
dece6358 29545+ unsigned long ul;
1facf9fc 29546+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
29547+ union au_vdir_deblk_p p, deblk_end;
29548+ unsigned char **o;
29549+
29550+ err = -ENOMEM;
dece6358
AM
29551+ o = krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1),
29552+ GFP_NOFS);
1facf9fc 29553+ if (unlikely(!o))
29554+ goto out;
29555+
29556+ vdir->vd_deblk = o;
29557+ p.deblk = kmalloc(deblk_sz, GFP_NOFS);
29558+ if (p.deblk) { /* on failure err stays -ENOMEM and vd_nblk is untouched */
29559+ ul = vdir->vd_nblk++;
29560+ vdir->vd_deblk[ul] = p.deblk;
29561+ vdir->vd_last.ul = ul; /* the new block becomes the current one */
29562+ vdir->vd_last.p.deblk = p.deblk;
29563+ deblk_end.deblk = p.deblk + deblk_sz;
29564+ err = set_deblk_end(&p, &deblk_end);
29565+ }
29566+
4f0767ce 29567+out:
1facf9fc 29568+ return err;
29569+}
29570+/* write one entry at vd_last (adding a block when needed), hash it in delist */
dece6358
AM
29571+static int append_de(struct au_vdir *vdir, char *name, int nlen, ino_t ino,
29572+ unsigned int d_type, struct au_nhash *delist)
29573+{
29574+ int err;
29575+ unsigned int sz;
29576+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
29577+ union au_vdir_deblk_p p, *room, deblk_end;
29578+ struct au_vdir_dehstr *dehstr;
29579+
29580+ p.deblk = last_deblk(vdir);
29581+ deblk_end.deblk = p.deblk + deblk_sz;
29582+ room = &vdir->vd_last.p; /* alias: append_deblk() moves it too */
29583+ AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk
29584+ || !is_deblk_end(room, &deblk_end));
29585+
29586+ sz = calc_size(nlen);
29587+ if (unlikely(sz > deblk_end.deblk - room->deblk)) {
29588+ err = append_deblk(vdir);
29589+ if (unlikely(err))
29590+ goto out;
29591+
29592+ p.deblk = last_deblk(vdir);
29593+ deblk_end.deblk = p.deblk + deblk_sz;
29594+ /* smp_mb(); */
29595+ AuDebugOn(room->deblk != p.deblk);
29596+ }
29597+
29598+ err = -ENOMEM;
4a4d8108 29599+ dehstr = au_cache_alloc_vdir_dehstr();
dece6358
AM
29600+ if (unlikely(!dehstr))
29601+ goto out;
29602+
29603+ dehstr->str = &room->de->de_str; /* hash node points into the block */
29604+ hlist_add_head(&dehstr->hash, au_name_hash(delist, name, nlen));
29605+ room->de->de_ino = ino;
29606+ room->de->de_type = d_type;
29607+ room->de->de_str.len = nlen;
29608+ memcpy(room->de->de_str.name, name, nlen);
29609+
29610+ err = 0;
29611+ room->deblk += sz;
29612+ if (unlikely(set_deblk_end(room, &deblk_end)))
29613+ err = append_deblk(vdir); /* end mark refused: start a new block */
29614+ /* smp_mb(); */
29615+
4f0767ce 29616+out:
dece6358
AM
29617+ return err;
29618+}
29619+
29620+/* ---------------------------------------------------------------------- */
29621+/* free every block, then the block-pointer array, then the vdir itself */
29622+void au_vdir_free(struct au_vdir *vdir)
29623+{
29624+ unsigned char **deblk;
29625+
29626+ deblk = vdir->vd_deblk;
29627+ while (vdir->vd_nblk--) /* vd_nblk is consumed as the loop counter */
29628+ kfree(*deblk++);
29629+ kfree(vdir->vd_deblk);
29630+ au_cache_free_vdir(vdir);
29631+}
29632+/* allocate a vdir holding one empty block; returns ERR_PTR() on failure */
1308ab2a 29633+static struct au_vdir *alloc_vdir(struct file *file)
1facf9fc 29634+{
29635+ struct au_vdir *vdir;
1308ab2a 29636+ struct super_block *sb;
1facf9fc 29637+ int err;
29638+
1308ab2a 29639+ sb = file->f_dentry->d_sb;
dece6358
AM
29640+ SiMustAnyLock(sb);
29641+
1facf9fc 29642+ err = -ENOMEM;
29643+ vdir = au_cache_alloc_vdir();
29644+ if (unlikely(!vdir))
29645+ goto out;
29646+
29647+ vdir->vd_deblk = kzalloc(sizeof(*vdir->vd_deblk), GFP_NOFS);
29648+ if (unlikely(!vdir->vd_deblk))
29649+ goto out_free;
29650+
29651+ vdir->vd_deblk_sz = au_sbi(sb)->si_rdblk;
1308ab2a 29652+ if (!vdir->vd_deblk_sz) { /* si_rdblk == 0: derive the size instead */
7f2ca4b1 29653+ /* estimate the appropriate size for deblk */
1308ab2a 29654+ vdir->vd_deblk_sz = au_dir_size(file, /*dentry*/NULL);
4a4d8108 29655+ /* pr_info("vd_deblk_sz %u\n", vdir->vd_deblk_sz); */
1308ab2a 29656+ }
1facf9fc 29657+ vdir->vd_nblk = 0;
29658+ vdir->vd_version = 0;
29659+ vdir->vd_jiffy = 0;
29660+ err = append_deblk(vdir);
29661+ if (!err)
29662+ return vdir; /* success */
29663+
29664+ kfree(vdir->vd_deblk);
29665+
4f0767ce 29666+out_free:
1facf9fc 29667+ au_cache_free_vdir(vdir);
4f0767ce 29668+out:
1facf9fc 29669+ vdir = ERR_PTR(err);
29670+ return vdir;
29671+}
29672+/* keep only block 0, mark it empty, and reset position, version and jiffy */
29673+static int reinit_vdir(struct au_vdir *vdir)
29674+{
29675+ int err;
29676+ union au_vdir_deblk_p p, deblk_end;
29677+
29678+ while (vdir->vd_nblk > 1) { /* free from the tail down to block 1 */
29679+ kfree(vdir->vd_deblk[vdir->vd_nblk - 1]);
29680+ /* vdir->vd_deblk[vdir->vd_nblk - 1] = NULL; */
29681+ vdir->vd_nblk--;
29682+ }
29683+ p.deblk = vdir->vd_deblk[0];
29684+ deblk_end.deblk = p.deblk + vdir->vd_deblk_sz;
29685+ err = set_deblk_end(&p, &deblk_end);
29686+ /* keep vd_deblk_sz */
29687+ vdir->vd_last.ul = 0;
29688+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
29689+ vdir->vd_version = 0;
29690+ vdir->vd_jiffy = 0;
29691+ /* smp_mb(); */
29692+ return err;
29693+}
29694+
29695+/* ---------------------------------------------------------------------- */
29696+/* bits of fillvdir_arg.flags and the helpers to test, set and clear them */
1facf9fc 29697+#define AuFillVdir_CALLED 1
29698+#define AuFillVdir_WHABLE (1 << 1)
dece6358 29699+#define AuFillVdir_SHWH (1 << 2)
1facf9fc 29700+#define au_ftest_fillvdir(flags, name) ((flags) & AuFillVdir_##name)
7f207e10
AM
29701+#define au_fset_fillvdir(flags, name) \
29702+ do { (flags) |= AuFillVdir_##name; } while (0)
29703+#define au_fclr_fillvdir(flags, name) \
29704+ do { (flags) &= ~AuFillVdir_##name; } while (0)
1facf9fc 29705+/* without CONFIG_AUFS_SHWH the SHWH bit is 0, so its test is always false */
dece6358
AM
29706+#ifndef CONFIG_AUFS_SHWH
29707+#undef AuFillVdir_SHWH
29708+#define AuFillVdir_SHWH 0
29709+#endif
29710+/* state shared by au_do_read_vdir() and the fillvdir() actor */
1facf9fc 29711+struct fillvdir_arg {
392086de 29712+ struct dir_context ctx; /* fillvdir() gets back here via container_of() */
1facf9fc 29713+ struct file *file;
29714+ struct au_vdir *vdir;
dece6358
AM
29715+ struct au_nhash delist; /* names already stored in vdir */
29716+ struct au_nhash whlist; /* whiteout names seen so far */
1facf9fc 29717+ aufs_bindex_t bindex;
29718+ unsigned int flags; /* AuFillVdir_* bits */
29719+ int err; /* result of the last fillvdir() call */
29720+};
29721+/* dir_context actor: sort one lower entry into the vdir or the whiteout hash */
392086de 29722+static int fillvdir(struct dir_context *ctx, const char *__name, int nlen,
1facf9fc 29723+ loff_t offset __maybe_unused, u64 h_ino,
29724+ unsigned int d_type)
29725+{
392086de 29726+ struct fillvdir_arg *arg = container_of(ctx, struct fillvdir_arg, ctx);
1facf9fc 29727+ char *name = (void *)__name;
29728+ struct super_block *sb;
1facf9fc 29729+ ino_t ino = 0; /* the whiteout path passes it on even when SHWH is off */
dece6358 29730+ const unsigned char shwh = !!au_ftest_fillvdir(arg->flags, SHWH);
1facf9fc 29731+
1facf9fc 29732+ arg->err = 0;
dece6358 29733+ sb = arg->file->f_dentry->d_sb;
1facf9fc 29734+ au_fset_fillvdir(arg->flags, CALLED); /* tells the caller to iterate again */
29735+ /* smp_mb(); */
dece6358 29736+ if (nlen <= AUFS_WH_PFX_LEN
1facf9fc 29737+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
dece6358
AM
29738+ if (test_known(&arg->delist, name, nlen)
29739+ || au_nhash_test_known_wh(&arg->whlist, name, nlen))
29740+ goto out; /* already exists or whiteouted */
1facf9fc 29741+
dece6358 29742+ arg->err = au_ino(sb, arg->bindex, h_ino, d_type, &ino);
4a4d8108
AM
29743+ if (!arg->err) {
29744+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
29745+ d_type = DT_UNKNOWN;
dece6358
AM
29746+ arg->err = append_de(arg->vdir, name, nlen, ino,
29747+ d_type, &arg->delist);
4a4d8108 29748+ }
1facf9fc 29749+ } else if (au_ftest_fillvdir(arg->flags, WHABLE)) {
29750+ name += AUFS_WH_PFX_LEN; /* strip the whiteout prefix */
dece6358
AM
29751+ nlen -= AUFS_WH_PFX_LEN;
29752+ if (au_nhash_test_known_wh(&arg->whlist, name, nlen))
29753+ goto out; /* already whiteouted */
1facf9fc 29754+ /* NOTE(review): d_type test below differs from the branch above; confirm */
dece6358
AM
29755+ if (shwh)
29756+ arg->err = au_wh_ino(sb, arg->bindex, h_ino, d_type,
29757+ &ino);
4a4d8108
AM
29758+ if (!arg->err) {
29759+ if (nlen <= AUFS_MAX_NAMELEN + AUFS_WH_PFX_LEN)
29760+ d_type = DT_UNKNOWN;
1facf9fc 29761+ arg->err = au_nhash_append_wh
dece6358
AM
29762+ (&arg->whlist, name, nlen, ino, d_type,
29763+ arg->bindex, shwh);
4a4d8108 29764+ }
1facf9fc 29765+ }
29766+
4f0767ce 29767+out:
1facf9fc 29768+ if (!arg->err)
29769+ arg->vdir->vd_jiffy = jiffies;
29770+ /* smp_mb(); */
29771+ AuTraceErr(arg->err);
29772+ return arg->err;
29773+}
29774+/* SHWH only: append each collected whiteout to vdir under its prefixed name */
dece6358
AM
29775+static int au_handle_shwh(struct super_block *sb, struct au_vdir *vdir,
29776+ struct au_nhash *whlist, struct au_nhash *delist)
29777+{
29778+#ifdef CONFIG_AUFS_SHWH
29779+ int err;
29780+ unsigned int nh, u;
29781+ struct hlist_head *head;
c06a8ce3
AM
29782+ struct au_vdir_wh *pos;
29783+ struct hlist_node *n;
dece6358
AM
29784+ char *p, *o;
29785+ struct au_vdir_destr *destr;
29786+
29787+ AuDebugOn(!au_opt_test(au_mntflags(sb), SHWH));
29788+
29789+ err = -ENOMEM;
537831f9 29790+ o = p = (void *)__get_free_page(GFP_NOFS); /* scratch name buffer */
dece6358
AM
29791+ if (unlikely(!p))
29792+ goto out;
29793+
29794+ err = 0;
29795+ nh = whlist->nh_num;
29796+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN); /* prefix is written once */
29797+ p += AUFS_WH_PFX_LEN; /* 'o' keeps the start, 'p' the name position */
29798+ for (u = 0; u < nh; u++) {
29799+ head = whlist->nh_head + u;
c06a8ce3
AM
29800+ hlist_for_each_entry_safe(pos, n, head, wh_hash) {
29801+ destr = &pos->wh_str;
dece6358
AM
29802+ memcpy(p, destr->name, destr->len);
29803+ err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN,
c06a8ce3 29804+ pos->wh_ino, pos->wh_type, delist);
dece6358
AM
29805+ if (unlikely(err))
29806+ break; /* leaves the inner loop only; the outer loop goes on */
29807+ }
29808+ }
29809+
537831f9 29810+ free_page((unsigned long)o);
dece6358 29811+
4f0767ce 29812+out:
dece6358
AM
29813+ AuTraceErr(err);
29814+ return err;
29815+#else
29816+ return 0;
29817+#endif
29818+}
29819+/* run fillvdir() over the dir file of every branch from bstart to bend */
1facf9fc 29820+static int au_do_read_vdir(struct fillvdir_arg *arg)
29821+{
29822+ int err;
dece6358 29823+ unsigned int rdhash;
1facf9fc 29824+ loff_t offset;
dece6358
AM
29825+ aufs_bindex_t bend, bindex, bstart;
29826+ unsigned char shwh;
1facf9fc 29827+ struct file *hf, *file;
29828+ struct super_block *sb;
29829+
1facf9fc 29830+ file = arg->file;
29831+ sb = file->f_dentry->d_sb;
dece6358
AM
29832+ SiMustAnyLock(sb);
29833+
29834+ rdhash = au_sbi(sb)->si_rdhash;
1308ab2a 29835+ if (!rdhash) /* si_rdhash == 0: estimate from the dir size */
29836+ rdhash = au_rdhash_est(au_dir_size(file, /*dentry*/NULL));
dece6358
AM
29837+ err = au_nhash_alloc(&arg->delist, rdhash, GFP_NOFS);
29838+ if (unlikely(err))
1facf9fc 29839+ goto out;
dece6358
AM
29840+ err = au_nhash_alloc(&arg->whlist, rdhash, GFP_NOFS);
29841+ if (unlikely(err))
1facf9fc 29842+ goto out_delist;
29843+
29844+ err = 0;
29845+ arg->flags = 0;
dece6358
AM
29846+ shwh = 0;
29847+ if (au_opt_test(au_mntflags(sb), SHWH)) {
29848+ shwh = 1;
29849+ au_fset_fillvdir(arg->flags, SHWH);
29850+ }
29851+ bstart = au_fbstart(file);
4a4d8108 29852+ bend = au_fbend_dir(file);
dece6358 29853+ for (bindex = bstart; !err && bindex <= bend; bindex++) {
4a4d8108 29854+ hf = au_hf_dir(file, bindex);
1facf9fc 29855+ if (!hf)
29856+ continue;
29857+
29858+ offset = vfsub_llseek(hf, 0, SEEK_SET);
29859+ err = offset;
29860+ if (unlikely(offset)) /* any non-zero result ends the loop */
29861+ break;
29862+
29863+ arg->bindex = bindex;
29864+ au_fclr_fillvdir(arg->flags, WHABLE);
dece6358
AM
29865+ if (shwh
29866+ || (bindex != bend
29867+ && au_br_whable(au_sbr_perm(sb, bindex))))
1facf9fc 29868+ au_fset_fillvdir(arg->flags, WHABLE);
29869+ do { /* repeat until one pass never calls fillvdir() */
29870+ arg->err = 0;
29871+ au_fclr_fillvdir(arg->flags, CALLED);
29872+ /* smp_mb(); */
392086de 29873+ err = vfsub_iterate_dir(hf, &arg->ctx);
1facf9fc 29874+ if (err >= 0)
29875+ err = arg->err;
29876+ } while (!err && au_ftest_fillvdir(arg->flags, CALLED));
392086de
AM
29877+
29878+ /*
29879+ * dir_relax() may be good for concurrency, but aufs should not
29880+ * use it since it will cause a lockdep problem.
29881+ */
1facf9fc 29882+ }
dece6358
AM
29883+
29884+ if (!err && shwh)
29885+ err = au_handle_shwh(sb, arg->vdir, &arg->whlist, &arg->delist);
29886+
29887+ au_nhash_wh_free(&arg->whlist);
1facf9fc 29888+
4f0767ce 29889+out_delist:
dece6358 29890+ au_nhash_de_free(&arg->delist);
4f0767ce 29891+out:
1facf9fc 29892+ return err;
29893+}
29894+/* fill the inode's vdir when it is missing, or stale and 'may_read' is set */
29895+static int read_vdir(struct file *file, int may_read)
29896+{
29897+ int err;
29898+ unsigned long expire;
29899+ unsigned char do_read;
392086de
AM
29900+ struct fillvdir_arg arg = {
29901+ .ctx = {
29902+ .actor = au_diractor(fillvdir)
29903+ }
29904+ };
1facf9fc 29905+ struct inode *inode;
29906+ struct au_vdir *vdir, *allocated;
29907+
29908+ err = 0;
c06a8ce3 29909+ inode = file_inode(file);
1facf9fc 29910+ IMustLock(inode);
dece6358
AM
29911+ SiMustAnyLock(inode->i_sb);
29912+
1facf9fc 29913+ allocated = NULL;
29914+ do_read = 0;
29915+ expire = au_sbi(inode->i_sb)->si_rdcache;
29916+ vdir = au_ivdir(inode);
29917+ if (!vdir) {
29918+ do_read = 1;
1308ab2a 29919+ vdir = alloc_vdir(file);
1facf9fc 29920+ err = PTR_ERR(vdir);
29921+ if (IS_ERR(vdir))
29922+ goto out;
29923+ err = 0;
29924+ allocated = vdir;
29925+ } else if (may_read
29926+ && (inode->i_version != vdir->vd_version
29927+ || time_after(jiffies, vdir->vd_jiffy + expire))) {
29928+ do_read = 1; /* version changed or the cache time expired */
29929+ err = reinit_vdir(vdir);
29930+ if (unlikely(err))
29931+ goto out;
29932+ }
29933+
29934+ if (!do_read)
29935+ return 0; /* success */
29936+
29937+ arg.file = file;
29938+ arg.vdir = vdir;
29939+ err = au_do_read_vdir(&arg);
29940+ if (!err) {
392086de 29941+ /* file->f_pos = 0; */ /* todo: ctx->pos? */
1facf9fc 29942+ vdir->vd_version = inode->i_version;
29943+ vdir->vd_last.ul = 0; /* rewind to the start of block 0 */
29944+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
29945+ if (allocated)
29946+ au_set_ivdir(inode, allocated);
29947+ } else if (allocated) /* a reused vdir is not freed on error */
29948+ au_vdir_free(allocated);
29949+
4f0767ce 29950+out:
1facf9fc 29951+ return err;
29952+}
29953+
29954+static int copy_vdir(struct au_vdir *tgt, struct au_vdir *src)
29955+{
29956+ int err, rerr;
29957+ unsigned long ul, n;
29958+ const unsigned int deblk_sz = src->vd_deblk_sz;
29959+
29960+ AuDebugOn(tgt->vd_nblk != 1);
29961+
29962+ err = -ENOMEM;
29963+ if (tgt->vd_nblk < src->vd_nblk) {
29964+ unsigned char **p;
29965+
dece6358
AM
29966+ p = krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk,
29967+ GFP_NOFS);
1facf9fc 29968+ if (unlikely(!p))
29969+ goto out;
29970+ tgt->vd_deblk = p;
29971+ }
29972+
1308ab2a 29973+ if (tgt->vd_deblk_sz != deblk_sz) {
29974+ unsigned char *p;
29975+
29976+ tgt->vd_deblk_sz = deblk_sz;
29977+ p = krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS);
29978+ if (unlikely(!p))
29979+ goto out;
29980+ tgt->vd_deblk[0] = p;
29981+ }
1facf9fc 29982+ memcpy(tgt->vd_deblk[0], src->vd_deblk[0], deblk_sz);
1facf9fc 29983+ tgt->vd_version = src->vd_version;
29984+ tgt->vd_jiffy = src->vd_jiffy;
29985+
29986+ n = src->vd_nblk;
29987+ for (ul = 1; ul < n; ul++) {
dece6358
AM
29988+ tgt->vd_deblk[ul] = kmemdup(src->vd_deblk[ul], deblk_sz,
29989+ GFP_NOFS);
29990+ if (unlikely(!tgt->vd_deblk[ul]))
1facf9fc 29991+ goto out;
1308ab2a 29992+ tgt->vd_nblk++;
1facf9fc 29993+ }
1308ab2a 29994+ tgt->vd_nblk = n;
29995+ tgt->vd_last.ul = tgt->vd_last.ul;
29996+ tgt->vd_last.p.deblk = tgt->vd_deblk[tgt->vd_last.ul];
29997+ tgt->vd_last.p.deblk += src->vd_last.p.deblk
29998+ - src->vd_deblk[src->vd_last.ul];
1facf9fc 29999+ /* smp_mb(); */
30000+ return 0; /* success */
30001+
4f0767ce 30002+out:
1facf9fc 30003+ rerr = reinit_vdir(tgt);
30004+ BUG_ON(rerr);
30005+ return err;
30006+}
30007+/* refresh the inode's vdir if needed, then (re)build the file's copy of it */
30008+int au_vdir_init(struct file *file)
30009+{
30010+ int err;
30011+ struct inode *inode;
30012+ struct au_vdir *vdir_cache, *allocated;
30013+
392086de 30014+ /* test file->f_pos here instead of ctx->pos */
1facf9fc 30015+ err = read_vdir(file, !file->f_pos);
30016+ if (unlikely(err))
30017+ goto out;
30018+
30019+ allocated = NULL;
30020+ vdir_cache = au_fvdir_cache(file);
30021+ if (!vdir_cache) {
1308ab2a 30022+ vdir_cache = alloc_vdir(file);
1facf9fc 30023+ err = PTR_ERR(vdir_cache);
30024+ if (IS_ERR(vdir_cache))
30025+ goto out;
30026+ allocated = vdir_cache;
30027+ } else if (!file->f_pos && vdir_cache->vd_version != file->f_version) {
392086de 30028+ /* test file->f_pos here instead of ctx->pos */
1facf9fc 30029+ err = reinit_vdir(vdir_cache);
30030+ if (unlikely(err))
30031+ goto out;
30032+ } else
30033+ return 0; /* success */
30034+
c06a8ce3 30035+ inode = file_inode(file);
1facf9fc 30036+ err = copy_vdir(vdir_cache, au_ivdir(inode));
30037+ if (!err) {
30038+ file->f_version = inode->i_version;
30039+ if (allocated)
30040+ au_set_fvdir_cache(file, allocated);
30041+ } else if (allocated) /* a reused cache is kept, only a new one is freed */
30042+ au_vdir_free(allocated);
30043+
4f0767ce 30044+out:
1facf9fc 30045+ return err;
30046+}
30047+/* offset of vd_last: block index * vd_deblk_sz + position inside the block */
30048+static loff_t calc_offset(struct au_vdir *vdir)
30049+{
30050+ loff_t offset;
30051+ union au_vdir_deblk_p p;
30052+
30053+ p.deblk = vdir->vd_deblk[vdir->vd_last.ul];
30054+ offset = vdir->vd_last.p.deblk - p.deblk;
30055+ offset += vdir->vd_deblk_sz * vdir->vd_last.ul;
30056+ return offset;
30057+}
30058+
30059+/* move vd_last to ctx->pos; returns 1 when a position was found, else 0 */
392086de 30060+static int seek_vdir(struct file *file, struct dir_context *ctx)
1facf9fc 30061+{
30062+ int valid;
30063+ unsigned int deblk_sz;
30064+ unsigned long ul, n;
30065+ loff_t offset;
30066+ union au_vdir_deblk_p p, deblk_end;
30067+ struct au_vdir *vdir_cache;
30068+
30069+ valid = 1;
30070+ vdir_cache = au_fvdir_cache(file);
30071+ offset = calc_offset(vdir_cache);
30072+ AuDbg("offset %lld\n", offset);
392086de 30073+ if (ctx->pos == offset) /* already there */
1facf9fc 30074+ goto out;
30075+
30076+ vdir_cache->vd_last.ul = 0;
30077+ vdir_cache->vd_last.p.deblk = vdir_cache->vd_deblk[0];
392086de 30078+ if (!ctx->pos) /* position 0 is the rewound state set just above */
1facf9fc 30079+ goto out;
30080+
30081+ valid = 0;
30082+ deblk_sz = vdir_cache->vd_deblk_sz;
392086de 30083+ ul = div64_u64(ctx->pos, deblk_sz); /* block that contains ctx->pos */
1facf9fc 30084+ AuDbg("ul %lu\n", ul);
30085+ if (ul >= vdir_cache->vd_nblk)
30086+ goto out;
30087+
30088+ n = vdir_cache->vd_nblk;
30089+ for (; ul < n; ul++) {
30090+ p.deblk = vdir_cache->vd_deblk[ul];
30091+ deblk_end.deblk = p.deblk + deblk_sz;
30092+ offset = ul;
30093+ offset *= deblk_sz;
392086de 30094+ while (!is_deblk_end(&p, &deblk_end) && offset < ctx->pos) {
1facf9fc 30095+ unsigned int l;
30096+
30097+ l = calc_size(p.de->de_str.len);
30098+ offset += l;
30099+ p.deblk += l;
30100+ }
30101+ if (!is_deblk_end(&p, &deblk_end)) {
30102+ valid = 1;
30103+ vdir_cache->vd_last.ul = ul;
30104+ vdir_cache->vd_last.p = p;
30105+ break;
30106+ }
30107+ }
30108+
4f0767ce 30109+out:
1facf9fc 30110+ /* smp_mb(); */
30111+ AuTraceErr(!valid);
30112+ return valid;
30113+}
30114+/* dir_emit() the cached entries starting at ctx->pos; always returns 0 */
392086de 30115+int au_vdir_fill_de(struct file *file, struct dir_context *ctx)
1facf9fc 30116+{
1facf9fc 30117+ unsigned int l, deblk_sz;
30118+ union au_vdir_deblk_p deblk_end;
30119+ struct au_vdir *vdir_cache;
30120+ struct au_vdir_de *de;
30121+
30122+ vdir_cache = au_fvdir_cache(file);
392086de 30123+ if (!seek_vdir(file, ctx)) /* no valid position: emit nothing */
1facf9fc 30124+ return 0;
30125+
30126+ deblk_sz = vdir_cache->vd_deblk_sz;
30127+ while (1) {
30128+ deblk_end.deblk = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
30129+ deblk_end.deblk += deblk_sz;
30130+ while (!is_deblk_end(&vdir_cache->vd_last.p, &deblk_end)) {
30131+ de = vdir_cache->vd_last.p.de;
30132+ AuDbg("%.*s, off%lld, i%lu, dt%d\n",
392086de 30133+ de->de_str.len, de->de_str.name, ctx->pos,
1facf9fc 30134+ (unsigned long)de->de_ino, de->de_type);
392086de
AM
30135+ if (unlikely(!dir_emit(ctx, de->de_str.name,
30136+ de->de_str.len, de->de_ino,
30137+ de->de_type))) {
1facf9fc 30138+ /* todo: ignore the error caused by udba? */
30139+ /* return err; */
30140+ return 0;
30141+ }
30142+
30143+ l = calc_size(de->de_str.len);
30144+ vdir_cache->vd_last.p.deblk += l;
392086de 30145+ ctx->pos += l;
1facf9fc 30146+ }
30147+ if (vdir_cache->vd_last.ul < vdir_cache->vd_nblk - 1) {
30148+ vdir_cache->vd_last.ul++;
30149+ vdir_cache->vd_last.p.deblk
30150+ = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
392086de 30151+ ctx->pos = deblk_sz * vdir_cache->vd_last.ul; /* start of next block */
1facf9fc 30152+ continue;
30153+ }
30154+ break;
30155+ }
30156+
30157+ /* smp_mb(); */
30158+ return 0;
30159+}
7f207e10
AM
30160diff -urN /usr/share/empty/fs/aufs/vfsub.c linux/fs/aufs/vfsub.c
30161--- /usr/share/empty/fs/aufs/vfsub.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
30162+++ linux/fs/aufs/vfsub.c 2016-02-28 11:27:01.283912420 +0100
30163@@ -0,0 +1,864 @@
1facf9fc 30164+/*
7f2ca4b1 30165+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 30166+ *
30167+ * This program, aufs is free software; you can redistribute it and/or modify
30168+ * it under the terms of the GNU General Public License as published by
30169+ * the Free Software Foundation; either version 2 of the License, or
30170+ * (at your option) any later version.
dece6358
AM
30171+ *
30172+ * This program is distributed in the hope that it will be useful,
30173+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
30174+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30175+ * GNU General Public License for more details.
30176+ *
30177+ * You should have received a copy of the GNU General Public License
523b37e3 30178+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 30179+ */
30180+
30181+/*
30182+ * sub-routines for VFS
30183+ */
30184+
dece6358 30185+#include <linux/namei.h>
7f2ca4b1 30186+#include <linux/nsproxy.h>
dece6358
AM
30187+#include <linux/security.h>
30188+#include <linux/splice.h>
7f2ca4b1 30189+#include "../fs/mount.h"
1facf9fc 30190+#include "aufs.h"
30191+/* for fuse with au_userns set: -EACCES unless mnt is in current's mnt_ns */
7f2ca4b1
JR
30192+#ifdef CONFIG_AUFS_BR_FUSE
30193+int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb)
30194+{
30195+ struct nsproxy *ns;
30196+
30197+ if (!au_test_fuse(h_sb) || !au_userns)
30198+ return 0;
30199+
30200+ ns = current->nsproxy;
30201+ /* no {get,put}_nsproxy(ns) */
30202+ return real_mount(mnt)->mnt_ns == ns->mnt_ns ? 0 : -EACCES;
30203+}
30204+#endif
30205+
30206+/* ---------------------------------------------------------------------- */
30207+/* vfs_getattr() h_path when the fs test passes; *did (if given) says whether */
1facf9fc 30208+int vfsub_update_h_iattr(struct path *h_path, int *did)
30209+{
30210+ int err;
30211+ struct kstat st;
30212+ struct super_block *h_sb;
30213+
30214+ /* for remote fs, leave work for its getattr or d_revalidate */
30215+ /* for bad i_attr fs, handle them in aufs_getattr() */
30216+ /* still some fs may acquire i_mutex. we need to skip them */
30217+ err = 0;
30218+ if (!did)
30219+ did = &err; /* scratch target; err is reassigned before return */
30220+ h_sb = h_path->dentry->d_sb;
30221+ *did = (!au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb));
30222+ if (*did)
c06a8ce3 30223+ err = vfs_getattr(h_path, &st);
1facf9fc 30224+
30225+ return err;
30226+}
30227+
30228+/* ---------------------------------------------------------------------- */
30229+/* dentry_open() with current_cred(); bumps i_readcount for read-only opens */
4a4d8108 30230+struct file *vfsub_dentry_open(struct path *path, int flags)
1308ab2a 30231+{
30232+ struct file *file;
30233+
b4510431 30234+ file = dentry_open(path, flags /* | __FMODE_NONOTIFY */,
7f207e10 30235+ current_cred());
2cbb1c4b
JR
30236+ if (!IS_ERR_OR_NULL(file)
30237+ && (file->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
30238+ i_readcount_inc(path->dentry->d_inode);
4a4d8108 30239+
1308ab2a 30240+ return file;
30241+}
30242+/* filp_open() with lockdep off, then vfsub_update_h_iattr() on success */
1facf9fc 30243+struct file *vfsub_filp_open(const char *path, int oflags, int mode)
30244+{
30245+ struct file *file;
30246+
2cbb1c4b 30247+ lockdep_off();
7f207e10 30248+ file = filp_open(path,
2cbb1c4b 30249+ oflags /* | __FMODE_NONOTIFY */,
7f207e10 30250+ mode);
2cbb1c4b 30251+ lockdep_on();
1facf9fc 30252+ if (IS_ERR(file))
30253+ goto out;
30254+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
30255+
4f0767ce 30256+out:
1facf9fc 30257+ return file;
30258+}
30259+
7f2ca4b1
JR
30260+/*
30261+ * Ideally this function should call VFS:do_last() in order to keep all its
30262+ * checks. But it is very hard for aufs to regenerate several VFS internal
30263+ * structures such as nameidata. This is a second (or third) best approach.
30264+ * cf. linux/fs/namei.c:do_last(), lookup_open() and atomic_open().
30265+ */
30266+int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
30267+ struct vfsub_aopen_args *args, struct au_branch *br)
30268+{
30269+ int err;
30270+ struct file *file = args->file;
30271+ /* copied from linux/fs/namei.c:atomic_open() */
30272+ struct dentry *const DENTRY_NOT_SET = (void *)-1UL;
30273+
30274+ IMustLock(dir);
30275+ AuDebugOn(!dir->i_op->atomic_open);
30276+
30277+ err = au_br_test_oflag(args->open_flag, br);
30278+ if (unlikely(err))
30279+ goto out;
30280+
30281+ args->file->f_path.dentry = DENTRY_NOT_SET;
30282+ args->file->f_path.mnt = au_br_mnt(br);
30283+ err = dir->i_op->atomic_open(dir, dentry, file, args->open_flag,
30284+ args->create_mode, args->opened);
30285+ if (err >= 0) {
30286+ /* some filesystems don't set FILE_CREATED while succeeded? */
30287+ if (*args->opened & FILE_CREATED)
30288+ fsnotify_create(dir, dentry);
30289+ } else
30290+ goto out;
30291+
30292+ /* err > 0 skips these checks but still reaches atomic_inc/fsnotify_open */
30293+ if (!err) {
30294+ /* todo: call VFS:may_open() here */
30295+ err = open_check_o_direct(file);
30296+ /* todo: ima_file_check() too? */
30297+ if (!err && (args->open_flag & __FMODE_EXEC))
30298+ err = deny_write_access(file);
30299+ if (unlikely(err))
30300+ /* note that the file is created and still opened */
30301+ goto out;
30302+ }
30303+
30304+ atomic_inc(&br->br_count);
30305+ fsnotify_open(file);
30306+
30307+out:
30308+ return err;
30309+}
30310+/* kern_path(), then vfsub_update_h_iattr() when the result has an inode */
1facf9fc 30311+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path)
30312+{
30313+ int err;
30314+
1facf9fc 30315+ err = kern_path(name, flags, path);
1facf9fc 30316+ if (!err && path->dentry->d_inode)
30317+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
30318+ return err;
30319+}
30320+/* lookup_one_len() under the parent's lock; refresh attrs of a positive hit */
30321+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
30322+ int len)
30323+{
30324+ struct path path = {
30325+ .mnt = NULL
30326+ };
30327+
1308ab2a 30328+ /* VFS checks it too, but by WARN_ON_ONCE() */
1facf9fc 30329+ IMustLock(parent->d_inode);
30330+
30331+ path.dentry = lookup_one_len(name, parent, len);
30332+ if (IS_ERR(path.dentry))
30333+ goto out;
30334+ if (path.dentry->d_inode)
30335+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
30336+
4f0767ce 30337+out:
4a4d8108 30338+ AuTraceErrPtr(path.dentry);
1facf9fc 30339+ return path.dentry;
30340+}
30341+/* void* adapter: unpack the args and store vfsub_lkup_one()'s result in *errp */
b4510431 30342+void vfsub_call_lkup_one(void *args)
2cbb1c4b 30343+{
b4510431
AM
30344+ struct vfsub_lkup_one_args *a = args;
30345+ *a->errp = vfsub_lkup_one(a->name, a->parent);
2cbb1c4b
JR
30346+}
30347+
1facf9fc 30348+/* ---------------------------------------------------------------------- */
30349+/* lock_rename() with lockdep off, then au_hn_suspend() on each distinct hdir */
30350+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
30351+ struct dentry *d2, struct au_hinode *hdir2)
30352+{
30353+ struct dentry *d;
30354+
2cbb1c4b 30355+ lockdep_off();
1facf9fc 30356+ d = lock_rename(d1, d2);
2cbb1c4b 30357+ lockdep_on();
4a4d8108 30358+ au_hn_suspend(hdir1);
1facf9fc 30359+ if (hdir1 != hdir2) /* suspend a shared hdir only once */
4a4d8108 30360+ au_hn_suspend(hdir2);
1facf9fc 30361+
30362+ return d;
30363+}
30364+/* reverse of vfsub_lock_rename(): au_hn_resume() first, then unlock_rename() */
30365+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
30366+ struct dentry *d2, struct au_hinode *hdir2)
30367+{
4a4d8108 30368+ au_hn_resume(hdir1);
1facf9fc 30369+ if (hdir1 != hdir2)
4a4d8108 30370+ au_hn_resume(hdir2);
2cbb1c4b 30371+ lockdep_off();
1facf9fc 30372+ unlock_rename(d1, d2);
2cbb1c4b 30373+ lockdep_on();
1facf9fc 30374+}
30375+
30376+/* ---------------------------------------------------------------------- */
30377+/* security_path_mknod() check, vfs_create(), then attr refresh on success */
b4510431 30378+int vfsub_create(struct inode *dir, struct path *path, int mode, bool want_excl)
1facf9fc 30379+{
30380+ int err;
30381+ struct dentry *d;
30382+
30383+ IMustLock(dir);
30384+
30385+ d = path->dentry;
30386+ path->dentry = d->d_parent; /* the hook takes the parent path + child */
b752ccd1 30387+ err = security_path_mknod(path, d, mode, 0);
1facf9fc 30388+ path->dentry = d; /* restore the caller's path */
30389+ if (unlikely(err))
30390+ goto out;
30391+
c1595e42 30392+ lockdep_off();
b4510431 30393+ err = vfs_create(dir, path->dentry, mode, want_excl);
c1595e42 30394+ lockdep_on();
1facf9fc 30395+ if (!err) {
30396+ struct path tmp = *path;
30397+ int did;
30398+
30399+ vfsub_update_h_iattr(&tmp, &did);
30400+ if (did) { /* also refresh the parent */
30401+ tmp.dentry = path->dentry->d_parent;
30402+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30403+ }
30404+ /*ignore*/
30405+ }
30406+
4f0767ce 30407+out:
1facf9fc 30408+ return err;
30409+}
30410+/* security_path_symlink() check, vfs_symlink(), then attr refresh on success */
30411+int vfsub_symlink(struct inode *dir, struct path *path, const char *symname)
30412+{
30413+ int err;
30414+ struct dentry *d;
30415+
30416+ IMustLock(dir);
30417+
30418+ d = path->dentry;
30419+ path->dentry = d->d_parent; /* the hook takes the parent path + child */
b752ccd1 30420+ err = security_path_symlink(path, d, symname);
1facf9fc 30421+ path->dentry = d; /* restore the caller's path */
30422+ if (unlikely(err))
30423+ goto out;
30424+
c1595e42 30425+ lockdep_off();
1facf9fc 30426+ err = vfs_symlink(dir, path->dentry, symname);
c1595e42 30427+ lockdep_on();
1facf9fc 30428+ if (!err) {
30429+ struct path tmp = *path;
30430+ int did;
30431+
30432+ vfsub_update_h_iattr(&tmp, &did);
30433+ if (did) { /* also refresh the parent */
30434+ tmp.dentry = path->dentry->d_parent;
30435+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30436+ }
30437+ /*ignore*/
30438+ }
30439+
4f0767ce 30440+out:
1facf9fc 30441+ return err;
30442+}
30443+/* security_path_mknod() check, vfs_mknod(), then attr refresh on success */
30444+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev)
30445+{
30446+ int err;
30447+ struct dentry *d;
30448+
30449+ IMustLock(dir);
30450+
30451+ d = path->dentry;
30452+ path->dentry = d->d_parent; /* the hook takes the parent path + child */
027c5e7a 30453+ err = security_path_mknod(path, d, mode, new_encode_dev(dev));
1facf9fc 30454+ path->dentry = d; /* restore the caller's path */
30455+ if (unlikely(err))
30456+ goto out;
30457+
c1595e42 30458+ lockdep_off();
1facf9fc 30459+ err = vfs_mknod(dir, path->dentry, mode, dev);
c1595e42 30460+ lockdep_on();
1facf9fc 30461+ if (!err) {
30462+ struct path tmp = *path;
30463+ int did;
30464+
30465+ vfsub_update_h_iattr(&tmp, &did);
30466+ if (did) { /* also refresh the parent */
30467+ tmp.dentry = path->dentry->d_parent;
30468+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30469+ }
30470+ /*ignore*/
30471+ }
30472+
4f0767ce 30473+out:
1facf9fc 30474+ return err;
30475+}
30476+/* -EMLINK when the fs has no nlink limit and i_nlink reached UINT_MAX >> 1 */
30477+static int au_test_nlink(struct inode *inode)
30478+{
30479+ const unsigned int link_max = UINT_MAX >> 1; /* rough margin */
30480+
30481+ if (!au_test_fs_no_limit_nlink(inode->i_sb)
30482+ || inode->i_nlink < link_max)
30483+ return 0;
30484+ return -EMLINK;
30485+}
30486+/* nlink and security_path_link() checks, vfs_link(), then attr refresh */
523b37e3
AM
30487+int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path,
30488+ struct inode **delegated_inode)
1facf9fc 30489+{
30490+ int err;
30491+ struct dentry *d;
30492+
30493+ IMustLock(dir);
30494+
30495+ err = au_test_nlink(src_dentry->d_inode);
30496+ if (unlikely(err))
30497+ return err;
30498+
b4510431 30499+ /* we don't call may_linkat() */
1facf9fc 30500+ d = path->dentry;
30501+ path->dentry = d->d_parent; /* the hook takes the parent path + child */
b752ccd1 30502+ err = security_path_link(src_dentry, path, d);
1facf9fc 30503+ path->dentry = d; /* restore the caller's path */
30504+ if (unlikely(err))
30505+ goto out;
30506+
2cbb1c4b 30507+ lockdep_off();
523b37e3 30508+ err = vfs_link(src_dentry, dir, path->dentry, delegated_inode);
2cbb1c4b 30509+ lockdep_on();
1facf9fc 30510+ if (!err) {
30511+ struct path tmp = *path;
30512+ int did;
30513+
30514+ /* fuse has different memory inode for the same inumber */
30515+ vfsub_update_h_iattr(&tmp, &did);
30516+ if (did) { /* also refresh the parent and the link source */
30517+ tmp.dentry = path->dentry->d_parent;
30518+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30519+ tmp.dentry = src_dentry;
30520+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30521+ }
30522+ /*ignore*/
30523+ }
30524+
4f0767ce 30525+out:
1facf9fc 30526+ return err;
30527+}
30528+
30529+int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry,
523b37e3
AM
30530+ struct inode *dir, struct path *path,
30531+ struct inode **delegated_inode)
1facf9fc 30532+{
30533+ int err;
30534+ struct path tmp = {
30535+ .mnt = path->mnt
30536+ };
30537+ struct dentry *d;
30538+
30539+ IMustLock(dir);
30540+ IMustLock(src_dir);
30541+
30542+ d = path->dentry;
30543+ path->dentry = d->d_parent;
30544+ tmp.dentry = src_dentry->d_parent;
38d290e6 30545+ err = security_path_rename(&tmp, src_dentry, path, d, /*flags*/0);
1facf9fc 30546+ path->dentry = d;
30547+ if (unlikely(err))
30548+ goto out;
30549+
2cbb1c4b 30550+ lockdep_off();
523b37e3 30551+ err = vfs_rename(src_dir, src_dentry, dir, path->dentry,
38d290e6 30552+ delegated_inode, /*flags*/0);
2cbb1c4b 30553+ lockdep_on();
1facf9fc 30554+ if (!err) {
30555+ int did;
30556+
30557+ tmp.dentry = d->d_parent;
30558+ vfsub_update_h_iattr(&tmp, &did);
30559+ if (did) {
30560+ tmp.dentry = src_dentry;
30561+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30562+ tmp.dentry = src_dentry->d_parent;
30563+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30564+ }
30565+ /*ignore*/
30566+ }
30567+
4f0767ce 30568+out:
1facf9fc 30569+ return err;
30570+}
30571+
30572+int vfsub_mkdir(struct inode *dir, struct path *path, int mode)
30573+{
30574+ int err;
30575+ struct dentry *d;
30576+
30577+ IMustLock(dir);
30578+
30579+ d = path->dentry;
30580+ path->dentry = d->d_parent;
b752ccd1 30581+ err = security_path_mkdir(path, d, mode);
1facf9fc 30582+ path->dentry = d;
30583+ if (unlikely(err))
30584+ goto out;
30585+
c1595e42 30586+ lockdep_off();
1facf9fc 30587+ err = vfs_mkdir(dir, path->dentry, mode);
c1595e42 30588+ lockdep_on();
1facf9fc 30589+ if (!err) {
30590+ struct path tmp = *path;
30591+ int did;
30592+
30593+ vfsub_update_h_iattr(&tmp, &did);
30594+ if (did) {
30595+ tmp.dentry = path->dentry->d_parent;
30596+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
30597+ }
30598+ /*ignore*/
30599+ }
30600+
4f0767ce 30601+out:
1facf9fc 30602+ return err;
30603+}
30604+
30605+int vfsub_rmdir(struct inode *dir, struct path *path)
30606+{
30607+ int err;
30608+ struct dentry *d;
30609+
30610+ IMustLock(dir);
30611+
30612+ d = path->dentry;
30613+ path->dentry = d->d_parent;
b752ccd1 30614+ err = security_path_rmdir(path, d);
1facf9fc 30615+ path->dentry = d;
30616+ if (unlikely(err))
30617+ goto out;
30618+
2cbb1c4b 30619+ lockdep_off();
1facf9fc 30620+ err = vfs_rmdir(dir, path->dentry);
2cbb1c4b 30621+ lockdep_on();
1facf9fc 30622+ if (!err) {
30623+ struct path tmp = {
30624+ .dentry = path->dentry->d_parent,
30625+ .mnt = path->mnt
30626+ };
30627+
30628+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
30629+ }
30630+
4f0767ce 30631+out:
1facf9fc 30632+ return err;
30633+}
30634+
30635+/* ---------------------------------------------------------------------- */
30636+
9dbd164d 30637+/* todo: support mmap_sem? */
1facf9fc 30638+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
30639+ loff_t *ppos)
30640+{
30641+ ssize_t err;
30642+
2cbb1c4b 30643+ lockdep_off();
1facf9fc 30644+ err = vfs_read(file, ubuf, count, ppos);
2cbb1c4b 30645+ lockdep_on();
1facf9fc 30646+ if (err >= 0)
30647+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
30648+ return err;
30649+}
30650+
30651+/* todo: kernel_read()? */
30652+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
30653+ loff_t *ppos)
30654+{
30655+ ssize_t err;
30656+ mm_segment_t oldfs;
b752ccd1
AM
30657+ union {
30658+ void *k;
30659+ char __user *u;
30660+ } buf;
1facf9fc 30661+
b752ccd1 30662+ buf.k = kbuf;
1facf9fc 30663+ oldfs = get_fs();
30664+ set_fs(KERNEL_DS);
b752ccd1 30665+ err = vfsub_read_u(file, buf.u, count, ppos);
1facf9fc 30666+ set_fs(oldfs);
30667+ return err;
30668+}
30669+
30670+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
30671+ loff_t *ppos)
30672+{
30673+ ssize_t err;
30674+
2cbb1c4b 30675+ lockdep_off();
1facf9fc 30676+ err = vfs_write(file, ubuf, count, ppos);
2cbb1c4b 30677+ lockdep_on();
1facf9fc 30678+ if (err >= 0)
30679+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
30680+ return err;
30681+}
30682+
30683+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos)
30684+{
30685+ ssize_t err;
30686+ mm_segment_t oldfs;
b752ccd1
AM
30687+ union {
30688+ void *k;
30689+ const char __user *u;
30690+ } buf;
1facf9fc 30691+
b752ccd1 30692+ buf.k = kbuf;
1facf9fc 30693+ oldfs = get_fs();
30694+ set_fs(KERNEL_DS);
b752ccd1 30695+ err = vfsub_write_u(file, buf.u, count, ppos);
1facf9fc 30696+ set_fs(oldfs);
30697+ return err;
30698+}
30699+
4a4d8108
AM
30700+int vfsub_flush(struct file *file, fl_owner_t id)
30701+{
30702+ int err;
30703+
30704+ err = 0;
523b37e3 30705+ if (file->f_op->flush) {
2cbb1c4b
JR
30706+ if (!au_test_nfs(file->f_dentry->d_sb))
30707+ err = file->f_op->flush(file, id);
30708+ else {
30709+ lockdep_off();
30710+ err = file->f_op->flush(file, id);
30711+ lockdep_on();
30712+ }
4a4d8108
AM
30713+ if (!err)
30714+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL);
30715+ /*ignore*/
30716+ }
30717+ return err;
30718+}
30719+
392086de 30720+int vfsub_iterate_dir(struct file *file, struct dir_context *ctx)
1facf9fc 30721+{
30722+ int err;
30723+
523b37e3 30724+ AuDbg("%pD, ctx{%pf, %llu}\n", file, ctx->actor, ctx->pos);
392086de 30725+
2cbb1c4b 30726+ lockdep_off();
392086de 30727+ err = iterate_dir(file, ctx);
2cbb1c4b 30728+ lockdep_on();
1facf9fc 30729+ if (err >= 0)
30730+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
30731+ return err;
30732+}
30733+
30734+long vfsub_splice_to(struct file *in, loff_t *ppos,
30735+ struct pipe_inode_info *pipe, size_t len,
30736+ unsigned int flags)
30737+{
30738+ long err;
30739+
2cbb1c4b 30740+ lockdep_off();
0fc653ad 30741+ err = do_splice_to(in, ppos, pipe, len, flags);
2cbb1c4b 30742+ lockdep_on();
4a4d8108 30743+ file_accessed(in);
1facf9fc 30744+ if (err >= 0)
30745+ vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/
30746+ return err;
30747+}
30748+
30749+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
30750+ loff_t *ppos, size_t len, unsigned int flags)
30751+{
30752+ long err;
30753+
2cbb1c4b 30754+ lockdep_off();
0fc653ad 30755+ err = do_splice_from(pipe, out, ppos, len, flags);
2cbb1c4b 30756+ lockdep_on();
1facf9fc 30757+ if (err >= 0)
30758+ vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/
30759+ return err;
30760+}
30761+
53392da6
AM
30762+int vfsub_fsync(struct file *file, struct path *path, int datasync)
30763+{
30764+ int err;
30765+
30766+ /* file can be NULL */
30767+ lockdep_off();
30768+ err = vfs_fsync(file, datasync);
30769+ lockdep_on();
30770+ if (!err) {
30771+ if (!path) {
30772+ AuDebugOn(!file);
30773+ path = &file->f_path;
30774+ }
30775+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
30776+ }
30777+ return err;
30778+}
30779+
1facf9fc 30780+/* cf. open.c:do_sys_truncate() and do_sys_ftruncate() */
30781+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
30782+ struct file *h_file)
30783+{
30784+ int err;
30785+ struct inode *h_inode;
c06a8ce3 30786+ struct super_block *h_sb;
1facf9fc 30787+
1facf9fc 30788+ if (!h_file) {
c06a8ce3
AM
30789+ err = vfsub_truncate(h_path, length);
30790+ goto out;
1facf9fc 30791+ }
30792+
c06a8ce3
AM
30793+ h_inode = h_path->dentry->d_inode;
30794+ h_sb = h_inode->i_sb;
30795+ lockdep_off();
30796+ sb_start_write(h_sb);
30797+ lockdep_on();
1facf9fc 30798+ err = locks_verify_truncate(h_inode, h_file, length);
30799+ if (!err)
953406b4 30800+ err = security_path_truncate(h_path);
2cbb1c4b
JR
30801+ if (!err) {
30802+ lockdep_off();
1facf9fc 30803+ err = do_truncate(h_path->dentry, length, attr, h_file);
2cbb1c4b
JR
30804+ lockdep_on();
30805+ }
c06a8ce3
AM
30806+ lockdep_off();
30807+ sb_end_write(h_sb);
30808+ lockdep_on();
1facf9fc 30809+
4f0767ce 30810+out:
1facf9fc 30811+ return err;
30812+}
30813+
30814+/* ---------------------------------------------------------------------- */
30815+
/* arguments of vfsub_mkdir(), packed for a void * callback */
struct au_vfsub_mkdir_args {
	int *errp;		/* where the result of vfsub_mkdir() goes */
	struct inode *dir;
	struct path *path;
	int mode;
};

/* callback handed to au_wkq_wait(): unpack @args and run vfsub_mkdir() */
static void au_call_vfsub_mkdir(void *args)
{
	struct au_vfsub_mkdir_args *mkdir_args = args;

	*mkdir_args->errp = vfsub_mkdir(mkdir_args->dir, mkdir_args->path,
					mkdir_args->mode);
}
30828+
30829+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode)
30830+{
30831+ int err, do_sio, wkq_err;
30832+
30833+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
c1595e42
JR
30834+ if (!do_sio) {
30835+ lockdep_off();
1facf9fc 30836+ err = vfsub_mkdir(dir, path, mode);
c1595e42
JR
30837+ lockdep_on();
30838+ } else {
1facf9fc 30839+ struct au_vfsub_mkdir_args args = {
30840+ .errp = &err,
30841+ .dir = dir,
30842+ .path = path,
30843+ .mode = mode
30844+ };
30845+ wkq_err = au_wkq_wait(au_call_vfsub_mkdir, &args);
30846+ if (unlikely(wkq_err))
30847+ err = wkq_err;
30848+ }
30849+
30850+ return err;
30851+}
30852+
/* arguments of vfsub_rmdir(), packed for a void * callback */
struct au_vfsub_rmdir_args {
	int *errp;		/* where the result of vfsub_rmdir() goes */
	struct inode *dir;
	struct path *path;
};

/* callback handed to au_wkq_wait(): unpack @args and run vfsub_rmdir() */
static void au_call_vfsub_rmdir(void *args)
{
	struct au_vfsub_rmdir_args *rmdir_args = args;

	*rmdir_args->errp = vfsub_rmdir(rmdir_args->dir, rmdir_args->path);
}
30864+
30865+int vfsub_sio_rmdir(struct inode *dir, struct path *path)
30866+{
30867+ int err, do_sio, wkq_err;
30868+
30869+ do_sio = au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE);
c1595e42
JR
30870+ if (!do_sio) {
30871+ lockdep_off();
1facf9fc 30872+ err = vfsub_rmdir(dir, path);
c1595e42
JR
30873+ lockdep_on();
30874+ } else {
1facf9fc 30875+ struct au_vfsub_rmdir_args args = {
30876+ .errp = &err,
30877+ .dir = dir,
30878+ .path = path
30879+ };
30880+ wkq_err = au_wkq_wait(au_call_vfsub_rmdir, &args);
30881+ if (unlikely(wkq_err))
30882+ err = wkq_err;
30883+ }
30884+
30885+ return err;
30886+}
30887+
30888+/* ---------------------------------------------------------------------- */
30889+
30890+struct notify_change_args {
30891+ int *errp;
30892+ struct path *path;
30893+ struct iattr *ia;
523b37e3 30894+ struct inode **delegated_inode;
1facf9fc 30895+};
30896+
30897+static void call_notify_change(void *args)
30898+{
30899+ struct notify_change_args *a = args;
30900+ struct inode *h_inode;
30901+
30902+ h_inode = a->path->dentry->d_inode;
30903+ IMustLock(h_inode);
30904+
30905+ *a->errp = -EPERM;
30906+ if (!IS_IMMUTABLE(h_inode) && !IS_APPEND(h_inode)) {
c1595e42 30907+ lockdep_off();
523b37e3
AM
30908+ *a->errp = notify_change(a->path->dentry, a->ia,
30909+ a->delegated_inode);
c1595e42 30910+ lockdep_on();
1facf9fc 30911+ if (!*a->errp)
30912+ vfsub_update_h_iattr(a->path, /*did*/NULL); /*ignore*/
30913+ }
30914+ AuTraceErr(*a->errp);
30915+}
30916+
523b37e3
AM
30917+int vfsub_notify_change(struct path *path, struct iattr *ia,
30918+ struct inode **delegated_inode)
1facf9fc 30919+{
30920+ int err;
30921+ struct notify_change_args args = {
523b37e3
AM
30922+ .errp = &err,
30923+ .path = path,
30924+ .ia = ia,
30925+ .delegated_inode = delegated_inode
1facf9fc 30926+ };
30927+
30928+ call_notify_change(&args);
30929+
30930+ return err;
30931+}
30932+
523b37e3
AM
30933+int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
30934+ struct inode **delegated_inode)
1facf9fc 30935+{
30936+ int err, wkq_err;
30937+ struct notify_change_args args = {
523b37e3
AM
30938+ .errp = &err,
30939+ .path = path,
30940+ .ia = ia,
30941+ .delegated_inode = delegated_inode
1facf9fc 30942+ };
30943+
30944+ wkq_err = au_wkq_wait(call_notify_change, &args);
30945+ if (unlikely(wkq_err))
30946+ err = wkq_err;
30947+
30948+ return err;
30949+}
30950+
30951+/* ---------------------------------------------------------------------- */
30952+
30953+struct unlink_args {
30954+ int *errp;
30955+ struct inode *dir;
30956+ struct path *path;
523b37e3 30957+ struct inode **delegated_inode;
1facf9fc 30958+};
30959+
30960+static void call_unlink(void *args)
30961+{
30962+ struct unlink_args *a = args;
30963+ struct dentry *d = a->path->dentry;
30964+ struct inode *h_inode;
30965+ const int stop_sillyrename = (au_test_nfs(d->d_sb)
c1595e42 30966+ && au_dcount(d) == 1);
1facf9fc 30967+
30968+ IMustLock(a->dir);
30969+
30970+ a->path->dentry = d->d_parent;
30971+ *a->errp = security_path_unlink(a->path, d);
30972+ a->path->dentry = d;
30973+ if (unlikely(*a->errp))
30974+ return;
30975+
30976+ if (!stop_sillyrename)
30977+ dget(d);
30978+ h_inode = d->d_inode;
30979+ if (h_inode)
027c5e7a 30980+ ihold(h_inode);
1facf9fc 30981+
2cbb1c4b 30982+ lockdep_off();
523b37e3 30983+ *a->errp = vfs_unlink(a->dir, d, a->delegated_inode);
2cbb1c4b 30984+ lockdep_on();
1facf9fc 30985+ if (!*a->errp) {
30986+ struct path tmp = {
30987+ .dentry = d->d_parent,
30988+ .mnt = a->path->mnt
30989+ };
30990+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
30991+ }
30992+
30993+ if (!stop_sillyrename)
30994+ dput(d);
30995+ if (h_inode)
30996+ iput(h_inode);
30997+
30998+ AuTraceErr(*a->errp);
30999+}
31000+
31001+/*
31002+ * @dir: must be locked.
31003+ * @dentry: target dentry.
31004+ */
523b37e3
AM
31005+int vfsub_unlink(struct inode *dir, struct path *path,
31006+ struct inode **delegated_inode, int force)
1facf9fc 31007+{
31008+ int err;
31009+ struct unlink_args args = {
523b37e3
AM
31010+ .errp = &err,
31011+ .dir = dir,
31012+ .path = path,
31013+ .delegated_inode = delegated_inode
1facf9fc 31014+ };
31015+
31016+ if (!force)
31017+ call_unlink(&args);
31018+ else {
31019+ int wkq_err;
31020+
31021+ wkq_err = au_wkq_wait(call_unlink, &args);
31022+ if (unlikely(wkq_err))
31023+ err = wkq_err;
31024+ }
31025+
31026+ return err;
31027+}
7f207e10
AM
31028diff -urN /usr/share/empty/fs/aufs/vfsub.h linux/fs/aufs/vfsub.h
31029--- /usr/share/empty/fs/aufs/vfsub.h 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
31030+++ linux/fs/aufs/vfsub.h 2016-02-28 11:27:01.283912420 +0100
31031@@ -0,0 +1,315 @@
1facf9fc 31032+/*
7f2ca4b1 31033+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 31034+ *
31035+ * This program, aufs is free software; you can redistribute it and/or modify
31036+ * it under the terms of the GNU General Public License as published by
31037+ * the Free Software Foundation; either version 2 of the License, or
31038+ * (at your option) any later version.
dece6358
AM
31039+ *
31040+ * This program is distributed in the hope that it will be useful,
31041+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
31042+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31043+ * GNU General Public License for more details.
31044+ *
31045+ * You should have received a copy of the GNU General Public License
523b37e3 31046+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 31047+ */
31048+
31049+/*
31050+ * sub-routines for VFS
31051+ */
31052+
31053+#ifndef __AUFS_VFSUB_H__
31054+#define __AUFS_VFSUB_H__
31055+
31056+#ifdef __KERNEL__
31057+
31058+#include <linux/fs.h>
b4510431 31059+#include <linux/mount.h>
7f2ca4b1 31060+#include <linux/posix_acl.h>
c1595e42 31061+#include <linux/xattr.h>
7f207e10 31062+#include "debug.h"
1facf9fc 31063+
7f207e10 31064+/* copied from linux/fs/internal.h */
2cbb1c4b 31065+/* todo: BAD approach!! */
c06a8ce3 31066+extern void __mnt_drop_write(struct vfsmount *);
2cbb1c4b 31067+extern spinlock_t inode_sb_list_lock;
7f2ca4b1 31068+extern int open_check_o_direct(struct file *f);
7f207e10
AM
31069+
31070+/* ---------------------------------------------------------------------- */
1facf9fc 31071+
31072+/* lock subclass for lower inode */
31073+/* default MAX_LOCKDEP_SUBCLASSES(8) is not enough */
31074+/* reduce? gave up. */
31075+enum {
c1595e42 31076+ AuLsc_I_Begin = I_MUTEX_PARENT2, /* 5 */
1facf9fc 31077+ AuLsc_I_PARENT, /* lower inode, parent first */
31078+ AuLsc_I_PARENT2, /* copyup dirs */
dece6358 31079+ AuLsc_I_PARENT3, /* copyup wh */
1facf9fc 31080+ AuLsc_I_CHILD,
31081+ AuLsc_I_CHILD2,
31082+ AuLsc_I_End
31083+};
31084+
/*
 * Assertions (via AuDebugOn) that a mutex, or an inode's i_mutex, is held.
 * to debug easier, do not make them inlined functions.
 */
#define MtxMustLock(mtx)	AuDebugOn(!mutex_is_locked(mtx))
#define IMustLock(i)		MtxMustLock(&(i)->i_mutex)
31088+
31089+/* ---------------------------------------------------------------------- */
31090+
7f207e10
AM
31091+static inline void vfsub_drop_nlink(struct inode *inode)
31092+{
31093+ AuDebugOn(!inode->i_nlink);
31094+ drop_nlink(inode);
31095+}
31096+
027c5e7a
AM
31097+static inline void vfsub_dead_dir(struct inode *inode)
31098+{
31099+ AuDebugOn(!S_ISDIR(inode->i_mode));
31100+ inode->i_flags |= S_DEAD;
31101+ clear_nlink(inode);
31102+}
31103+
392086de
AM
31104+static inline int vfsub_native_ro(struct inode *inode)
31105+{
31106+ return (inode->i_sb->s_flags & MS_RDONLY)
31107+ || IS_RDONLY(inode)
31108+ /* || IS_APPEND(inode) */
31109+ || IS_IMMUTABLE(inode);
31110+}
31111+
7f2ca4b1
JR
31112+#ifdef CONFIG_AUFS_BR_FUSE
31113+int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb);
31114+#else
31115+AuStubInt0(vfsub_test_mntns, struct vfsmount *mnt, struct super_block *h_sb);
31116+#endif
31117+
7f207e10
AM
31118+/* ---------------------------------------------------------------------- */
31119+
31120+int vfsub_update_h_iattr(struct path *h_path, int *did);
31121+struct file *vfsub_dentry_open(struct path *path, int flags);
31122+struct file *vfsub_filp_open(const char *path, int oflags, int mode);
7f2ca4b1
JR
31123+struct vfsub_aopen_args {
31124+ struct file *file;
31125+ unsigned int open_flag;
31126+ umode_t create_mode;
31127+ int *opened;
31128+};
31129+struct au_branch;
31130+int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
31131+ struct vfsub_aopen_args *args, struct au_branch *br);
1facf9fc 31132+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path);
b4510431 31133+
1facf9fc 31134+struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
31135+ int len);
b4510431
AM
31136+
31137+struct vfsub_lkup_one_args {
31138+ struct dentry **errp;
31139+ struct qstr *name;
31140+ struct dentry *parent;
31141+};
31142+
31143+static inline struct dentry *vfsub_lkup_one(struct qstr *name,
31144+ struct dentry *parent)
31145+{
31146+ return vfsub_lookup_one_len(name->name, parent, name->len);
31147+}
31148+
31149+void vfsub_call_lkup_one(void *args);
31150+
31151+/* ---------------------------------------------------------------------- */
31152+
/* mnt_want_write() with lockdep switched off around the call */
static inline int vfsub_mnt_want_write(struct vfsmount *mnt)
{
	int ret;

	lockdep_off();
	ret = mnt_want_write(mnt);
	lockdep_on();

	return ret;
}
31162+
/* mnt_drop_write() with lockdep switched off around the call */
static inline void vfsub_mnt_drop_write(struct vfsmount *mnt)
{
	lockdep_off();
	mnt_drop_write(mnt);
	lockdep_on();
}
1facf9fc 31169+
7f2ca4b1 31170+#if 0 /* reserved */
c06a8ce3
AM
31171+static inline void vfsub_mnt_drop_write_file(struct file *file)
31172+{
31173+ lockdep_off();
31174+ mnt_drop_write_file(file);
31175+ lockdep_on();
31176+}
7f2ca4b1 31177+#endif
c06a8ce3 31178+
1facf9fc 31179+/* ---------------------------------------------------------------------- */
31180+
31181+struct au_hinode;
31182+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
31183+ struct dentry *d2, struct au_hinode *hdir2);
31184+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
31185+ struct dentry *d2, struct au_hinode *hdir2);
31186+
537831f9
AM
31187+int vfsub_create(struct inode *dir, struct path *path, int mode,
31188+ bool want_excl);
1facf9fc 31189+int vfsub_symlink(struct inode *dir, struct path *path,
31190+ const char *symname);
31191+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev);
31192+int vfsub_link(struct dentry *src_dentry, struct inode *dir,
523b37e3 31193+ struct path *path, struct inode **delegated_inode);
1facf9fc 31194+int vfsub_rename(struct inode *src_hdir, struct dentry *src_dentry,
523b37e3
AM
31195+ struct inode *hdir, struct path *path,
31196+ struct inode **delegated_inode);
1facf9fc 31197+int vfsub_mkdir(struct inode *dir, struct path *path, int mode);
31198+int vfsub_rmdir(struct inode *dir, struct path *path);
31199+
31200+/* ---------------------------------------------------------------------- */
31201+
31202+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
31203+ loff_t *ppos);
31204+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
31205+ loff_t *ppos);
31206+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
31207+ loff_t *ppos);
31208+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count,
31209+ loff_t *ppos);
4a4d8108 31210+int vfsub_flush(struct file *file, fl_owner_t id);
392086de
AM
31211+int vfsub_iterate_dir(struct file *file, struct dir_context *ctx);
31212+
31213+/* just for type-check */
31214+static inline filldir_t au_diractor(int (*func)(struct dir_context *,
31215+ const char *, int, loff_t, u64,
31216+ unsigned))
31217+{
31218+ return (filldir_t)func;
31219+}
31220+
c06a8ce3
AM
31221+static inline loff_t vfsub_f_size_read(struct file *file)
31222+{
31223+ return i_size_read(file_inode(file));
31224+}
31225+
4a4d8108
AM
31226+static inline unsigned int vfsub_file_flags(struct file *file)
31227+{
31228+ unsigned int flags;
31229+
31230+ spin_lock(&file->f_lock);
31231+ flags = file->f_flags;
31232+ spin_unlock(&file->f_lock);
31233+
31234+ return flags;
31235+}
1308ab2a 31236+
7f2ca4b1 31237+#if 0 /* reserved */
1facf9fc 31238+static inline void vfsub_file_accessed(struct file *h_file)
31239+{
31240+ file_accessed(h_file);
31241+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); /*ignore*/
31242+}
7f2ca4b1 31243+#endif
1facf9fc 31244+
31245+static inline void vfsub_touch_atime(struct vfsmount *h_mnt,
31246+ struct dentry *h_dentry)
31247+{
31248+ struct path h_path = {
31249+ .dentry = h_dentry,
31250+ .mnt = h_mnt
31251+ };
92d182d2 31252+ touch_atime(&h_path);
1facf9fc 31253+ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
31254+}
31255+
0c3ec466
AM
/*
 * Plain update_time().
 * no vfsub_update_h_iattr() since we don't have struct path
 */
static inline int vfsub_update_time(struct inode *h_inode, struct timespec *ts,
				    int flags)
{
	int err;

	err = update_time(h_inode, ts, flags);
	return err;
}
31262+
7f2ca4b1
JR
31263+#ifdef CONFIG_FS_POSIX_ACL
31264+static inline int vfsub_acl_chmod(struct inode *h_inode, umode_t h_mode)
31265+{
31266+ int err;
31267+
31268+ err = posix_acl_chmod(h_inode, h_mode);
31269+ if (err == -EOPNOTSUPP)
31270+ err = 0;
31271+ return err;
31272+}
31273+#else
31274+AuStubInt0(vfsub_acl_chmod, struct inode *h_inode, umode_t h_mode);
31275+#endif
31276+
4a4d8108
AM
31277+long vfsub_splice_to(struct file *in, loff_t *ppos,
31278+ struct pipe_inode_info *pipe, size_t len,
31279+ unsigned int flags);
31280+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
31281+ loff_t *ppos, size_t len, unsigned int flags);
c06a8ce3
AM
31282+
31283+static inline long vfsub_truncate(struct path *path, loff_t length)
31284+{
31285+ long err;
076b876e 31286+
c06a8ce3
AM
31287+ lockdep_off();
31288+ err = vfs_truncate(path, length);
31289+ lockdep_on();
31290+ return err;
31291+}
31292+
4a4d8108
AM
31293+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
31294+ struct file *h_file);
53392da6 31295+int vfsub_fsync(struct file *file, struct path *path, int datasync);
4a4d8108 31296+
1facf9fc 31297+/* ---------------------------------------------------------------------- */
31298+
31299+static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin)
31300+{
31301+ loff_t err;
31302+
2cbb1c4b 31303+ lockdep_off();
1facf9fc 31304+ err = vfs_llseek(file, offset, origin);
2cbb1c4b 31305+ lockdep_on();
1facf9fc 31306+ return err;
31307+}
31308+
31309+/* ---------------------------------------------------------------------- */
31310+
4a4d8108
AM
31311+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode);
31312+int vfsub_sio_rmdir(struct inode *dir, struct path *path);
523b37e3
AM
31313+int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
31314+ struct inode **delegated_inode);
31315+int vfsub_notify_change(struct path *path, struct iattr *ia,
31316+ struct inode **delegated_inode);
31317+int vfsub_unlink(struct inode *dir, struct path *path,
31318+ struct inode **delegated_inode, int force);
4a4d8108 31319+
c1595e42
JR
31320+/* ---------------------------------------------------------------------- */
31321+
31322+static inline int vfsub_setxattr(struct dentry *dentry, const char *name,
31323+ const void *value, size_t size, int flags)
31324+{
31325+ int err;
31326+
31327+ lockdep_off();
31328+ err = vfs_setxattr(dentry, name, value, size, flags);
31329+ lockdep_on();
31330+
31331+ return err;
31332+}
31333+
/* vfs_removexattr() with lockdep switched off around the call */
static inline int vfsub_removexattr(struct dentry *dentry, const char *name)
{
	int ret;

	lockdep_off();
	ret = vfs_removexattr(dentry, name);
	lockdep_on();
	return ret;
}
31344+
1facf9fc 31345+#endif /* __KERNEL__ */
31346+#endif /* __AUFS_VFSUB_H__ */
7f207e10
AM
31347diff -urN /usr/share/empty/fs/aufs/wbr_policy.c linux/fs/aufs/wbr_policy.c
31348--- /usr/share/empty/fs/aufs/wbr_policy.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 31349+++ linux/fs/aufs/wbr_policy.c 2016-02-28 11:27:01.283912420 +0100
076b876e 31350@@ -0,0 +1,765 @@
1facf9fc 31351+/*
7f2ca4b1 31352+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 31353+ *
31354+ * This program, aufs is free software; you can redistribute it and/or modify
31355+ * it under the terms of the GNU General Public License as published by
31356+ * the Free Software Foundation; either version 2 of the License, or
31357+ * (at your option) any later version.
dece6358
AM
31358+ *
31359+ * This program is distributed in the hope that it will be useful,
31360+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
31361+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31362+ * GNU General Public License for more details.
31363+ *
31364+ * You should have received a copy of the GNU General Public License
523b37e3 31365+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 31366+ */
31367+
31368+/*
31369+ * policies for selecting one among multiple writable branches
31370+ */
31371+
31372+#include <linux/statfs.h>
31373+#include "aufs.h"
31374+
31375+/* subset of cpup_attr() */
31376+static noinline_for_stack
31377+int au_cpdown_attr(struct path *h_path, struct dentry *h_src)
31378+{
31379+ int err, sbits;
31380+ struct iattr ia;
31381+ struct inode *h_isrc;
31382+
31383+ h_isrc = h_src->d_inode;
31384+ ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID;
31385+ ia.ia_mode = h_isrc->i_mode;
31386+ ia.ia_uid = h_isrc->i_uid;
31387+ ia.ia_gid = h_isrc->i_gid;
31388+ sbits = !!(ia.ia_mode & (S_ISUID | S_ISGID));
86dc4139 31389+ au_cpup_attr_flags(h_path->dentry->d_inode, h_isrc->i_flags);
523b37e3
AM
31390+ /* no delegation since it is just created */
31391+ err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
1facf9fc 31392+
31393+ /* is this nfs only? */
31394+ if (!err && sbits && au_test_nfs(h_path->dentry->d_sb)) {
31395+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
31396+ ia.ia_mode = h_isrc->i_mode;
523b37e3 31397+ err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
1facf9fc 31398+ }
31399+
31400+ return err;
31401+}
31402+
/*
 * state flags of au_cpdown_dir()
 * PARENT_OPQ: set when au_dbdiropq() is not greater than the target branch
 * WHED: set when au_dbwh() equals the target branch
 * MADE_DIR: set after vfsub_sio_mkdir() succeeded
 * DIROPQ: set after au_diropq_create() succeeded
 */
#define AuCpdown_PARENT_OPQ	(1 << 0)
#define AuCpdown_WHED		(1 << 1)
#define AuCpdown_MADE_DIR	(1 << 2)
#define AuCpdown_DIROPQ		(1 << 3)

/* test, set and clear a flag given by its short name */
#define au_ftest_cpdown(flags, name)	((flags) & AuCpdown_##name)
#define au_fset_cpdown(flags, name) \
	do { (flags) |= AuCpdown_##name; } while (0)
#define au_fclr_cpdown(flags, name) \
	do { (flags) &= ~AuCpdown_##name; } while (0)
1facf9fc 31412+
1facf9fc 31413+static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst,
c2b27bf2 31414+ unsigned int *flags)
1facf9fc 31415+{
31416+ int err;
31417+ struct dentry *opq_dentry;
31418+
31419+ opq_dentry = au_diropq_create(dentry, bdst);
31420+ err = PTR_ERR(opq_dentry);
31421+ if (IS_ERR(opq_dentry))
31422+ goto out;
31423+ dput(opq_dentry);
c2b27bf2 31424+ au_fset_cpdown(*flags, DIROPQ);
1facf9fc 31425+
4f0767ce 31426+out:
1facf9fc 31427+ return err;
31428+}
31429+
31430+static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent,
31431+ struct inode *dir, aufs_bindex_t bdst)
31432+{
31433+ int err;
31434+ struct path h_path;
31435+ struct au_branch *br;
31436+
31437+ br = au_sbr(dentry->d_sb, bdst);
31438+ h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
31439+ err = PTR_ERR(h_path.dentry);
31440+ if (IS_ERR(h_path.dentry))
31441+ goto out;
31442+
31443+ err = 0;
31444+ if (h_path.dentry->d_inode) {
86dc4139 31445+ h_path.mnt = au_br_mnt(br);
1facf9fc 31446+ err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path,
31447+ dentry);
31448+ }
31449+ dput(h_path.dentry);
31450+
4f0767ce 31451+out:
1facf9fc 31452+ return err;
31453+}
31454+
31455+static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst,
86dc4139 31456+ struct au_pin *pin,
1facf9fc 31457+ struct dentry *h_parent, void *arg)
31458+{
31459+ int err, rerr;
4a4d8108 31460+ aufs_bindex_t bopq, bstart;
1facf9fc 31461+ struct path h_path;
31462+ struct dentry *parent;
31463+ struct inode *h_dir, *h_inode, *inode, *dir;
c2b27bf2 31464+ unsigned int *flags = arg;
1facf9fc 31465+
31466+ bstart = au_dbstart(dentry);
31467+ /* dentry is di-locked */
31468+ parent = dget_parent(dentry);
31469+ dir = parent->d_inode;
31470+ h_dir = h_parent->d_inode;
31471+ AuDebugOn(h_dir != au_h_iptr(dir, bdst));
31472+ IMustLock(h_dir);
31473+
86dc4139 31474+ err = au_lkup_neg(dentry, bdst, /*wh*/0);
1facf9fc 31475+ if (unlikely(err < 0))
31476+ goto out;
31477+ h_path.dentry = au_h_dptr(dentry, bdst);
31478+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst);
31479+ err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path,
31480+ S_IRWXU | S_IRUGO | S_IXUGO);
31481+ if (unlikely(err))
31482+ goto out_put;
c2b27bf2 31483+ au_fset_cpdown(*flags, MADE_DIR);
1facf9fc 31484+
1facf9fc 31485+ bopq = au_dbdiropq(dentry);
c2b27bf2
AM
31486+ au_fclr_cpdown(*flags, WHED);
31487+ au_fclr_cpdown(*flags, DIROPQ);
1facf9fc 31488+ if (au_dbwh(dentry) == bdst)
c2b27bf2
AM
31489+ au_fset_cpdown(*flags, WHED);
31490+ if (!au_ftest_cpdown(*flags, PARENT_OPQ) && bopq <= bdst)
31491+ au_fset_cpdown(*flags, PARENT_OPQ);
1facf9fc 31492+ h_inode = h_path.dentry->d_inode;
31493+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
c2b27bf2
AM
31494+ if (au_ftest_cpdown(*flags, WHED)) {
31495+ err = au_cpdown_dir_opq(dentry, bdst, flags);
1facf9fc 31496+ if (unlikely(err)) {
31497+ mutex_unlock(&h_inode->i_mutex);
31498+ goto out_dir;
31499+ }
31500+ }
31501+
31502+ err = au_cpdown_attr(&h_path, au_h_dptr(dentry, bstart));
31503+ mutex_unlock(&h_inode->i_mutex);
31504+ if (unlikely(err))
31505+ goto out_opq;
31506+
c2b27bf2 31507+ if (au_ftest_cpdown(*flags, WHED)) {
1facf9fc 31508+ err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst);
31509+ if (unlikely(err))
31510+ goto out_opq;
31511+ }
31512+
31513+ inode = dentry->d_inode;
31514+ if (au_ibend(inode) < bdst)
31515+ au_set_ibend(inode, bdst);
31516+ au_set_h_iptr(inode, bdst, au_igrab(h_inode),
31517+ au_hi_flags(inode, /*isdir*/1));
076b876e 31518+ au_fhsm_wrote(dentry->d_sb, bdst, /*force*/0);
1facf9fc 31519+ goto out; /* success */
31520+
31521+ /* revert */
4f0767ce 31522+out_opq:
c2b27bf2 31523+ if (au_ftest_cpdown(*flags, DIROPQ)) {
1facf9fc 31524+ mutex_lock_nested(&h_inode->i_mutex, AuLsc_I_CHILD);
31525+ rerr = au_diropq_remove(dentry, bdst);
31526+ mutex_unlock(&h_inode->i_mutex);
31527+ if (unlikely(rerr)) {
523b37e3
AM
31528+ AuIOErr("failed removing diropq for %pd b%d (%d)\n",
31529+ dentry, bdst, rerr);
1facf9fc 31530+ err = -EIO;
31531+ goto out;
31532+ }
31533+ }
4f0767ce 31534+out_dir:
c2b27bf2 31535+ if (au_ftest_cpdown(*flags, MADE_DIR)) {
1facf9fc 31536+ rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path);
31537+ if (unlikely(rerr)) {
523b37e3
AM
31538+ AuIOErr("failed removing %pd b%d (%d)\n",
31539+ dentry, bdst, rerr);
1facf9fc 31540+ err = -EIO;
31541+ }
31542+ }
4f0767ce 31543+out_put:
1facf9fc 31544+ au_set_h_dptr(dentry, bdst, NULL);
31545+ if (au_dbend(dentry) == bdst)
31546+ au_update_dbend(dentry);
4f0767ce 31547+out:
1facf9fc 31548+ dput(parent);
31549+ return err;
31550+}
31551+
31552+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst)
31553+{
31554+ int err;
c2b27bf2 31555+ unsigned int flags;
1facf9fc 31556+
c2b27bf2
AM
31557+ flags = 0;
31558+ err = au_cp_dirs(dentry, bdst, au_cpdown_dir, &flags);
1facf9fc 31559+
31560+ return err;
31561+}
31562+
31563+/* ---------------------------------------------------------------------- */
31564+
31565+/* policies for create */
31566+
c2b27bf2 31567+int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex)
4a4d8108
AM
31568+{
31569+ int err, i, j, ndentry;
31570+ aufs_bindex_t bopq;
31571+ struct au_dcsub_pages dpages;
31572+ struct au_dpage *dpage;
31573+ struct dentry **dentries, *parent, *d;
31574+
31575+ err = au_dpages_init(&dpages, GFP_NOFS);
31576+ if (unlikely(err))
31577+ goto out;
31578+ parent = dget_parent(dentry);
027c5e7a 31579+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/0);
4a4d8108
AM
31580+ if (unlikely(err))
31581+ goto out_free;
31582+
31583+ err = bindex;
31584+ for (i = 0; i < dpages.ndpage; i++) {
31585+ dpage = dpages.dpages + i;
31586+ dentries = dpage->dentries;
31587+ ndentry = dpage->ndentry;
31588+ for (j = 0; j < ndentry; j++) {
31589+ d = dentries[j];
31590+ di_read_lock_parent2(d, !AuLock_IR);
31591+ bopq = au_dbdiropq(d);
31592+ di_read_unlock(d, !AuLock_IR);
31593+ if (bopq >= 0 && bopq < err)
31594+ err = bopq;
31595+ }
31596+ }
31597+
31598+out_free:
31599+ dput(parent);
31600+ au_dpages_free(&dpages);
31601+out:
31602+ return err;
31603+}
31604+
1facf9fc 31605+static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex)
31606+{
31607+ for (; bindex >= 0; bindex--)
31608+ if (!au_br_rdonly(au_sbr(sb, bindex)))
31609+ return bindex;
31610+ return -EROFS;
31611+}
31612+
31613+/* top down parent */
392086de
AM
31614+static int au_wbr_create_tdp(struct dentry *dentry,
31615+ unsigned int flags __maybe_unused)
1facf9fc 31616+{
31617+ int err;
31618+ aufs_bindex_t bstart, bindex;
31619+ struct super_block *sb;
31620+ struct dentry *parent, *h_parent;
31621+
31622+ sb = dentry->d_sb;
31623+ bstart = au_dbstart(dentry);
31624+ err = bstart;
31625+ if (!au_br_rdonly(au_sbr(sb, bstart)))
31626+ goto out;
31627+
31628+ err = -EROFS;
31629+ parent = dget_parent(dentry);
31630+ for (bindex = au_dbstart(parent); bindex < bstart; bindex++) {
31631+ h_parent = au_h_dptr(parent, bindex);
31632+ if (!h_parent || !h_parent->d_inode)
31633+ continue;
31634+
31635+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
31636+ err = bindex;
31637+ break;
31638+ }
31639+ }
31640+ dput(parent);
31641+
31642+ /* bottom up here */
4a4d8108 31643+ if (unlikely(err < 0)) {
1facf9fc 31644+ err = au_wbr_bu(sb, bstart - 1);
4a4d8108
AM
31645+ if (err >= 0)
31646+ err = au_wbr_nonopq(dentry, err);
31647+ }
1facf9fc 31648+
4f0767ce 31649+out:
1facf9fc 31650+ AuDbg("b%d\n", err);
31651+ return err;
31652+}
31653+
31654+/* ---------------------------------------------------------------------- */
31655+
31656+/* an exception for the policy other than tdp */
31657+static int au_wbr_create_exp(struct dentry *dentry)
31658+{
31659+ int err;
31660+ aufs_bindex_t bwh, bdiropq;
31661+ struct dentry *parent;
31662+
31663+ err = -1;
31664+ bwh = au_dbwh(dentry);
31665+ parent = dget_parent(dentry);
31666+ bdiropq = au_dbdiropq(parent);
31667+ if (bwh >= 0) {
31668+ if (bdiropq >= 0)
31669+ err = min(bdiropq, bwh);
31670+ else
31671+ err = bwh;
31672+ AuDbg("%d\n", err);
31673+ } else if (bdiropq >= 0) {
31674+ err = bdiropq;
31675+ AuDbg("%d\n", err);
31676+ }
31677+ dput(parent);
31678+
4a4d8108
AM
31679+ if (err >= 0)
31680+ err = au_wbr_nonopq(dentry, err);
31681+
1facf9fc 31682+ if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err)))
31683+ err = -1;
31684+
31685+ AuDbg("%d\n", err);
31686+ return err;
31687+}
31688+
31689+/* ---------------------------------------------------------------------- */
31690+
31691+/* round robin */
31692+static int au_wbr_create_init_rr(struct super_block *sb)
31693+{
31694+ int err;
31695+
31696+ err = au_wbr_bu(sb, au_sbend(sb));
31697+ atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */
dece6358 31698+ /* smp_mb(); */
1facf9fc 31699+
31700+ AuDbg("b%d\n", err);
31701+ return err;
31702+}
31703+
392086de 31704+static int au_wbr_create_rr(struct dentry *dentry, unsigned int flags)
1facf9fc 31705+{
31706+ int err, nbr;
31707+ unsigned int u;
31708+ aufs_bindex_t bindex, bend;
31709+ struct super_block *sb;
31710+ atomic_t *next;
31711+
31712+ err = au_wbr_create_exp(dentry);
31713+ if (err >= 0)
31714+ goto out;
31715+
31716+ sb = dentry->d_sb;
31717+ next = &au_sbi(sb)->si_wbr_rr_next;
31718+ bend = au_sbend(sb);
31719+ nbr = bend + 1;
31720+ for (bindex = 0; bindex <= bend; bindex++) {
392086de 31721+ if (!au_ftest_wbr(flags, DIR)) {
1facf9fc 31722+ err = atomic_dec_return(next) + 1;
31723+ /* modulo for 0 is meaningless */
31724+ if (unlikely(!err))
31725+ err = atomic_dec_return(next) + 1;
31726+ } else
31727+ err = atomic_read(next);
31728+ AuDbg("%d\n", err);
31729+ u = err;
31730+ err = u % nbr;
31731+ AuDbg("%d\n", err);
31732+ if (!au_br_rdonly(au_sbr(sb, err)))
31733+ break;
31734+ err = -EROFS;
31735+ }
31736+
4a4d8108
AM
31737+ if (err >= 0)
31738+ err = au_wbr_nonopq(dentry, err);
31739+
4f0767ce 31740+out:
1facf9fc 31741+ AuDbg("%d\n", err);
31742+ return err;
31743+}
31744+
31745+/* ---------------------------------------------------------------------- */
31746+
31747+/* most free space */
392086de 31748+static void au_mfs(struct dentry *dentry, struct dentry *parent)
1facf9fc 31749+{
31750+ struct super_block *sb;
31751+ struct au_branch *br;
31752+ struct au_wbr_mfs *mfs;
392086de 31753+ struct dentry *h_parent;
1facf9fc 31754+ aufs_bindex_t bindex, bend;
31755+ int err;
31756+ unsigned long long b, bavail;
7f207e10 31757+ struct path h_path;
1facf9fc 31758+ /* reduce the stack usage */
31759+ struct kstatfs *st;
31760+
31761+ st = kmalloc(sizeof(*st), GFP_NOFS);
31762+ if (unlikely(!st)) {
31763+ AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM);
31764+ return;
31765+ }
31766+
31767+ bavail = 0;
31768+ sb = dentry->d_sb;
31769+ mfs = &au_sbi(sb)->si_wbr_mfs;
dece6358 31770+ MtxMustLock(&mfs->mfs_lock);
1facf9fc 31771+ mfs->mfs_bindex = -EROFS;
31772+ mfs->mfsrr_bytes = 0;
392086de
AM
31773+ if (!parent) {
31774+ bindex = 0;
31775+ bend = au_sbend(sb);
31776+ } else {
31777+ bindex = au_dbstart(parent);
31778+ bend = au_dbtaildir(parent);
31779+ }
31780+
31781+ for (; bindex <= bend; bindex++) {
31782+ if (parent) {
31783+ h_parent = au_h_dptr(parent, bindex);
31784+ if (!h_parent || !h_parent->d_inode)
31785+ continue;
31786+ }
1facf9fc 31787+ br = au_sbr(sb, bindex);
31788+ if (au_br_rdonly(br))
31789+ continue;
31790+
31791+ /* sb->s_root for NFS is unreliable */
86dc4139 31792+ h_path.mnt = au_br_mnt(br);
7f207e10
AM
31793+ h_path.dentry = h_path.mnt->mnt_root;
31794+ err = vfs_statfs(&h_path, st);
1facf9fc 31795+ if (unlikely(err)) {
31796+ AuWarn1("failed statfs, b%d, %d\n", bindex, err);
31797+ continue;
31798+ }
31799+
31800+ /* when the available size is equal, select the lower one */
31801+ BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail)
31802+ || sizeof(b) < sizeof(st->f_bsize));
31803+ b = st->f_bavail * st->f_bsize;
31804+ br->br_wbr->wbr_bytes = b;
31805+ if (b >= bavail) {
31806+ bavail = b;
31807+ mfs->mfs_bindex = bindex;
31808+ mfs->mfs_jiffy = jiffies;
31809+ }
31810+ }
31811+
31812+ mfs->mfsrr_bytes = bavail;
31813+ AuDbg("b%d\n", mfs->mfs_bindex);
31814+ kfree(st);
31815+}
31816+
392086de 31817+static int au_wbr_create_mfs(struct dentry *dentry, unsigned int flags)
1facf9fc 31818+{
31819+ int err;
392086de 31820+ struct dentry *parent;
1facf9fc 31821+ struct super_block *sb;
31822+ struct au_wbr_mfs *mfs;
31823+
31824+ err = au_wbr_create_exp(dentry);
31825+ if (err >= 0)
31826+ goto out;
31827+
31828+ sb = dentry->d_sb;
392086de
AM
31829+ parent = NULL;
31830+ if (au_ftest_wbr(flags, PARENT))
31831+ parent = dget_parent(dentry);
1facf9fc 31832+ mfs = &au_sbi(sb)->si_wbr_mfs;
31833+ mutex_lock(&mfs->mfs_lock);
31834+ if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
31835+ || mfs->mfs_bindex < 0
31836+ || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex)))
392086de 31837+ au_mfs(dentry, parent);
1facf9fc 31838+ mutex_unlock(&mfs->mfs_lock);
31839+ err = mfs->mfs_bindex;
392086de 31840+ dput(parent);
1facf9fc 31841+
4a4d8108
AM
31842+ if (err >= 0)
31843+ err = au_wbr_nonopq(dentry, err);
31844+
4f0767ce 31845+out:
1facf9fc 31846+ AuDbg("b%d\n", err);
31847+ return err;
31848+}
31849+
31850+static int au_wbr_create_init_mfs(struct super_block *sb)
31851+{
31852+ struct au_wbr_mfs *mfs;
31853+
31854+ mfs = &au_sbi(sb)->si_wbr_mfs;
31855+ mutex_init(&mfs->mfs_lock);
31856+ mfs->mfs_jiffy = 0;
31857+ mfs->mfs_bindex = -EROFS;
31858+
31859+ return 0;
31860+}
31861+
31862+static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused)
31863+{
31864+ mutex_destroy(&au_sbi(sb)->si_wbr_mfs.mfs_lock);
31865+ return 0;
31866+}
31867+
31868+/* ---------------------------------------------------------------------- */
31869+
31870+/* most free space and then round robin */
392086de 31871+static int au_wbr_create_mfsrr(struct dentry *dentry, unsigned int flags)
1facf9fc 31872+{
31873+ int err;
31874+ struct au_wbr_mfs *mfs;
31875+
392086de 31876+ err = au_wbr_create_mfs(dentry, flags);
1facf9fc 31877+ if (err >= 0) {
31878+ mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs;
dece6358 31879+ mutex_lock(&mfs->mfs_lock);
1facf9fc 31880+ if (mfs->mfsrr_bytes < mfs->mfsrr_watermark)
392086de 31881+ err = au_wbr_create_rr(dentry, flags);
dece6358 31882+ mutex_unlock(&mfs->mfs_lock);
1facf9fc 31883+ }
31884+
31885+ AuDbg("b%d\n", err);
31886+ return err;
31887+}
31888+
31889+static int au_wbr_create_init_mfsrr(struct super_block *sb)
31890+{
31891+ int err;
31892+
31893+ au_wbr_create_init_mfs(sb); /* ignore */
31894+ err = au_wbr_create_init_rr(sb);
31895+
31896+ return err;
31897+}
31898+
31899+/* ---------------------------------------------------------------------- */
31900+
31901+/* top down parent and most free space */
392086de 31902+static int au_wbr_create_pmfs(struct dentry *dentry, unsigned int flags)
1facf9fc 31903+{
31904+ int err, e2;
31905+ unsigned long long b;
31906+ aufs_bindex_t bindex, bstart, bend;
31907+ struct super_block *sb;
31908+ struct dentry *parent, *h_parent;
31909+ struct au_branch *br;
31910+
392086de 31911+ err = au_wbr_create_tdp(dentry, flags);
1facf9fc 31912+ if (unlikely(err < 0))
31913+ goto out;
31914+ parent = dget_parent(dentry);
31915+ bstart = au_dbstart(parent);
31916+ bend = au_dbtaildir(parent);
31917+ if (bstart == bend)
31918+ goto out_parent; /* success */
31919+
392086de 31920+ e2 = au_wbr_create_mfs(dentry, flags);
1facf9fc 31921+ if (e2 < 0)
31922+ goto out_parent; /* success */
31923+
31924+ /* when the available size is equal, select upper one */
31925+ sb = dentry->d_sb;
31926+ br = au_sbr(sb, err);
31927+ b = br->br_wbr->wbr_bytes;
31928+ AuDbg("b%d, %llu\n", err, b);
31929+
31930+ for (bindex = bstart; bindex <= bend; bindex++) {
31931+ h_parent = au_h_dptr(parent, bindex);
31932+ if (!h_parent || !h_parent->d_inode)
31933+ continue;
31934+
31935+ br = au_sbr(sb, bindex);
31936+ if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) {
31937+ b = br->br_wbr->wbr_bytes;
31938+ err = bindex;
31939+ AuDbg("b%d, %llu\n", err, b);
31940+ }
31941+ }
31942+
4a4d8108
AM
31943+ if (err >= 0)
31944+ err = au_wbr_nonopq(dentry, err);
31945+
4f0767ce 31946+out_parent:
1facf9fc 31947+ dput(parent);
4f0767ce 31948+out:
1facf9fc 31949+ AuDbg("b%d\n", err);
31950+ return err;
31951+}
31952+
31953+/* ---------------------------------------------------------------------- */
31954+
392086de
AM
31955+/*
31956+ * - top down parent
31957+ * - most free space with parent
31958+ * - most free space round-robin regardless of the parent
31959+ */
31960+static int au_wbr_create_pmfsrr(struct dentry *dentry, unsigned int flags)
31961+{
31962+ int err;
31963+ unsigned long long watermark;
31964+ struct super_block *sb;
31965+ struct au_branch *br;
31966+ struct au_wbr_mfs *mfs;
31967+
31968+ err = au_wbr_create_pmfs(dentry, flags | AuWbr_PARENT);
31969+ if (unlikely(err < 0))
31970+ goto out;
31971+
31972+ sb = dentry->d_sb;
31973+ br = au_sbr(sb, err);
31974+ mfs = &au_sbi(sb)->si_wbr_mfs;
31975+ mutex_lock(&mfs->mfs_lock);
31976+ watermark = mfs->mfsrr_watermark;
31977+ mutex_unlock(&mfs->mfs_lock);
31978+ if (br->br_wbr->wbr_bytes < watermark)
31979+ /* regardless the parent dir */
31980+ err = au_wbr_create_mfsrr(dentry, flags);
31981+
31982+out:
31983+ AuDbg("b%d\n", err);
31984+ return err;
31985+}
31986+
31987+/* ---------------------------------------------------------------------- */
31988+
1facf9fc 31989+/* policies for copyup */
31990+
31991+/* top down parent */
31992+static int au_wbr_copyup_tdp(struct dentry *dentry)
31993+{
392086de 31994+ return au_wbr_create_tdp(dentry, /*flags, anything is ok*/0);
1facf9fc 31995+}
31996+
31997+/* bottom up parent */
31998+static int au_wbr_copyup_bup(struct dentry *dentry)
31999+{
32000+ int err;
32001+ aufs_bindex_t bindex, bstart;
32002+ struct dentry *parent, *h_parent;
32003+ struct super_block *sb;
32004+
32005+ err = -EROFS;
32006+ sb = dentry->d_sb;
32007+ parent = dget_parent(dentry);
32008+ bstart = au_dbstart(parent);
32009+ for (bindex = au_dbstart(dentry); bindex >= bstart; bindex--) {
32010+ h_parent = au_h_dptr(parent, bindex);
32011+ if (!h_parent || !h_parent->d_inode)
32012+ continue;
32013+
32014+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
32015+ err = bindex;
32016+ break;
32017+ }
32018+ }
32019+ dput(parent);
32020+
32021+ /* bottom up here */
32022+ if (unlikely(err < 0))
32023+ err = au_wbr_bu(sb, bstart - 1);
32024+
32025+ AuDbg("b%d\n", err);
32026+ return err;
32027+}
32028+
32029+/* bottom up */
076b876e 32030+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t bstart)
1facf9fc 32031+{
32032+ int err;
32033+
4a4d8108
AM
32034+ err = au_wbr_bu(dentry->d_sb, bstart);
32035+ AuDbg("b%d\n", err);
32036+ if (err > bstart)
32037+ err = au_wbr_nonopq(dentry, err);
1facf9fc 32038+
32039+ AuDbg("b%d\n", err);
32040+ return err;
32041+}
32042+
076b876e
AM
32043+static int au_wbr_copyup_bu(struct dentry *dentry)
32044+{
32045+ int err;
32046+ aufs_bindex_t bstart;
32047+
32048+ bstart = au_dbstart(dentry);
32049+ err = au_wbr_do_copyup_bu(dentry, bstart);
32050+ return err;
32051+}
32052+
1facf9fc 32053+/* ---------------------------------------------------------------------- */
32054+
32055+struct au_wbr_copyup_operations au_wbr_copyup_ops[] = {
32056+ [AuWbrCopyup_TDP] = {
32057+ .copyup = au_wbr_copyup_tdp
32058+ },
32059+ [AuWbrCopyup_BUP] = {
32060+ .copyup = au_wbr_copyup_bup
32061+ },
32062+ [AuWbrCopyup_BU] = {
32063+ .copyup = au_wbr_copyup_bu
32064+ }
32065+};
32066+
32067+struct au_wbr_create_operations au_wbr_create_ops[] = {
32068+ [AuWbrCreate_TDP] = {
32069+ .create = au_wbr_create_tdp
32070+ },
32071+ [AuWbrCreate_RR] = {
32072+ .create = au_wbr_create_rr,
32073+ .init = au_wbr_create_init_rr
32074+ },
32075+ [AuWbrCreate_MFS] = {
32076+ .create = au_wbr_create_mfs,
32077+ .init = au_wbr_create_init_mfs,
32078+ .fin = au_wbr_create_fin_mfs
32079+ },
32080+ [AuWbrCreate_MFSV] = {
32081+ .create = au_wbr_create_mfs,
32082+ .init = au_wbr_create_init_mfs,
32083+ .fin = au_wbr_create_fin_mfs
32084+ },
32085+ [AuWbrCreate_MFSRR] = {
32086+ .create = au_wbr_create_mfsrr,
32087+ .init = au_wbr_create_init_mfsrr,
32088+ .fin = au_wbr_create_fin_mfs
32089+ },
32090+ [AuWbrCreate_MFSRRV] = {
32091+ .create = au_wbr_create_mfsrr,
32092+ .init = au_wbr_create_init_mfsrr,
32093+ .fin = au_wbr_create_fin_mfs
32094+ },
32095+ [AuWbrCreate_PMFS] = {
32096+ .create = au_wbr_create_pmfs,
32097+ .init = au_wbr_create_init_mfs,
32098+ .fin = au_wbr_create_fin_mfs
32099+ },
32100+ [AuWbrCreate_PMFSV] = {
32101+ .create = au_wbr_create_pmfs,
32102+ .init = au_wbr_create_init_mfs,
32103+ .fin = au_wbr_create_fin_mfs
392086de
AM
32104+ },
32105+ [AuWbrCreate_PMFSRR] = {
32106+ .create = au_wbr_create_pmfsrr,
32107+ .init = au_wbr_create_init_mfsrr,
32108+ .fin = au_wbr_create_fin_mfs
32109+ },
32110+ [AuWbrCreate_PMFSRRV] = {
32111+ .create = au_wbr_create_pmfsrr,
32112+ .init = au_wbr_create_init_mfsrr,
32113+ .fin = au_wbr_create_fin_mfs
1facf9fc 32114+ }
32115+};
7f207e10
AM
32116diff -urN /usr/share/empty/fs/aufs/whout.c linux/fs/aufs/whout.c
32117--- /usr/share/empty/fs/aufs/whout.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
32118+++ linux/fs/aufs/whout.c 2016-02-28 11:27:01.283912420 +0100
32119@@ -0,0 +1,1061 @@
1facf9fc 32120+/*
7f2ca4b1 32121+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 32122+ *
32123+ * This program, aufs is free software; you can redistribute it and/or modify
32124+ * it under the terms of the GNU General Public License as published by
32125+ * the Free Software Foundation; either version 2 of the License, or
32126+ * (at your option) any later version.
dece6358
AM
32127+ *
32128+ * This program is distributed in the hope that it will be useful,
32129+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
32130+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32131+ * GNU General Public License for more details.
32132+ *
32133+ * You should have received a copy of the GNU General Public License
523b37e3 32134+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 32135+ */
32136+
32137+/*
32138+ * whiteout for logical deletion and opaque directory
32139+ */
32140+
1facf9fc 32141+#include "aufs.h"
32142+
32143+#define WH_MASK S_IRUGO
32144+
32145+/*
32146+ * If a directory contains this file, then it is opaque. We start with the
32147+ * .wh. flag so that it is blocked by lookup.
32148+ */
0c3ec466
AM
32149+static struct qstr diropq_name = QSTR_INIT(AUFS_WH_DIROPQ,
32150+ sizeof(AUFS_WH_DIROPQ) - 1);
1facf9fc 32151+
32152+/*
32153+ * generate whiteout name, which is NOT terminated by NULL.
32154+ * @name: original d_name.name
32155+ * @len: original d_name.len
32156+ * @wh: whiteout qstr
32157+ * returns zero when succeeds, otherwise error.
32158+ * succeeded value as wh->name should be freed by kfree().
32159+ */
32160+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name)
32161+{
32162+ char *p;
32163+
32164+ if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN))
32165+ return -ENAMETOOLONG;
32166+
32167+ wh->len = name->len + AUFS_WH_PFX_LEN;
32168+ p = kmalloc(wh->len, GFP_NOFS);
32169+ wh->name = p;
32170+ if (p) {
32171+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
32172+ memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len);
32173+ /* smp_mb(); */
32174+ return 0;
32175+ }
32176+ return -ENOMEM;
32177+}
32178+
32179+/* ---------------------------------------------------------------------- */
32180+
32181+/*
32182+ * test if the @wh_name exists under @h_parent.
32183+ * @try_sio specifies the necessary of super-io.
32184+ */
076b876e 32185+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio)
1facf9fc 32186+{
32187+ int err;
32188+ struct dentry *wh_dentry;
1facf9fc 32189+
1facf9fc 32190+ if (!try_sio)
b4510431 32191+ wh_dentry = vfsub_lkup_one(wh_name, h_parent);
1facf9fc 32192+ else
076b876e 32193+ wh_dentry = au_sio_lkup_one(wh_name, h_parent);
1facf9fc 32194+ err = PTR_ERR(wh_dentry);
7f2ca4b1
JR
32195+ if (IS_ERR(wh_dentry)) {
32196+ if (err == -ENAMETOOLONG)
32197+ err = 0;
1facf9fc 32198+ goto out;
7f2ca4b1 32199+ }
1facf9fc 32200+
32201+ err = 0;
32202+ if (!wh_dentry->d_inode)
32203+ goto out_wh; /* success */
32204+
32205+ err = 1;
32206+ if (S_ISREG(wh_dentry->d_inode->i_mode))
32207+ goto out_wh; /* success */
32208+
32209+ err = -EIO;
523b37e3
AM
32210+ AuIOErr("%pd Invalid whiteout entry type 0%o.\n",
32211+ wh_dentry, wh_dentry->d_inode->i_mode);
1facf9fc 32212+
4f0767ce 32213+out_wh:
1facf9fc 32214+ dput(wh_dentry);
4f0767ce 32215+out:
1facf9fc 32216+ return err;
32217+}
32218+
32219+/*
32220+ * test if the @h_dentry sets opaque or not.
32221+ */
076b876e 32222+int au_diropq_test(struct dentry *h_dentry)
1facf9fc 32223+{
32224+ int err;
32225+ struct inode *h_dir;
32226+
32227+ h_dir = h_dentry->d_inode;
076b876e 32228+ err = au_wh_test(h_dentry, &diropq_name,
1facf9fc 32229+ au_test_h_perm_sio(h_dir, MAY_EXEC));
32230+ return err;
32231+}
32232+
32233+/*
32234+ * returns a negative dentry whose name is unique and temporary.
32235+ */
32236+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
32237+ struct qstr *prefix)
32238+{
1facf9fc 32239+ struct dentry *dentry;
32240+ int i;
027c5e7a 32241+ char defname[NAME_MAX - AUFS_MAX_NAMELEN + DNAME_INLINE_LEN + 1],
4a4d8108 32242+ *name, *p;
027c5e7a 32243+ /* strict atomic_t is unnecessary here */
1facf9fc 32244+ static unsigned short cnt;
32245+ struct qstr qs;
32246+
4a4d8108
AM
32247+ BUILD_BUG_ON(sizeof(cnt) * 2 > AUFS_WH_TMP_LEN);
32248+
1facf9fc 32249+ name = defname;
027c5e7a
AM
32250+ qs.len = sizeof(defname) - DNAME_INLINE_LEN + prefix->len - 1;
32251+ if (unlikely(prefix->len > DNAME_INLINE_LEN)) {
1facf9fc 32252+ dentry = ERR_PTR(-ENAMETOOLONG);
4a4d8108 32253+ if (unlikely(qs.len > NAME_MAX))
1facf9fc 32254+ goto out;
32255+ dentry = ERR_PTR(-ENOMEM);
32256+ name = kmalloc(qs.len + 1, GFP_NOFS);
32257+ if (unlikely(!name))
32258+ goto out;
32259+ }
32260+
32261+ /* doubly whiteout-ed */
32262+ memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2);
32263+ p = name + AUFS_WH_PFX_LEN * 2;
32264+ memcpy(p, prefix->name, prefix->len);
32265+ p += prefix->len;
32266+ *p++ = '.';
4a4d8108 32267+ AuDebugOn(name + qs.len + 1 - p <= AUFS_WH_TMP_LEN);
1facf9fc 32268+
32269+ qs.name = name;
32270+ for (i = 0; i < 3; i++) {
b752ccd1 32271+ sprintf(p, "%.*x", AUFS_WH_TMP_LEN, cnt++);
076b876e 32272+ dentry = au_sio_lkup_one(&qs, h_parent);
1facf9fc 32273+ if (IS_ERR(dentry) || !dentry->d_inode)
32274+ goto out_name;
32275+ dput(dentry);
32276+ }
0c3ec466 32277+ /* pr_warn("could not get random name\n"); */
1facf9fc 32278+ dentry = ERR_PTR(-EEXIST);
32279+ AuDbg("%.*s\n", AuLNPair(&qs));
32280+ BUG();
32281+
4f0767ce 32282+out_name:
1facf9fc 32283+ if (name != defname)
32284+ kfree(name);
4f0767ce 32285+out:
4a4d8108 32286+ AuTraceErrPtr(dentry);
1facf9fc 32287+ return dentry;
1facf9fc 32288+}
32289+
32290+/*
32291+ * rename the @h_dentry on @br to the whiteouted temporary name.
32292+ */
32293+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br)
32294+{
32295+ int err;
32296+ struct path h_path = {
86dc4139 32297+ .mnt = au_br_mnt(br)
1facf9fc 32298+ };
523b37e3 32299+ struct inode *h_dir, *delegated;
1facf9fc 32300+ struct dentry *h_parent;
32301+
32302+ h_parent = h_dentry->d_parent; /* dir inode is locked */
32303+ h_dir = h_parent->d_inode;
32304+ IMustLock(h_dir);
32305+
32306+ h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name);
32307+ err = PTR_ERR(h_path.dentry);
32308+ if (IS_ERR(h_path.dentry))
32309+ goto out;
32310+
32311+ /* under the same dir, no need to lock_rename() */
523b37e3
AM
32312+ delegated = NULL;
32313+ err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path, &delegated);
1facf9fc 32314+ AuTraceErr(err);
523b37e3
AM
32315+ if (unlikely(err == -EWOULDBLOCK)) {
32316+ pr_warn("cannot retry for NFSv4 delegation"
32317+ " for an internal rename\n");
32318+ iput(delegated);
32319+ }
1facf9fc 32320+ dput(h_path.dentry);
32321+
4f0767ce 32322+out:
4a4d8108 32323+ AuTraceErr(err);
1facf9fc 32324+ return err;
32325+}
32326+
32327+/* ---------------------------------------------------------------------- */
32328+/*
32329+ * functions for removing a whiteout
32330+ */
32331+
32332+static int do_unlink_wh(struct inode *h_dir, struct path *h_path)
32333+{
523b37e3
AM
32334+ int err, force;
32335+ struct inode *delegated;
1facf9fc 32336+
32337+ /*
32338+ * forces superio when the dir has a sticky bit.
32339+ * this may be a violation of unix fs semantics.
32340+ */
32341+ force = (h_dir->i_mode & S_ISVTX)
0c3ec466 32342+ && !uid_eq(current_fsuid(), h_path->dentry->d_inode->i_uid);
523b37e3
AM
32343+ delegated = NULL;
32344+ err = vfsub_unlink(h_dir, h_path, &delegated, force);
32345+ if (unlikely(err == -EWOULDBLOCK)) {
32346+ pr_warn("cannot retry for NFSv4 delegation"
32347+ " for an internal unlink\n");
32348+ iput(delegated);
32349+ }
32350+ return err;
1facf9fc 32351+}
32352+
32353+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
32354+ struct dentry *dentry)
32355+{
32356+ int err;
32357+
32358+ err = do_unlink_wh(h_dir, h_path);
32359+ if (!err && dentry)
32360+ au_set_dbwh(dentry, -1);
32361+
32362+ return err;
32363+}
32364+
32365+static int unlink_wh_name(struct dentry *h_parent, struct qstr *wh,
32366+ struct au_branch *br)
32367+{
32368+ int err;
32369+ struct path h_path = {
86dc4139 32370+ .mnt = au_br_mnt(br)
1facf9fc 32371+ };
32372+
32373+ err = 0;
b4510431 32374+ h_path.dentry = vfsub_lkup_one(wh, h_parent);
1facf9fc 32375+ if (IS_ERR(h_path.dentry))
32376+ err = PTR_ERR(h_path.dentry);
32377+ else {
32378+ if (h_path.dentry->d_inode
32379+ && S_ISREG(h_path.dentry->d_inode->i_mode))
32380+ err = do_unlink_wh(h_parent->d_inode, &h_path);
32381+ dput(h_path.dentry);
32382+ }
32383+
32384+ return err;
32385+}
32386+
32387+/* ---------------------------------------------------------------------- */
32388+/*
32389+ * initialize/clean whiteout for a branch
32390+ */
32391+
32392+static void au_wh_clean(struct inode *h_dir, struct path *whpath,
32393+ const int isdir)
32394+{
32395+ int err;
523b37e3 32396+ struct inode *delegated;
1facf9fc 32397+
32398+ if (!whpath->dentry->d_inode)
32399+ return;
32400+
86dc4139
AM
32401+ if (isdir)
32402+ err = vfsub_rmdir(h_dir, whpath);
523b37e3
AM
32403+ else {
32404+ delegated = NULL;
32405+ err = vfsub_unlink(h_dir, whpath, &delegated, /*force*/0);
32406+ if (unlikely(err == -EWOULDBLOCK)) {
32407+ pr_warn("cannot retry for NFSv4 delegation"
32408+ " for an internal unlink\n");
32409+ iput(delegated);
32410+ }
32411+ }
1facf9fc 32412+ if (unlikely(err))
523b37e3
AM
32413+ pr_warn("failed removing %pd (%d), ignored.\n",
32414+ whpath->dentry, err);
1facf9fc 32415+}
32416+
32417+static int test_linkable(struct dentry *h_root)
32418+{
32419+ struct inode *h_dir = h_root->d_inode;
32420+
32421+ if (h_dir->i_op->link)
32422+ return 0;
32423+
523b37e3
AM
32424+ pr_err("%pd (%s) doesn't support link(2), use noplink and rw+nolwh\n",
32425+ h_root, au_sbtype(h_root->d_sb));
1facf9fc 32426+ return -ENOSYS;
32427+}
32428+
32429+/* todo: should this mkdir be done in /sbin/mount.aufs helper? */
32430+static int au_whdir(struct inode *h_dir, struct path *path)
32431+{
32432+ int err;
32433+
32434+ err = -EEXIST;
32435+ if (!path->dentry->d_inode) {
32436+ int mode = S_IRWXU;
32437+
32438+ if (au_test_nfs(path->dentry->d_sb))
32439+ mode |= S_IXUGO;
86dc4139 32440+ err = vfsub_mkdir(h_dir, path, mode);
7f2ca4b1 32441+ } else if (d_is_dir(path->dentry))
1facf9fc 32442+ err = 0;
32443+ else
523b37e3 32444+ pr_err("unknown %pd exists\n", path->dentry);
1facf9fc 32445+
32446+ return err;
32447+}
32448+
32449+struct au_wh_base {
32450+ const struct qstr *name;
32451+ struct dentry *dentry;
32452+};
32453+
32454+static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[],
32455+ struct path *h_path)
32456+{
32457+ h_path->dentry = base[AuBrWh_BASE].dentry;
32458+ au_wh_clean(h_dir, h_path, /*isdir*/0);
32459+ h_path->dentry = base[AuBrWh_PLINK].dentry;
32460+ au_wh_clean(h_dir, h_path, /*isdir*/1);
32461+ h_path->dentry = base[AuBrWh_ORPH].dentry;
32462+ au_wh_clean(h_dir, h_path, /*isdir*/1);
32463+}
32464+
32465+/*
32466+ * returns tri-state,
c1595e42 32467+ * minus: error, caller should print the message
1facf9fc 32468+ * zero: success
c1595e42 32469+ * plus: error, caller should NOT print the message
1facf9fc 32470+ */
32471+static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr,
32472+ int do_plink, struct au_wh_base base[],
32473+ struct path *h_path)
32474+{
32475+ int err;
32476+ struct inode *h_dir;
32477+
32478+ h_dir = h_root->d_inode;
32479+ h_path->dentry = base[AuBrWh_BASE].dentry;
32480+ au_wh_clean(h_dir, h_path, /*isdir*/0);
32481+ h_path->dentry = base[AuBrWh_PLINK].dentry;
32482+ if (do_plink) {
32483+ err = test_linkable(h_root);
32484+ if (unlikely(err)) {
32485+ err = 1;
32486+ goto out;
32487+ }
32488+
32489+ err = au_whdir(h_dir, h_path);
32490+ if (unlikely(err))
32491+ goto out;
32492+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
32493+ } else
32494+ au_wh_clean(h_dir, h_path, /*isdir*/1);
32495+ h_path->dentry = base[AuBrWh_ORPH].dentry;
32496+ err = au_whdir(h_dir, h_path);
32497+ if (unlikely(err))
32498+ goto out;
32499+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
32500+
4f0767ce 32501+out:
1facf9fc 32502+ return err;
32503+}
32504+
32505+/*
32506+ * for the moment, aufs supports the branch filesystem which does not support
32507+ * link(2). testing on FAT which does not support i_op->setattr() fully either,
32508+ * copyup failed. finally, such filesystem will not be used as the writable
32509+ * branch.
32510+ *
32511+ * returns tri-state, see above.
32512+ */
32513+static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr,
32514+ int do_plink, struct au_wh_base base[],
32515+ struct path *h_path)
32516+{
32517+ int err;
32518+ struct inode *h_dir;
32519+
1308ab2a 32520+ WbrWhMustWriteLock(wbr);
32521+
1facf9fc 32522+ err = test_linkable(h_root);
32523+ if (unlikely(err)) {
32524+ err = 1;
32525+ goto out;
32526+ }
32527+
32528+ /*
32529+ * todo: should this create be done in /sbin/mount.aufs helper?
32530+ */
32531+ err = -EEXIST;
32532+ h_dir = h_root->d_inode;
32533+ if (!base[AuBrWh_BASE].dentry->d_inode) {
86dc4139
AM
32534+ h_path->dentry = base[AuBrWh_BASE].dentry;
32535+ err = vfsub_create(h_dir, h_path, WH_MASK, /*want_excl*/true);
1facf9fc 32536+ } else if (S_ISREG(base[AuBrWh_BASE].dentry->d_inode->i_mode))
32537+ err = 0;
32538+ else
523b37e3 32539+ pr_err("unknown %pd2 exists\n", base[AuBrWh_BASE].dentry);
1facf9fc 32540+ if (unlikely(err))
32541+ goto out;
32542+
32543+ h_path->dentry = base[AuBrWh_PLINK].dentry;
32544+ if (do_plink) {
32545+ err = au_whdir(h_dir, h_path);
32546+ if (unlikely(err))
32547+ goto out;
32548+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry);
32549+ } else
32550+ au_wh_clean(h_dir, h_path, /*isdir*/1);
32551+ wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry);
32552+
32553+ h_path->dentry = base[AuBrWh_ORPH].dentry;
32554+ err = au_whdir(h_dir, h_path);
32555+ if (unlikely(err))
32556+ goto out;
32557+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry);
32558+
4f0767ce 32559+out:
1facf9fc 32560+ return err;
32561+}
32562+
32563+/*
32564+ * initialize the whiteout base file/dir for @br.
32565+ */
86dc4139 32566+int au_wh_init(struct au_branch *br, struct super_block *sb)
1facf9fc 32567+{
32568+ int err, i;
32569+ const unsigned char do_plink
32570+ = !!au_opt_test(au_mntflags(sb), PLINK);
1facf9fc 32571+ struct inode *h_dir;
86dc4139
AM
32572+ struct path path = br->br_path;
32573+ struct dentry *h_root = path.dentry;
1facf9fc 32574+ struct au_wbr *wbr = br->br_wbr;
32575+ static const struct qstr base_name[] = {
0c3ec466
AM
32576+ [AuBrWh_BASE] = QSTR_INIT(AUFS_BASE_NAME,
32577+ sizeof(AUFS_BASE_NAME) - 1),
32578+ [AuBrWh_PLINK] = QSTR_INIT(AUFS_PLINKDIR_NAME,
32579+ sizeof(AUFS_PLINKDIR_NAME) - 1),
32580+ [AuBrWh_ORPH] = QSTR_INIT(AUFS_ORPHDIR_NAME,
32581+ sizeof(AUFS_ORPHDIR_NAME) - 1)
1facf9fc 32582+ };
32583+ struct au_wh_base base[] = {
32584+ [AuBrWh_BASE] = {
32585+ .name = base_name + AuBrWh_BASE,
32586+ .dentry = NULL
32587+ },
32588+ [AuBrWh_PLINK] = {
32589+ .name = base_name + AuBrWh_PLINK,
32590+ .dentry = NULL
32591+ },
32592+ [AuBrWh_ORPH] = {
32593+ .name = base_name + AuBrWh_ORPH,
32594+ .dentry = NULL
32595+ }
32596+ };
32597+
1308ab2a 32598+ if (wbr)
32599+ WbrWhMustWriteLock(wbr);
1facf9fc 32600+
1facf9fc 32601+ for (i = 0; i < AuBrWh_Last; i++) {
32602+ /* doubly whiteouted */
32603+ struct dentry *d;
32604+
32605+ d = au_wh_lkup(h_root, (void *)base[i].name, br);
32606+ err = PTR_ERR(d);
32607+ if (IS_ERR(d))
32608+ goto out;
32609+
32610+ base[i].dentry = d;
32611+ AuDebugOn(wbr
32612+ && wbr->wbr_wh[i]
32613+ && wbr->wbr_wh[i] != base[i].dentry);
32614+ }
32615+
32616+ if (wbr)
32617+ for (i = 0; i < AuBrWh_Last; i++) {
32618+ dput(wbr->wbr_wh[i]);
32619+ wbr->wbr_wh[i] = NULL;
32620+ }
32621+
32622+ err = 0;
1e00d052 32623+ if (!au_br_writable(br->br_perm)) {
4a4d8108 32624+ h_dir = h_root->d_inode;
1facf9fc 32625+ au_wh_init_ro(h_dir, base, &path);
1e00d052 32626+ } else if (!au_br_wh_linkable(br->br_perm)) {
1facf9fc 32627+ err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path);
32628+ if (err > 0)
32629+ goto out;
32630+ else if (err)
32631+ goto out_err;
1e00d052 32632+ } else {
1facf9fc 32633+ err = au_wh_init_rw(h_root, wbr, do_plink, base, &path);
32634+ if (err > 0)
32635+ goto out;
32636+ else if (err)
32637+ goto out_err;
1facf9fc 32638+ }
32639+ goto out; /* success */
32640+
4f0767ce 32641+out_err:
523b37e3
AM
32642+ pr_err("an error(%d) on the writable branch %pd(%s)\n",
32643+ err, h_root, au_sbtype(h_root->d_sb));
4f0767ce 32644+out:
1facf9fc 32645+ for (i = 0; i < AuBrWh_Last; i++)
32646+ dput(base[i].dentry);
32647+ return err;
32648+}
32649+
32650+/* ---------------------------------------------------------------------- */
32651+/*
32652+ * whiteouts are usually all hard links to the whiteout base file.
32653+ * when its link count reaches a ceiling, we create a new whiteout base
32654+ * asynchronously.
32655+ */
32656+
32657+struct reinit_br_wh {
32658+ struct super_block *sb;
32659+ struct au_branch *br;
32660+};
32661+
32662+static void reinit_br_wh(void *arg)
32663+{
32664+ int err;
32665+ aufs_bindex_t bindex;
32666+ struct path h_path;
32667+ struct reinit_br_wh *a = arg;
32668+ struct au_wbr *wbr;
523b37e3 32669+ struct inode *dir, *delegated;
1facf9fc 32670+ struct dentry *h_root;
32671+ struct au_hinode *hdir;
32672+
32673+ err = 0;
32674+ wbr = a->br->br_wbr;
32675+ /* big aufs lock */
32676+ si_noflush_write_lock(a->sb);
32677+ if (!au_br_writable(a->br->br_perm))
32678+ goto out;
32679+ bindex = au_br_index(a->sb, a->br->br_id);
32680+ if (unlikely(bindex < 0))
32681+ goto out;
32682+
1308ab2a 32683+ di_read_lock_parent(a->sb->s_root, AuLock_IR);
1facf9fc 32684+ dir = a->sb->s_root->d_inode;
1facf9fc 32685+ hdir = au_hi(dir, bindex);
32686+ h_root = au_h_dptr(a->sb->s_root, bindex);
86dc4139 32687+ AuDebugOn(h_root != au_br_dentry(a->br));
1facf9fc 32688+
4a4d8108 32689+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
1facf9fc 32690+ wbr_wh_write_lock(wbr);
32691+ err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode,
32692+ h_root, a->br);
32693+ if (!err) {
86dc4139
AM
32694+ h_path.dentry = wbr->wbr_whbase;
32695+ h_path.mnt = au_br_mnt(a->br);
523b37e3
AM
32696+ delegated = NULL;
32697+ err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated,
32698+ /*force*/0);
32699+ if (unlikely(err == -EWOULDBLOCK)) {
32700+ pr_warn("cannot retry for NFSv4 delegation"
32701+ " for an internal unlink\n");
32702+ iput(delegated);
32703+ }
1facf9fc 32704+ } else {
523b37e3 32705+ pr_warn("%pd is moved, ignored\n", wbr->wbr_whbase);
1facf9fc 32706+ err = 0;
32707+ }
32708+ dput(wbr->wbr_whbase);
32709+ wbr->wbr_whbase = NULL;
32710+ if (!err)
86dc4139 32711+ err = au_wh_init(a->br, a->sb);
1facf9fc 32712+ wbr_wh_write_unlock(wbr);
4a4d8108 32713+ au_hn_imtx_unlock(hdir);
1308ab2a 32714+ di_read_unlock(a->sb->s_root, AuLock_IR);
076b876e
AM
32715+ if (!err)
32716+ au_fhsm_wrote(a->sb, bindex, /*force*/0);
1facf9fc 32717+
4f0767ce 32718+out:
1facf9fc 32719+ if (wbr)
32720+ atomic_dec(&wbr->wbr_wh_running);
32721+ atomic_dec(&a->br->br_count);
1facf9fc 32722+ si_write_unlock(a->sb);
027c5e7a 32723+ au_nwt_done(&au_sbi(a->sb)->si_nowait);
1facf9fc 32724+ kfree(arg);
32725+ if (unlikely(err))
32726+ AuIOErr("err %d\n", err);
32727+}
32728+
32729+static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br)
32730+{
32731+ int do_dec, wkq_err;
32732+ struct reinit_br_wh *arg;
32733+
32734+ do_dec = 1;
32735+ if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1)
32736+ goto out;
32737+
32738+ /* ignore ENOMEM */
32739+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
32740+ if (arg) {
32741+ /*
32742+ * dec(wh_running), kfree(arg) and dec(br_count)
32743+ * in reinit function
32744+ */
32745+ arg->sb = sb;
32746+ arg->br = br;
32747+ atomic_inc(&br->br_count);
53392da6 32748+ wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb, /*flags*/0);
1facf9fc 32749+ if (unlikely(wkq_err)) {
32750+ atomic_dec(&br->br_wbr->wbr_wh_running);
32751+ atomic_dec(&br->br_count);
32752+ kfree(arg);
32753+ }
32754+ do_dec = 0;
32755+ }
32756+
4f0767ce 32757+out:
1facf9fc 32758+ if (do_dec)
32759+ atomic_dec(&br->br_wbr->wbr_wh_running);
32760+}
32761+
32762+/* ---------------------------------------------------------------------- */
32763+
32764+/*
32765+ * create the whiteout @wh.
32766+ */
32767+static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex,
32768+ struct dentry *wh)
32769+{
32770+ int err;
32771+ struct path h_path = {
32772+ .dentry = wh
32773+ };
32774+ struct au_branch *br;
32775+ struct au_wbr *wbr;
32776+ struct dentry *h_parent;
523b37e3 32777+ struct inode *h_dir, *delegated;
1facf9fc 32778+
32779+ h_parent = wh->d_parent; /* dir inode is locked */
32780+ h_dir = h_parent->d_inode;
32781+ IMustLock(h_dir);
32782+
32783+ br = au_sbr(sb, bindex);
86dc4139 32784+ h_path.mnt = au_br_mnt(br);
1facf9fc 32785+ wbr = br->br_wbr;
32786+ wbr_wh_read_lock(wbr);
32787+ if (wbr->wbr_whbase) {
523b37e3
AM
32788+ delegated = NULL;
32789+ err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path, &delegated);
32790+ if (unlikely(err == -EWOULDBLOCK)) {
32791+ pr_warn("cannot retry for NFSv4 delegation"
32792+ " for an internal link\n");
32793+ iput(delegated);
32794+ }
1facf9fc 32795+ if (!err || err != -EMLINK)
32796+ goto out;
32797+
32798+ /* link count full. re-initialize br_whbase. */
32799+ kick_reinit_br_wh(sb, br);
32800+ }
32801+
32802+ /* return this error in this context */
b4510431 32803+ err = vfsub_create(h_dir, &h_path, WH_MASK, /*want_excl*/true);
076b876e
AM
32804+ if (!err)
32805+ au_fhsm_wrote(sb, bindex, /*force*/0);
1facf9fc 32806+
4f0767ce 32807+out:
1facf9fc 32808+ wbr_wh_read_unlock(wbr);
32809+ return err;
32810+}
32811+
32812+/* ---------------------------------------------------------------------- */
32813+
32814+/*
32815+ * create or remove the diropq.
32816+ */
32817+static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex,
32818+ unsigned int flags)
32819+{
32820+ struct dentry *opq_dentry, *h_dentry;
32821+ struct super_block *sb;
32822+ struct au_branch *br;
32823+ int err;
32824+
32825+ sb = dentry->d_sb;
32826+ br = au_sbr(sb, bindex);
32827+ h_dentry = au_h_dptr(dentry, bindex);
b4510431 32828+ opq_dentry = vfsub_lkup_one(&diropq_name, h_dentry);
1facf9fc 32829+ if (IS_ERR(opq_dentry))
32830+ goto out;
32831+
32832+ if (au_ftest_diropq(flags, CREATE)) {
32833+ err = link_or_create_wh(sb, bindex, opq_dentry);
32834+ if (!err) {
32835+ au_set_dbdiropq(dentry, bindex);
32836+ goto out; /* success */
32837+ }
32838+ } else {
32839+ struct path tmp = {
32840+ .dentry = opq_dentry,
86dc4139 32841+ .mnt = au_br_mnt(br)
1facf9fc 32842+ };
32843+ err = do_unlink_wh(au_h_iptr(dentry->d_inode, bindex), &tmp);
32844+ if (!err)
32845+ au_set_dbdiropq(dentry, -1);
32846+ }
32847+ dput(opq_dentry);
32848+ opq_dentry = ERR_PTR(err);
32849+
4f0767ce 32850+out:
1facf9fc 32851+ return opq_dentry;
32852+}
32853+
32854+struct do_diropq_args {
32855+ struct dentry **errp;
32856+ struct dentry *dentry;
32857+ aufs_bindex_t bindex;
32858+ unsigned int flags;
32859+};
32860+
32861+static void call_do_diropq(void *args)
32862+{
32863+ struct do_diropq_args *a = args;
32864+ *a->errp = do_diropq(a->dentry, a->bindex, a->flags);
32865+}
32866+
32867+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
32868+ unsigned int flags)
32869+{
32870+ struct dentry *diropq, *h_dentry;
32871+
32872+ h_dentry = au_h_dptr(dentry, bindex);
32873+ if (!au_test_h_perm_sio(h_dentry->d_inode, MAY_EXEC | MAY_WRITE))
32874+ diropq = do_diropq(dentry, bindex, flags);
32875+ else {
32876+ int wkq_err;
32877+ struct do_diropq_args args = {
32878+ .errp = &diropq,
32879+ .dentry = dentry,
32880+ .bindex = bindex,
32881+ .flags = flags
32882+ };
32883+
32884+ wkq_err = au_wkq_wait(call_do_diropq, &args);
32885+ if (unlikely(wkq_err))
32886+ diropq = ERR_PTR(wkq_err);
32887+ }
32888+
32889+ return diropq;
32890+}
32891+
32892+/* ---------------------------------------------------------------------- */
32893+
32894+/*
32895+ * lookup whiteout dentry.
32896+ * @h_parent: lower parent dentry which must exist and be locked
32897+ * @base_name: name of dentry which will be whiteouted
32898+ * returns dentry for whiteout.
32899+ */
32900+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
32901+ struct au_branch *br)
32902+{
32903+ int err;
32904+ struct qstr wh_name;
32905+ struct dentry *wh_dentry;
32906+
32907+ err = au_wh_name_alloc(&wh_name, base_name);
32908+ wh_dentry = ERR_PTR(err);
32909+ if (!err) {
b4510431 32910+ wh_dentry = vfsub_lkup_one(&wh_name, h_parent);
1facf9fc 32911+ kfree(wh_name.name);
32912+ }
32913+ return wh_dentry;
32914+}
32915+
32916+/*
32917+ * link/create a whiteout for @dentry on @bindex.
32918+ */
32919+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
32920+ struct dentry *h_parent)
32921+{
32922+ struct dentry *wh_dentry;
32923+ struct super_block *sb;
32924+ int err;
32925+
32926+ sb = dentry->d_sb;
32927+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex));
32928+ if (!IS_ERR(wh_dentry) && !wh_dentry->d_inode) {
32929+ err = link_or_create_wh(sb, bindex, wh_dentry);
076b876e 32930+ if (!err) {
1facf9fc 32931+ au_set_dbwh(dentry, bindex);
076b876e
AM
32932+ au_fhsm_wrote(sb, bindex, /*force*/0);
32933+ } else {
1facf9fc 32934+ dput(wh_dentry);
32935+ wh_dentry = ERR_PTR(err);
32936+ }
32937+ }
32938+
32939+ return wh_dentry;
32940+}
32941+
32942+/* ---------------------------------------------------------------------- */
32943+
32944+/* Delete all whiteouts in this directory on branch bindex. */
32945+static int del_wh_children(struct dentry *h_dentry, struct au_nhash *whlist,
32946+ aufs_bindex_t bindex, struct au_branch *br)
32947+{
32948+ int err;
32949+ unsigned long ul, n;
32950+ struct qstr wh_name;
32951+ char *p;
32952+ struct hlist_head *head;
c06a8ce3 32953+ struct au_vdir_wh *pos;
1facf9fc 32954+ struct au_vdir_destr *str;
32955+
32956+ err = -ENOMEM;
537831f9 32957+ p = (void *)__get_free_page(GFP_NOFS);
1facf9fc 32958+ wh_name.name = p;
32959+ if (unlikely(!wh_name.name))
32960+ goto out;
32961+
32962+ err = 0;
32963+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
32964+ p += AUFS_WH_PFX_LEN;
32965+ n = whlist->nh_num;
32966+ head = whlist->nh_head;
32967+ for (ul = 0; !err && ul < n; ul++, head++) {
c06a8ce3
AM
32968+ hlist_for_each_entry(pos, head, wh_hash) {
32969+ if (pos->wh_bindex != bindex)
1facf9fc 32970+ continue;
32971+
c06a8ce3 32972+ str = &pos->wh_str;
1facf9fc 32973+ if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) {
32974+ memcpy(p, str->name, str->len);
32975+ wh_name.len = AUFS_WH_PFX_LEN + str->len;
32976+ err = unlink_wh_name(h_dentry, &wh_name, br);
32977+ if (!err)
32978+ continue;
32979+ break;
32980+ }
32981+ AuIOErr("whiteout name too long %.*s\n",
32982+ str->len, str->name);
32983+ err = -EIO;
32984+ break;
32985+ }
32986+ }
537831f9 32987+ free_page((unsigned long)wh_name.name);
1facf9fc 32988+
4f0767ce 32989+out:
1facf9fc 32990+ return err;
32991+}
32992+
32993+struct del_wh_children_args {
32994+ int *errp;
32995+ struct dentry *h_dentry;
1308ab2a 32996+ struct au_nhash *whlist;
1facf9fc 32997+ aufs_bindex_t bindex;
32998+ struct au_branch *br;
32999+};
33000+
33001+static void call_del_wh_children(void *args)
33002+{
33003+ struct del_wh_children_args *a = args;
1308ab2a 33004+ *a->errp = del_wh_children(a->h_dentry, a->whlist, a->bindex, a->br);
1facf9fc 33005+}
33006+
33007+/* ---------------------------------------------------------------------- */
33008+
33009+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp)
33010+{
33011+ struct au_whtmp_rmdir *whtmp;
dece6358 33012+ int err;
1308ab2a 33013+ unsigned int rdhash;
dece6358
AM
33014+
33015+ SiMustAnyLock(sb);
1facf9fc 33016+
7f2ca4b1 33017+ whtmp = kzalloc(sizeof(*whtmp), gfp);
dece6358
AM
33018+ if (unlikely(!whtmp)) {
33019+ whtmp = ERR_PTR(-ENOMEM);
1facf9fc 33020+ goto out;
dece6358 33021+ }
1facf9fc 33022+
1308ab2a 33023+ /* no estimation for dir size */
33024+ rdhash = au_sbi(sb)->si_rdhash;
33025+ if (!rdhash)
33026+ rdhash = AUFS_RDHASH_DEF;
33027+ err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp);
33028+ if (unlikely(err)) {
33029+ kfree(whtmp);
33030+ whtmp = ERR_PTR(err);
33031+ }
dece6358 33032+
4f0767ce 33033+out:
dece6358 33034+ return whtmp;
1facf9fc 33035+}
33036+
33037+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp)
33038+{
027c5e7a
AM
33039+ if (whtmp->br)
33040+ atomic_dec(&whtmp->br->br_count);
1facf9fc 33041+ dput(whtmp->wh_dentry);
33042+ iput(whtmp->dir);
dece6358 33043+ au_nhash_wh_free(&whtmp->whlist);
1facf9fc 33044+ kfree(whtmp);
33045+}
33046+
33047+/*
33048+ * rmdir the whiteouted temporary named dir @h_dentry.
33049+ * @whlist: whiteouted children.
33050+ */
33051+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
33052+ struct dentry *wh_dentry, struct au_nhash *whlist)
33053+{
33054+ int err;
7f2ca4b1 33055+ unsigned int h_nlink;
1facf9fc 33056+ struct path h_tmp;
33057+ struct inode *wh_inode, *h_dir;
33058+ struct au_branch *br;
33059+
33060+ h_dir = wh_dentry->d_parent->d_inode; /* dir inode is locked */
33061+ IMustLock(h_dir);
33062+
33063+ br = au_sbr(dir->i_sb, bindex);
33064+ wh_inode = wh_dentry->d_inode;
33065+ mutex_lock_nested(&wh_inode->i_mutex, AuLsc_I_CHILD);
33066+
33067+ /*
33068+ * someone else might change some whiteouts while we were sleeping.
33069+ * it means this whlist may have an obsolete entry.
33070+ */
33071+ if (!au_test_h_perm_sio(wh_inode, MAY_EXEC | MAY_WRITE))
33072+ err = del_wh_children(wh_dentry, whlist, bindex, br);
33073+ else {
33074+ int wkq_err;
33075+ struct del_wh_children_args args = {
33076+ .errp = &err,
33077+ .h_dentry = wh_dentry,
1308ab2a 33078+ .whlist = whlist,
1facf9fc 33079+ .bindex = bindex,
33080+ .br = br
33081+ };
33082+
33083+ wkq_err = au_wkq_wait(call_del_wh_children, &args);
33084+ if (unlikely(wkq_err))
33085+ err = wkq_err;
33086+ }
33087+ mutex_unlock(&wh_inode->i_mutex);
33088+
33089+ if (!err) {
33090+ h_tmp.dentry = wh_dentry;
86dc4139 33091+ h_tmp.mnt = au_br_mnt(br);
7f2ca4b1 33092+ h_nlink = h_dir->i_nlink;
1facf9fc 33093+ err = vfsub_rmdir(h_dir, &h_tmp);
7f2ca4b1
JR
33094+ /* some fs doesn't change the parent nlink in some cases */
33095+ h_nlink -= h_dir->i_nlink;
1facf9fc 33096+ }
33097+
33098+ if (!err) {
33099+ if (au_ibstart(dir) == bindex) {
7f207e10 33100+ /* todo: dir->i_mutex is necessary */
1facf9fc 33101+ au_cpup_attr_timesizes(dir);
7f2ca4b1
JR
33102+ if (h_nlink)
33103+ vfsub_drop_nlink(dir);
1facf9fc 33104+ }
33105+ return 0; /* success */
33106+ }
33107+
523b37e3 33108+ pr_warn("failed removing %pd(%d), ignored\n", wh_dentry, err);
1facf9fc 33109+ return err;
33110+}
33111+
33112+static void call_rmdir_whtmp(void *args)
33113+{
33114+ int err;
e49829fe 33115+ aufs_bindex_t bindex;
1facf9fc 33116+ struct au_whtmp_rmdir *a = args;
33117+ struct super_block *sb;
33118+ struct dentry *h_parent;
33119+ struct inode *h_dir;
1facf9fc 33120+ struct au_hinode *hdir;
33121+
33122+ /* rmdir by nfsd may cause deadlock with this i_mutex */
33123+ /* mutex_lock(&a->dir->i_mutex); */
e49829fe 33124+ err = -EROFS;
1facf9fc 33125+ sb = a->dir->i_sb;
e49829fe
JR
33126+ si_read_lock(sb, !AuLock_FLUSH);
33127+ if (!au_br_writable(a->br->br_perm))
33128+ goto out;
33129+ bindex = au_br_index(sb, a->br->br_id);
33130+ if (unlikely(bindex < 0))
1facf9fc 33131+ goto out;
33132+
33133+ err = -EIO;
1facf9fc 33134+ ii_write_lock_parent(a->dir);
33135+ h_parent = dget_parent(a->wh_dentry);
33136+ h_dir = h_parent->d_inode;
e49829fe 33137+ hdir = au_hi(a->dir, bindex);
86dc4139
AM
33138+ err = vfsub_mnt_want_write(au_br_mnt(a->br));
33139+ if (unlikely(err))
33140+ goto out_mnt;
4a4d8108 33141+ au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
e49829fe
JR
33142+ err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent,
33143+ a->br);
86dc4139
AM
33144+ if (!err)
33145+ err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry, &a->whlist);
4a4d8108 33146+ au_hn_imtx_unlock(hdir);
86dc4139
AM
33147+ vfsub_mnt_drop_write(au_br_mnt(a->br));
33148+
33149+out_mnt:
1facf9fc 33150+ dput(h_parent);
33151+ ii_write_unlock(a->dir);
4f0767ce 33152+out:
1facf9fc 33153+ /* mutex_unlock(&a->dir->i_mutex); */
1facf9fc 33154+ au_whtmp_rmdir_free(a);
027c5e7a
AM
33155+ si_read_unlock(sb);
33156+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 33157+ if (unlikely(err))
33158+ AuIOErr("err %d\n", err);
33159+}
33160+
33161+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
33162+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args)
33163+{
33164+ int wkq_err;
e49829fe 33165+ struct super_block *sb;
1facf9fc 33166+
33167+ IMustLock(dir);
33168+
33169+ /* all post-processing will be done in call_rmdir_whtmp(). */
e49829fe 33170+ sb = dir->i_sb;
1facf9fc 33171+ args->dir = au_igrab(dir);
e49829fe
JR
33172+ args->br = au_sbr(sb, bindex);
33173+ atomic_inc(&args->br->br_count);
1facf9fc 33174+ args->wh_dentry = dget(wh_dentry);
53392da6 33175+ wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, sb, /*flags*/0);
1facf9fc 33176+ if (unlikely(wkq_err)) {
523b37e3 33177+ pr_warn("rmdir error %pd (%d), ignored\n", wh_dentry, wkq_err);
1facf9fc 33178+ au_whtmp_rmdir_free(args);
33179+ }
33180+}
7f207e10
AM
33181diff -urN /usr/share/empty/fs/aufs/whout.h linux/fs/aufs/whout.h
33182--- /usr/share/empty/fs/aufs/whout.h 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 33183+++ linux/fs/aufs/whout.h 2016-02-28 11:27:01.283912420 +0100
076b876e 33184@@ -0,0 +1,85 @@
1facf9fc 33185+/*
7f2ca4b1 33186+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 33187+ *
33188+ * This program, aufs is free software; you can redistribute it and/or modify
33189+ * it under the terms of the GNU General Public License as published by
33190+ * the Free Software Foundation; either version 2 of the License, or
33191+ * (at your option) any later version.
dece6358
AM
33192+ *
33193+ * This program is distributed in the hope that it will be useful,
33194+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33195+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33196+ * GNU General Public License for more details.
33197+ *
33198+ * You should have received a copy of the GNU General Public License
523b37e3 33199+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 33200+ */
33201+
33202+/*
33203+ * whiteout for logical deletion and opaque directory
33204+ */
33205+
33206+#ifndef __AUFS_WHOUT_H__
33207+#define __AUFS_WHOUT_H__
33208+
33209+#ifdef __KERNEL__
33210+
1facf9fc 33211+#include "dir.h"
33212+
33213+/* whout.c */
33214+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name);
076b876e
AM
33215+int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio);
33216+int au_diropq_test(struct dentry *h_dentry);
7f2ca4b1 33217+struct au_branch;
1facf9fc 33218+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
33219+ struct qstr *prefix);
33220+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br);
33221+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
33222+ struct dentry *dentry);
86dc4139 33223+int au_wh_init(struct au_branch *br, struct super_block *sb);
1facf9fc 33224+
33225+/* diropq flags */
33226+#define AuDiropq_CREATE 1
33227+#define au_ftest_diropq(flags, name) ((flags) & AuDiropq_##name)
7f207e10
AM
33228+#define au_fset_diropq(flags, name) \
33229+ do { (flags) |= AuDiropq_##name; } while (0)
33230+#define au_fclr_diropq(flags, name) \
33231+ do { (flags) &= ~AuDiropq_##name; } while (0)
1facf9fc 33232+
33233+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
33234+ unsigned int flags);
33235+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
33236+ struct au_branch *br);
33237+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
33238+ struct dentry *h_parent);
33239+
33240+/* real rmdir for the whiteout-ed dir */
33241+struct au_whtmp_rmdir {
33242+ struct inode *dir;
e49829fe 33243+ struct au_branch *br;
1facf9fc 33244+ struct dentry *wh_dentry;
dece6358 33245+ struct au_nhash whlist;
1facf9fc 33246+};
33247+
33248+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp);
33249+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp);
33250+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
33251+ struct dentry *wh_dentry, struct au_nhash *whlist);
33252+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
33253+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args);
33254+
33255+/* ---------------------------------------------------------------------- */
33256+
33257+static inline struct dentry *au_diropq_create(struct dentry *dentry,
33258+ aufs_bindex_t bindex)
33259+{
33260+ return au_diropq_sio(dentry, bindex, AuDiropq_CREATE);
33261+}
33262+
33263+static inline int au_diropq_remove(struct dentry *dentry, aufs_bindex_t bindex)
33264+{
33265+ return PTR_ERR(au_diropq_sio(dentry, bindex, !AuDiropq_CREATE));
33266+}
33267+
33268+#endif /* __KERNEL__ */
33269+#endif /* __AUFS_WHOUT_H__ */
7f207e10
AM
33270diff -urN /usr/share/empty/fs/aufs/wkq.c linux/fs/aufs/wkq.c
33271--- /usr/share/empty/fs/aufs/wkq.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 33272+++ linux/fs/aufs/wkq.c 2016-02-28 11:27:01.283912420 +0100
38d290e6 33273@@ -0,0 +1,213 @@
1facf9fc 33274+/*
7f2ca4b1 33275+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 33276+ *
33277+ * This program, aufs is free software; you can redistribute it and/or modify
33278+ * it under the terms of the GNU General Public License as published by
33279+ * the Free Software Foundation; either version 2 of the License, or
33280+ * (at your option) any later version.
dece6358
AM
33281+ *
33282+ * This program is distributed in the hope that it will be useful,
33283+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33284+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33285+ * GNU General Public License for more details.
33286+ *
33287+ * You should have received a copy of the GNU General Public License
523b37e3 33288+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 33289+ */
33290+
33291+/*
33292+ * workqueue for asynchronous/super-io operations
33293+ * todo: try new credential scheme
33294+ */
33295+
dece6358 33296+#include <linux/module.h>
1facf9fc 33297+#include "aufs.h"
33298+
9dbd164d 33299+/* internal workqueue named AUFS_WKQ_NAME */
b752ccd1 33300+
9dbd164d 33301+static struct workqueue_struct *au_wkq;
1facf9fc 33302+
33303+struct au_wkinfo {
33304+ struct work_struct wk;
7f207e10 33305+ struct kobject *kobj;
1facf9fc 33306+
33307+ unsigned int flags; /* see wkq.h */
33308+
33309+ au_wkq_func_t func;
33310+ void *args;
33311+
1facf9fc 33312+ struct completion *comp;
33313+};
33314+
33315+/* ---------------------------------------------------------------------- */
33316+
1facf9fc 33317+static void wkq_func(struct work_struct *wk)
33318+{
33319+ struct au_wkinfo *wkinfo = container_of(wk, struct au_wkinfo, wk);
33320+
2dfbb274 33321+ AuDebugOn(!uid_eq(current_fsuid(), GLOBAL_ROOT_UID));
7f207e10
AM
33322+ AuDebugOn(rlimit(RLIMIT_FSIZE) != RLIM_INFINITY);
33323+
1facf9fc 33324+ wkinfo->func(wkinfo->args);
1facf9fc 33325+ if (au_ftest_wkq(wkinfo->flags, WAIT))
33326+ complete(wkinfo->comp);
33327+ else {
7f207e10 33328+ kobject_put(wkinfo->kobj);
9dbd164d 33329+ module_put(THIS_MODULE); /* todo: ?? */
1facf9fc 33330+ kfree(wkinfo);
33331+ }
33332+}
33333+
33334+/*
33335+ * Since struct completion is large, try allocating it dynamically.
33336+ */
c2b27bf2 33337+#if 1 /* defined(CONFIG_4KSTACKS) || defined(AuTest4KSTACKS) */
1facf9fc 33338+#define AuWkqCompDeclare(name) struct completion *comp = NULL
33339+
33340+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
33341+{
33342+ *comp = kmalloc(sizeof(**comp), GFP_NOFS);
33343+ if (*comp) {
33344+ init_completion(*comp);
33345+ wkinfo->comp = *comp;
33346+ return 0;
33347+ }
33348+ return -ENOMEM;
33349+}
33350+
33351+static void au_wkq_comp_free(struct completion *comp)
33352+{
33353+ kfree(comp);
33354+}
33355+
33356+#else
33357+
33358+/* no braces */
33359+#define AuWkqCompDeclare(name) \
33360+ DECLARE_COMPLETION_ONSTACK(_ ## name); \
33361+ struct completion *comp = &_ ## name
33362+
33363+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
33364+{
33365+ wkinfo->comp = *comp;
33366+ return 0;
33367+}
33368+
33369+static void au_wkq_comp_free(struct completion *comp __maybe_unused)
33370+{
33371+ /* empty */
33372+}
33373+#endif /* 4KSTACKS */
33374+
53392da6 33375+static void au_wkq_run(struct au_wkinfo *wkinfo)
1facf9fc 33376+{
53392da6
AM
33377+ if (au_ftest_wkq(wkinfo->flags, NEST)) {
33378+ if (au_wkq_test()) {
38d290e6
JR
33379+ AuWarn1("wkq from wkq, unless silly-rename on NFS,"
33380+ " due to a dead dir by UDBA?\n");
53392da6
AM
33381+ AuDebugOn(au_ftest_wkq(wkinfo->flags, WAIT));
33382+ }
33383+ } else
33384+ au_dbg_verify_kthread();
33385+
33386+ if (au_ftest_wkq(wkinfo->flags, WAIT)) {
a1f66529 33387+ INIT_WORK_ONSTACK(&wkinfo->wk, wkq_func);
9dbd164d 33388+ queue_work(au_wkq, &wkinfo->wk);
4a4d8108
AM
33389+ } else {
33390+ INIT_WORK(&wkinfo->wk, wkq_func);
33391+ schedule_work(&wkinfo->wk);
33392+ }
1facf9fc 33393+}
33394+
7f207e10
AM
33395+/*
33396+ * Be careful. It is easy to make deadlock happen.
33397+ * processA: lock, wkq and wait
33398+ * processB: wkq and wait, lock in wkq
33399+ * --> deadlock
33400+ */
b752ccd1 33401+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args)
1facf9fc 33402+{
33403+ int err;
33404+ AuWkqCompDeclare(comp);
33405+ struct au_wkinfo wkinfo = {
b752ccd1 33406+ .flags = flags,
1facf9fc 33407+ .func = func,
33408+ .args = args
33409+ };
33410+
33411+ err = au_wkq_comp_alloc(&wkinfo, &comp);
33412+ if (!err) {
53392da6 33413+ au_wkq_run(&wkinfo);
1facf9fc 33414+ /* no timeout, no interrupt */
33415+ wait_for_completion(wkinfo.comp);
33416+ au_wkq_comp_free(comp);
4a4d8108 33417+ destroy_work_on_stack(&wkinfo.wk);
1facf9fc 33418+ }
33419+
33420+ return err;
33421+
33422+}
33423+
027c5e7a
AM
33424+/*
33425+ * Note: dget/dput() in func for aufs dentries are not supported. It will be a
33426+ * problem in a concurrent umounting.
33427+ */
53392da6
AM
33428+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
33429+ unsigned int flags)
1facf9fc 33430+{
33431+ int err;
33432+ struct au_wkinfo *wkinfo;
33433+
33434+ atomic_inc(&au_sbi(sb)->si_nowait.nw_len);
33435+
33436+ /*
33437+ * wkq_func() must free this wkinfo.
33438+ * it highly depends upon the implementation of workqueue.
33439+ */
33440+ err = 0;
33441+ wkinfo = kmalloc(sizeof(*wkinfo), GFP_NOFS);
33442+ if (wkinfo) {
7f207e10 33443+ wkinfo->kobj = &au_sbi(sb)->si_kobj;
53392da6 33444+ wkinfo->flags = flags & ~AuWkq_WAIT;
1facf9fc 33445+ wkinfo->func = func;
33446+ wkinfo->args = args;
33447+ wkinfo->comp = NULL;
7f207e10 33448+ kobject_get(wkinfo->kobj);
9dbd164d 33449+ __module_get(THIS_MODULE); /* todo: ?? */
1facf9fc 33450+
53392da6 33451+ au_wkq_run(wkinfo);
1facf9fc 33452+ } else {
33453+ err = -ENOMEM;
e49829fe 33454+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 33455+ }
33456+
33457+ return err;
33458+}
33459+
33460+/* ---------------------------------------------------------------------- */
33461+
33462+void au_nwt_init(struct au_nowait_tasks *nwt)
33463+{
33464+ atomic_set(&nwt->nw_len, 0);
4a4d8108 33465+ /* smp_mb(); */ /* atomic_set */
1facf9fc 33466+ init_waitqueue_head(&nwt->nw_wq);
33467+}
33468+
33469+void au_wkq_fin(void)
33470+{
9dbd164d 33471+ destroy_workqueue(au_wkq);
1facf9fc 33472+}
33473+
33474+int __init au_wkq_init(void)
33475+{
9dbd164d 33476+ int err;
b752ccd1
AM
33477+
33478+ err = 0;
86dc4139 33479+ au_wkq = alloc_workqueue(AUFS_WKQ_NAME, 0, WQ_DFL_ACTIVE);
9dbd164d
AM
33480+ if (IS_ERR(au_wkq))
33481+ err = PTR_ERR(au_wkq);
33482+ else if (!au_wkq)
33483+ err = -ENOMEM;
b752ccd1
AM
33484+
33485+ return err;
1facf9fc 33486+}
7f207e10
AM
33487diff -urN /usr/share/empty/fs/aufs/wkq.h linux/fs/aufs/wkq.h
33488--- /usr/share/empty/fs/aufs/wkq.h 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 33489+++ linux/fs/aufs/wkq.h 2016-02-28 11:27:01.283912420 +0100
523b37e3 33490@@ -0,0 +1,91 @@
1facf9fc 33491+/*
7f2ca4b1 33492+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 33493+ *
33494+ * This program, aufs is free software; you can redistribute it and/or modify
33495+ * it under the terms of the GNU General Public License as published by
33496+ * the Free Software Foundation; either version 2 of the License, or
33497+ * (at your option) any later version.
dece6358
AM
33498+ *
33499+ * This program is distributed in the hope that it will be useful,
33500+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33501+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33502+ * GNU General Public License for more details.
33503+ *
33504+ * You should have received a copy of the GNU General Public License
523b37e3 33505+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 33506+ */
33507+
33508+/*
33509+ * workqueue for asynchronous/super-io operations
33510+ * todo: try new credentials management scheme
33511+ */
33512+
33513+#ifndef __AUFS_WKQ_H__
33514+#define __AUFS_WKQ_H__
33515+
33516+#ifdef __KERNEL__
33517+
dece6358
AM
33518+struct super_block;
33519+
1facf9fc 33520+/* ---------------------------------------------------------------------- */
33521+
33522+/*
33523+ * in the next operation, wait for the 'nowait' tasks in system-wide workqueue
33524+ */
33525+struct au_nowait_tasks {
33526+ atomic_t nw_len;
33527+ wait_queue_head_t nw_wq;
33528+};
33529+
33530+/* ---------------------------------------------------------------------- */
33531+
33532+typedef void (*au_wkq_func_t)(void *args);
33533+
33534+/* wkq flags */
33535+#define AuWkq_WAIT 1
9dbd164d 33536+#define AuWkq_NEST (1 << 1)
1facf9fc 33537+#define au_ftest_wkq(flags, name) ((flags) & AuWkq_##name)
7f207e10
AM
33538+#define au_fset_wkq(flags, name) \
33539+ do { (flags) |= AuWkq_##name; } while (0)
33540+#define au_fclr_wkq(flags, name) \
33541+ do { (flags) &= ~AuWkq_##name; } while (0)
1facf9fc 33542+
9dbd164d
AM
33543+#ifndef CONFIG_AUFS_HNOTIFY
33544+#undef AuWkq_NEST
33545+#define AuWkq_NEST 0
33546+#endif
33547+
1facf9fc 33548+/* wkq.c */
b752ccd1 33549+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args);
53392da6
AM
33550+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
33551+ unsigned int flags);
1facf9fc 33552+void au_nwt_init(struct au_nowait_tasks *nwt);
33553+int __init au_wkq_init(void);
33554+void au_wkq_fin(void);
33555+
33556+/* ---------------------------------------------------------------------- */
33557+
53392da6
AM
33558+static inline int au_wkq_test(void)
33559+{
33560+ return current->flags & PF_WQ_WORKER;
33561+}
33562+
b752ccd1 33563+static inline int au_wkq_wait(au_wkq_func_t func, void *args)
1facf9fc 33564+{
b752ccd1 33565+ return au_wkq_do_wait(AuWkq_WAIT, func, args);
1facf9fc 33566+}
33567+
33568+static inline void au_nwt_done(struct au_nowait_tasks *nwt)
33569+{
e49829fe 33570+ if (atomic_dec_and_test(&nwt->nw_len))
1facf9fc 33571+ wake_up_all(&nwt->nw_wq);
33572+}
33573+
33574+static inline int au_nwt_flush(struct au_nowait_tasks *nwt)
33575+{
33576+ wait_event(nwt->nw_wq, !atomic_read(&nwt->nw_len));
33577+ return 0;
33578+}
33579+
33580+#endif /* __KERNEL__ */
33581+#endif /* __AUFS_WKQ_H__ */
c1595e42
JR
33582diff -urN /usr/share/empty/fs/aufs/xattr.c linux/fs/aufs/xattr.c
33583--- /usr/share/empty/fs/aufs/xattr.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
33584+++ linux/fs/aufs/xattr.c 2016-02-28 11:27:01.283912420 +0100
33585@@ -0,0 +1,344 @@
c1595e42 33586+/*
7f2ca4b1 33587+ * Copyright (C) 2014-2016 Junjiro R. Okajima
c1595e42
JR
33588+ *
33589+ * This program, aufs is free software; you can redistribute it and/or modify
33590+ * it under the terms of the GNU General Public License as published by
33591+ * the Free Software Foundation; either version 2 of the License, or
33592+ * (at your option) any later version.
33593+ *
33594+ * This program is distributed in the hope that it will be useful,
33595+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33596+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33597+ * GNU General Public License for more details.
33598+ *
33599+ * You should have received a copy of the GNU General Public License
33600+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
33601+ */
33602+
33603+/*
33604+ * handling xattr functions
33605+ */
33606+
33607+#include <linux/xattr.h>
33608+#include "aufs.h"
33609+
33610+static int au_xattr_ignore(int err, char *name, unsigned int ignore_flags)
33611+{
33612+ if (!ignore_flags)
33613+ goto out;
33614+ switch (err) {
33615+ case -ENOMEM:
33616+ case -EDQUOT:
33617+ goto out;
33618+ }
33619+
33620+ if ((ignore_flags & AuBrAttr_ICEX) == AuBrAttr_ICEX) {
33621+ err = 0;
33622+ goto out;
33623+ }
33624+
33625+#define cmp(brattr, prefix) do { \
33626+ if (!strncmp(name, XATTR_##prefix##_PREFIX, \
33627+ XATTR_##prefix##_PREFIX_LEN)) { \
33628+ if (ignore_flags & AuBrAttr_ICEX_##brattr) \
33629+ err = 0; \
33630+ goto out; \
33631+ } \
33632+ } while (0)
33633+
33634+ cmp(SEC, SECURITY);
33635+ cmp(SYS, SYSTEM);
33636+ cmp(TR, TRUSTED);
33637+ cmp(USR, USER);
33638+#undef cmp
33639+
33640+ if (ignore_flags & AuBrAttr_ICEX_OTH)
33641+ err = 0;
33642+
33643+out:
33644+ return err;
33645+}
33646+
33647+static const int au_xattr_out_of_list = AuBrAttr_ICEX_OTH << 1;
33648+
33649+static int au_do_cpup_xattr(struct dentry *h_dst, struct dentry *h_src,
7f2ca4b1
JR
33650+ char *name, char **buf, unsigned int ignore_flags,
33651+ unsigned int verbose)
c1595e42
JR
33652+{
33653+ int err;
33654+ ssize_t ssz;
33655+ struct inode *h_idst;
33656+
33657+ ssz = vfs_getxattr_alloc(h_src, name, buf, 0, GFP_NOFS);
33658+ err = ssz;
33659+ if (unlikely(err <= 0)) {
c1595e42
JR
33660+ if (err == -ENODATA
33661+ || (err == -EOPNOTSUPP
7f2ca4b1
JR
33662+ && ((ignore_flags & au_xattr_out_of_list)
33663+ || (au_test_nfs_noacl(h_src->d_inode)
33664+ && (!strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS)
33665+ || !strcmp(name,
33666+ XATTR_NAME_POSIX_ACL_DEFAULT))))
33667+ ))
c1595e42 33668+ err = 0;
7f2ca4b1
JR
33669+ if (err && (verbose || au_debug_test()))
33670+ pr_err("%s, err %d\n", name, err);
c1595e42
JR
33671+ goto out;
33672+ }
33673+
33674+ /* unlock it temporarily */
33675+ h_idst = h_dst->d_inode;
33676+ mutex_unlock(&h_idst->i_mutex);
33677+ err = vfsub_setxattr(h_dst, name, *buf, ssz, /*flags*/0);
33678+ mutex_lock_nested(&h_idst->i_mutex, AuLsc_I_CHILD2);
33679+ if (unlikely(err)) {
7f2ca4b1
JR
33680+ if (verbose || au_debug_test())
33681+ pr_err("%s, err %d\n", name, err);
c1595e42
JR
33682+ err = au_xattr_ignore(err, name, ignore_flags);
33683+ }
33684+
33685+out:
33686+ return err;
33687+}
33688+
7f2ca4b1
JR
33689+int au_cpup_xattr(struct dentry *h_dst, struct dentry *h_src, int ignore_flags,
33690+ unsigned int verbose)
c1595e42
JR
33691+{
33692+ int err, unlocked, acl_access, acl_default;
33693+ ssize_t ssz;
33694+ struct inode *h_isrc, *h_idst;
33695+ char *value, *p, *o, *e;
33696+
33697+ /* try to stop updates to the source inode while we reference it */
7f2ca4b1 33698+ /* there should not be the parent-child relationship between them */
c1595e42
JR
33699+ h_isrc = h_src->d_inode;
33700+ h_idst = h_dst->d_inode;
33701+ mutex_unlock(&h_idst->i_mutex);
33702+ mutex_lock_nested(&h_isrc->i_mutex, AuLsc_I_CHILD);
33703+ mutex_lock_nested(&h_idst->i_mutex, AuLsc_I_CHILD2);
33704+ unlocked = 0;
33705+
33706+ /* some filesystems don't list POSIX ACL, for example tmpfs */
33707+ ssz = vfs_listxattr(h_src, NULL, 0);
33708+ err = ssz;
33709+ if (unlikely(err < 0)) {
33710+ AuTraceErr(err);
33711+ if (err == -ENODATA
33712+ || err == -EOPNOTSUPP)
33713+ err = 0; /* ignore */
33714+ goto out;
33715+ }
33716+
33717+ err = 0;
33718+ p = NULL;
33719+ o = NULL;
33720+ if (ssz) {
33721+ err = -ENOMEM;
33722+ p = kmalloc(ssz, GFP_NOFS);
33723+ o = p;
33724+ if (unlikely(!p))
33725+ goto out;
33726+ err = vfs_listxattr(h_src, p, ssz);
33727+ }
33728+ mutex_unlock(&h_isrc->i_mutex);
33729+ unlocked = 1;
33730+ AuDbg("err %d, ssz %zd\n", err, ssz);
33731+ if (unlikely(err < 0))
33732+ goto out_free;
33733+
33734+ err = 0;
33735+ e = p + ssz;
33736+ value = NULL;
33737+ acl_access = 0;
33738+ acl_default = 0;
33739+ while (!err && p < e) {
33740+ acl_access |= !strncmp(p, XATTR_NAME_POSIX_ACL_ACCESS,
33741+ sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1);
33742+ acl_default |= !strncmp(p, XATTR_NAME_POSIX_ACL_DEFAULT,
33743+ sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)
33744+ - 1);
7f2ca4b1
JR
33745+ err = au_do_cpup_xattr(h_dst, h_src, p, &value, ignore_flags,
33746+ verbose);
c1595e42
JR
33747+ p += strlen(p) + 1;
33748+ }
33749+ AuTraceErr(err);
33750+ ignore_flags |= au_xattr_out_of_list;
33751+ if (!err && !acl_access) {
33752+ err = au_do_cpup_xattr(h_dst, h_src,
33753+ XATTR_NAME_POSIX_ACL_ACCESS, &value,
7f2ca4b1 33754+ ignore_flags, verbose);
c1595e42
JR
33755+ AuTraceErr(err);
33756+ }
33757+ if (!err && !acl_default) {
33758+ err = au_do_cpup_xattr(h_dst, h_src,
33759+ XATTR_NAME_POSIX_ACL_DEFAULT, &value,
7f2ca4b1 33760+ ignore_flags, verbose);
c1595e42
JR
33761+ AuTraceErr(err);
33762+ }
33763+
33764+ kfree(value);
33765+
33766+out_free:
33767+ kfree(o);
33768+out:
33769+ if (!unlocked)
33770+ mutex_unlock(&h_isrc->i_mutex);
33771+ AuTraceErr(err);
33772+ return err;
33773+}
33774+
33775+/* ---------------------------------------------------------------------- */
33776+
33777+enum {
33778+ AU_XATTR_LIST,
33779+ AU_XATTR_GET
33780+};
33781+
33782+struct au_lgxattr {
33783+ int type;
33784+ union {
33785+ struct {
33786+ char *list;
33787+ size_t size;
33788+ } list;
33789+ struct {
33790+ const char *name;
33791+ void *value;
33792+ size_t size;
33793+ } get;
33794+ } u;
33795+};
33796+
33797+static ssize_t au_lgxattr(struct dentry *dentry, struct au_lgxattr *arg)
33798+{
33799+ ssize_t err;
33800+ struct path h_path;
33801+ struct super_block *sb;
33802+
33803+ sb = dentry->d_sb;
33804+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
33805+ if (unlikely(err))
33806+ goto out;
33807+ err = au_h_path_getattr(dentry, /*force*/1, &h_path);
33808+ if (unlikely(err))
33809+ goto out_si;
33810+ if (unlikely(!h_path.dentry))
33811+ /* illegally overlapped or something */
33812+ goto out_di; /* pretending success */
33813+
33814+ /* always topmost entry only */
33815+ switch (arg->type) {
33816+ case AU_XATTR_LIST:
33817+ err = vfs_listxattr(h_path.dentry,
33818+ arg->u.list.list, arg->u.list.size);
33819+ break;
33820+ case AU_XATTR_GET:
33821+ err = vfs_getxattr(h_path.dentry,
33822+ arg->u.get.name, arg->u.get.value,
33823+ arg->u.get.size);
33824+ break;
33825+ }
33826+
33827+out_di:
33828+ di_read_unlock(dentry, AuLock_IR);
33829+out_si:
33830+ si_read_unlock(sb);
33831+out:
33832+ AuTraceErr(err);
33833+ return err;
33834+}
33835+
33836+ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size)
33837+{
33838+ struct au_lgxattr arg = {
33839+ .type = AU_XATTR_LIST,
33840+ .u.list = {
33841+ .list = list,
33842+ .size = size
33843+ },
33844+ };
33845+
33846+ return au_lgxattr(dentry, &arg);
33847+}
33848+
33849+ssize_t aufs_getxattr(struct dentry *dentry, const char *name, void *value,
33850+ size_t size)
33851+{
33852+ struct au_lgxattr arg = {
33853+ .type = AU_XATTR_GET,
33854+ .u.get = {
33855+ .name = name,
33856+ .value = value,
33857+ .size = size
33858+ },
33859+ };
33860+
33861+ return au_lgxattr(dentry, &arg);
33862+}
33863+
33864+int aufs_setxattr(struct dentry *dentry, const char *name, const void *value,
33865+ size_t size, int flags)
33866+{
33867+ struct au_srxattr arg = {
33868+ .type = AU_XATTR_SET,
33869+ .u.set = {
33870+ .name = name,
33871+ .value = value,
33872+ .size = size,
33873+ .flags = flags
33874+ },
33875+ };
33876+
33877+ return au_srxattr(dentry, &arg);
33878+}
33879+
33880+int aufs_removexattr(struct dentry *dentry, const char *name)
33881+{
33882+ struct au_srxattr arg = {
33883+ .type = AU_XATTR_REMOVE,
33884+ .u.remove = {
33885+ .name = name
33886+ },
33887+ };
33888+
33889+ return au_srxattr(dentry, &arg);
33890+}
33891+
33892+/* ---------------------------------------------------------------------- */
33893+
33894+#if 0
33895+static size_t au_xattr_list(struct dentry *dentry, char *list, size_t list_size,
33896+ const char *name, size_t name_len, int type)
33897+{
33898+ return aufs_listxattr(dentry, list, list_size);
33899+}
33900+
33901+static int au_xattr_get(struct dentry *dentry, const char *name, void *buffer,
33902+ size_t size, int type)
33903+{
33904+ return aufs_getxattr(dentry, name, buffer, size);
33905+}
33906+
33907+static int au_xattr_set(struct dentry *dentry, const char *name,
33908+ const void *value, size_t size, int flags, int type)
33909+{
33910+ return aufs_setxattr(dentry, name, value, size, flags);
33911+}
33912+
33913+static const struct xattr_handler au_xattr_handler = {
33914+ /* no prefix, no flags */
33915+ .list = au_xattr_list,
33916+ .get = au_xattr_get,
33917+ .set = au_xattr_set
33918+ /* why no remove? */
33919+};
33920+
33921+static const struct xattr_handler *au_xattr_handlers[] = {
33922+ &au_xattr_handler
33923+};
33924+
33925+void au_xattr_init(struct super_block *sb)
33926+{
33927+ /* sb->s_xattr = au_xattr_handlers; */
33928+}
33929+#endif
7f207e10
AM
33930diff -urN /usr/share/empty/fs/aufs/xino.c linux/fs/aufs/xino.c
33931--- /usr/share/empty/fs/aufs/xino.c 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1
JR
33932+++ linux/fs/aufs/xino.c 2016-02-28 12:40:45.727721922 +0100
33933@@ -0,0 +1,1343 @@
1facf9fc 33934+/*
7f2ca4b1 33935+ * Copyright (C) 2005-2016 Junjiro R. Okajima
1facf9fc 33936+ *
33937+ * This program, aufs is free software; you can redistribute it and/or modify
33938+ * it under the terms of the GNU General Public License as published by
33939+ * the Free Software Foundation; either version 2 of the License, or
33940+ * (at your option) any later version.
dece6358
AM
33941+ *
33942+ * This program is distributed in the hope that it will be useful,
33943+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33944+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33945+ * GNU General Public License for more details.
33946+ *
33947+ * You should have received a copy of the GNU General Public License
523b37e3 33948+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
1facf9fc 33949+ */
33950+
33951+/*
33952+ * external inode number translation table and bitmap
33953+ */
33954+
33955+#include <linux/seq_file.h>
392086de 33956+#include <linux/statfs.h>
1facf9fc 33957+#include "aufs.h"
33958+
9dbd164d 33959+/* todo: unnecessary to support mmap_sem since kernel-space? */
b752ccd1 33960+ssize_t xino_fread(au_readf_t func, struct file *file, void *kbuf, size_t size,
1facf9fc 33961+ loff_t *pos)
33962+{
33963+ ssize_t err;
33964+ mm_segment_t oldfs;
b752ccd1
AM
33965+ union {
33966+ void *k;
33967+ char __user *u;
33968+ } buf;
1facf9fc 33969+
b752ccd1 33970+ buf.k = kbuf;
1facf9fc 33971+ oldfs = get_fs();
33972+ set_fs(KERNEL_DS);
33973+ do {
33974+ /* todo: signal_pending? */
b752ccd1 33975+ err = func(file, buf.u, size, pos);
1facf9fc 33976+ } while (err == -EAGAIN || err == -EINTR);
33977+ set_fs(oldfs);
33978+
33979+#if 0 /* reserved for future use */
33980+ if (err > 0)
33981+ fsnotify_access(file->f_dentry);
33982+#endif
33983+
33984+ return err;
33985+}
33986+
33987+/* ---------------------------------------------------------------------- */
33988+
7f2ca4b1
JR
33989+static ssize_t xino_fwrite_wkq(au_writef_t func, struct file *file, void *buf,
33990+ size_t size, loff_t *pos);
33991+
b752ccd1 33992+static ssize_t do_xino_fwrite(au_writef_t func, struct file *file, void *kbuf,
1facf9fc 33993+ size_t size, loff_t *pos)
33994+{
33995+ ssize_t err;
33996+ mm_segment_t oldfs;
b752ccd1
AM
33997+ union {
33998+ void *k;
33999+ const char __user *u;
34000+ } buf;
7f2ca4b1
JR
34001+ int i;
34002+ const int prevent_endless = 10;
1facf9fc 34003+
7f2ca4b1 34004+ i = 0;
b752ccd1 34005+ buf.k = kbuf;
1facf9fc 34006+ oldfs = get_fs();
34007+ set_fs(KERNEL_DS);
1facf9fc 34008+ do {
b752ccd1 34009+ err = func(file, buf.u, size, pos);
7f2ca4b1
JR
34010+ if (err == -EINTR
34011+ && !au_wkq_test()
34012+ && fatal_signal_pending(current)) {
34013+ set_fs(oldfs);
34014+ err = xino_fwrite_wkq(func, file, kbuf, size, pos);
34015+ BUG_ON(err == -EINTR);
34016+ oldfs = get_fs();
34017+ set_fs(KERNEL_DS);
34018+ }
34019+ } while (i++ < prevent_endless
34020+ && (err == -EAGAIN || err == -EINTR));
1facf9fc 34021+ set_fs(oldfs);
34022+
34023+#if 0 /* reserved for future use */
34024+ if (err > 0)
34025+ fsnotify_modify(file->f_dentry);
34026+#endif
34027+
34028+ return err;
34029+}
34030+
34031+struct do_xino_fwrite_args {
34032+ ssize_t *errp;
34033+ au_writef_t func;
34034+ struct file *file;
34035+ void *buf;
34036+ size_t size;
34037+ loff_t *pos;
34038+};
34039+
34040+static void call_do_xino_fwrite(void *args)
34041+{
34042+ struct do_xino_fwrite_args *a = args;
34043+ *a->errp = do_xino_fwrite(a->func, a->file, a->buf, a->size, a->pos);
34044+}
34045+
7f2ca4b1
JR
34046+static ssize_t xino_fwrite_wkq(au_writef_t func, struct file *file, void *buf,
34047+ size_t size, loff_t *pos)
34048+{
34049+ ssize_t err;
34050+ int wkq_err;
34051+ struct do_xino_fwrite_args args = {
34052+ .errp = &err,
34053+ .func = func,
34054+ .file = file,
34055+ .buf = buf,
34056+ .size = size,
34057+ .pos = pos
34058+ };
34059+
34060+ /*
34061+ * it breaks RLIMIT_FSIZE and normal user's limit,
34062+ * users should care about quota and real 'filesystem full.'
34063+ */
34064+ wkq_err = au_wkq_wait(call_do_xino_fwrite, &args);
34065+ if (unlikely(wkq_err))
34066+ err = wkq_err;
34067+
34068+ return err;
34069+}
34070+
1facf9fc 34071+ssize_t xino_fwrite(au_writef_t func, struct file *file, void *buf, size_t size,
34072+ loff_t *pos)
34073+{
34074+ ssize_t err;
34075+
b752ccd1
AM
34076+ if (rlimit(RLIMIT_FSIZE) == RLIM_INFINITY) {
34077+ lockdep_off();
34078+ err = do_xino_fwrite(func, file, buf, size, pos);
34079+ lockdep_on();
7f2ca4b1
JR
34080+ } else
34081+ err = xino_fwrite_wkq(func, file, buf, size, pos);
1facf9fc 34082+
34083+ return err;
34084+}
34085+
34086+/* ---------------------------------------------------------------------- */
34087+
34088+/*
34089+ * create a new xinofile at the same place/path as @base_file.
34090+ */
34091+struct file *au_xino_create2(struct file *base_file, struct file *copy_src)
34092+{
34093+ struct file *file;
4a4d8108 34094+ struct dentry *base, *parent;
523b37e3 34095+ struct inode *dir, *delegated;
1facf9fc 34096+ struct qstr *name;
1308ab2a 34097+ struct path path;
4a4d8108 34098+ int err;
1facf9fc 34099+
34100+ base = base_file->f_dentry;
34101+ parent = base->d_parent; /* dir inode is locked */
34102+ dir = parent->d_inode;
34103+ IMustLock(dir);
34104+
34105+ file = ERR_PTR(-EINVAL);
34106+ name = &base->d_name;
4a4d8108
AM
34107+ path.dentry = vfsub_lookup_one_len(name->name, parent, name->len);
34108+ if (IS_ERR(path.dentry)) {
34109+ file = (void *)path.dentry;
523b37e3
AM
34110+ pr_err("%pd lookup err %ld\n",
34111+ base, PTR_ERR(path.dentry));
1facf9fc 34112+ goto out;
34113+ }
34114+
34115+ /* no need to mnt_want_write() since we call dentry_open() later */
4a4d8108 34116+ err = vfs_create(dir, path.dentry, S_IRUGO | S_IWUGO, NULL);
1facf9fc 34117+ if (unlikely(err)) {
34118+ file = ERR_PTR(err);
523b37e3 34119+ pr_err("%pd create err %d\n", base, err);
1facf9fc 34120+ goto out_dput;
34121+ }
34122+
c06a8ce3 34123+ path.mnt = base_file->f_path.mnt;
4a4d8108 34124+ file = vfsub_dentry_open(&path,
7f207e10 34125+ O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
2cbb1c4b 34126+ /* | __FMODE_NONOTIFY */);
1facf9fc 34127+ if (IS_ERR(file)) {
523b37e3 34128+ pr_err("%pd open err %ld\n", base, PTR_ERR(file));
1facf9fc 34129+ goto out_dput;
34130+ }
34131+
523b37e3
AM
34132+ delegated = NULL;
34133+ err = vfsub_unlink(dir, &file->f_path, &delegated, /*force*/0);
34134+ if (unlikely(err == -EWOULDBLOCK)) {
34135+ pr_warn("cannot retry for NFSv4 delegation"
34136+ " for an internal unlink\n");
34137+ iput(delegated);
34138+ }
1facf9fc 34139+ if (unlikely(err)) {
523b37e3 34140+ pr_err("%pd unlink err %d\n", base, err);
1facf9fc 34141+ goto out_fput;
34142+ }
34143+
34144+ if (copy_src) {
34145+ /* no one can touch copy_src xino */
c06a8ce3 34146+ err = au_copy_file(file, copy_src, vfsub_f_size_read(copy_src));
1facf9fc 34147+ if (unlikely(err)) {
523b37e3 34148+ pr_err("%pd copy err %d\n", base, err);
1facf9fc 34149+ goto out_fput;
34150+ }
34151+ }
34152+ goto out_dput; /* success */
34153+
4f0767ce 34154+out_fput:
1facf9fc 34155+ fput(file);
34156+ file = ERR_PTR(err);
4f0767ce 34157+out_dput:
4a4d8108 34158+ dput(path.dentry);
4f0767ce 34159+out:
1facf9fc 34160+ return file;
34161+}
34162+
34163+struct au_xino_lock_dir {
34164+ struct au_hinode *hdir;
34165+ struct dentry *parent;
34166+ struct mutex *mtx;
34167+};
34168+
34169+static void au_xino_lock_dir(struct super_block *sb, struct file *xino,
34170+ struct au_xino_lock_dir *ldir)
34171+{
34172+ aufs_bindex_t brid, bindex;
34173+
34174+ ldir->hdir = NULL;
34175+ bindex = -1;
34176+ brid = au_xino_brid(sb);
34177+ if (brid >= 0)
34178+ bindex = au_br_index(sb, brid);
34179+ if (bindex >= 0) {
34180+ ldir->hdir = au_hi(sb->s_root->d_inode, bindex);
4a4d8108 34181+ au_hn_imtx_lock_nested(ldir->hdir, AuLsc_I_PARENT);
1facf9fc 34182+ } else {
34183+ ldir->parent = dget_parent(xino->f_dentry);
34184+ ldir->mtx = &ldir->parent->d_inode->i_mutex;
34185+ mutex_lock_nested(ldir->mtx, AuLsc_I_PARENT);
34186+ }
34187+}
34188+
34189+static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir)
34190+{
34191+ if (ldir->hdir)
4a4d8108 34192+ au_hn_imtx_unlock(ldir->hdir);
1facf9fc 34193+ else {
34194+ mutex_unlock(ldir->mtx);
34195+ dput(ldir->parent);
34196+ }
34197+}
34198+
34199+/* ---------------------------------------------------------------------- */
34200+
34201+/* truncate xino files asynchronously */
34202+
34203+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex)
34204+{
34205+ int err;
392086de
AM
34206+ unsigned long jiffy;
34207+ blkcnt_t blocks;
1facf9fc 34208+ aufs_bindex_t bi, bend;
392086de 34209+ struct kstatfs *st;
1facf9fc 34210+ struct au_branch *br;
34211+ struct file *new_xino, *file;
34212+ struct super_block *h_sb;
34213+ struct au_xino_lock_dir ldir;
34214+
392086de 34215+ err = -ENOMEM;
7f2ca4b1 34216+ st = kmalloc(sizeof(*st), GFP_NOFS);
392086de
AM
34217+ if (unlikely(!st))
34218+ goto out;
34219+
1facf9fc 34220+ err = -EINVAL;
34221+ bend = au_sbend(sb);
34222+ if (unlikely(bindex < 0 || bend < bindex))
392086de 34223+ goto out_st;
1facf9fc 34224+ br = au_sbr(sb, bindex);
34225+ file = br->br_xino.xi_file;
34226+ if (!file)
392086de
AM
34227+ goto out_st;
34228+
34229+ err = vfs_statfs(&file->f_path, st);
34230+ if (unlikely(err))
34231+ AuErr1("statfs err %d, ignored\n", err);
34232+ jiffy = jiffies;
34233+ blocks = file_inode(file)->i_blocks;
34234+ pr_info("begin truncating xino(b%d), ib%llu, %llu/%llu free blks\n",
34235+ bindex, (u64)blocks, st->f_bfree, st->f_blocks);
1facf9fc 34236+
34237+ au_xino_lock_dir(sb, file, &ldir);
34238+ /* mnt_want_write() is unnecessary here */
34239+ new_xino = au_xino_create2(file, file);
34240+ au_xino_unlock_dir(&ldir);
34241+ err = PTR_ERR(new_xino);
392086de
AM
34242+ if (IS_ERR(new_xino)) {
34243+ pr_err("err %d, ignored\n", err);
34244+ goto out_st;
34245+ }
1facf9fc 34246+ err = 0;
34247+ fput(file);
34248+ br->br_xino.xi_file = new_xino;
34249+
86dc4139 34250+ h_sb = au_br_sb(br);
1facf9fc 34251+ for (bi = 0; bi <= bend; bi++) {
34252+ if (unlikely(bi == bindex))
34253+ continue;
34254+ br = au_sbr(sb, bi);
86dc4139 34255+ if (au_br_sb(br) != h_sb)
1facf9fc 34256+ continue;
34257+
34258+ fput(br->br_xino.xi_file);
34259+ br->br_xino.xi_file = new_xino;
34260+ get_file(new_xino);
34261+ }
34262+
392086de
AM
34263+ err = vfs_statfs(&new_xino->f_path, st);
34264+ if (!err) {
34265+ pr_info("end truncating xino(b%d), ib%llu, %llu/%llu free blks\n",
34266+ bindex, (u64)file_inode(new_xino)->i_blocks,
34267+ st->f_bfree, st->f_blocks);
34268+ if (file_inode(new_xino)->i_blocks < blocks)
34269+ au_sbi(sb)->si_xino_jiffy = jiffy;
34270+ } else
34271+ AuErr1("statfs err %d, ignored\n", err);
34272+
34273+out_st:
34274+ kfree(st);
4f0767ce 34275+out:
1facf9fc 34276+ return err;
34277+}
34278+
34279+struct xino_do_trunc_args {
34280+ struct super_block *sb;
34281+ struct au_branch *br;
34282+};
34283+
34284+static void xino_do_trunc(void *_args)
34285+{
34286+ struct xino_do_trunc_args *args = _args;
34287+ struct super_block *sb;
34288+ struct au_branch *br;
34289+ struct inode *dir;
34290+ int err;
34291+ aufs_bindex_t bindex;
34292+
34293+ err = 0;
34294+ sb = args->sb;
34295+ dir = sb->s_root->d_inode;
34296+ br = args->br;
34297+
34298+ si_noflush_write_lock(sb);
34299+ ii_read_lock_parent(dir);
34300+ bindex = au_br_index(sb, br->br_id);
34301+ err = au_xino_trunc(sb, bindex);
1facf9fc 34302+ ii_read_unlock(dir);
34303+ if (unlikely(err))
392086de 34304+ pr_warn("err b%d, (%d)\n", bindex, err);
1facf9fc 34305+ atomic_dec(&br->br_xino_running);
34306+ atomic_dec(&br->br_count);
1facf9fc 34307+ si_write_unlock(sb);
027c5e7a 34308+ au_nwt_done(&au_sbi(sb)->si_nowait);
1facf9fc 34309+ kfree(args);
34310+}
34311+
392086de
AM
34312+static int xino_trunc_test(struct super_block *sb, struct au_branch *br)
34313+{
34314+ int err;
34315+ struct kstatfs st;
34316+ struct au_sbinfo *sbinfo;
34317+
34318+ /* todo: si_xino_expire and the ratio should be customizable */
34319+ sbinfo = au_sbi(sb);
34320+ if (time_before(jiffies,
34321+ sbinfo->si_xino_jiffy + sbinfo->si_xino_expire))
34322+ return 0;
34323+
34324+ /* truncation border */
34325+ err = vfs_statfs(&br->br_xino.xi_file->f_path, &st);
34326+ if (unlikely(err)) {
34327+ AuErr1("statfs err %d, ignored\n", err);
34328+ return 0;
34329+ }
34330+ if (div64_u64(st.f_bfree * 100, st.f_blocks) >= AUFS_XINO_DEF_TRUNC)
34331+ return 0;
34332+
34333+ return 1;
34334+}
34335+
1facf9fc 34336+static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
34337+{
34338+ struct xino_do_trunc_args *args;
34339+ int wkq_err;
34340+
392086de 34341+ if (!xino_trunc_test(sb, br))
1facf9fc 34342+ return;
34343+
34344+ if (atomic_inc_return(&br->br_xino_running) > 1)
34345+ goto out;
34346+
34347+ /* lock and kfree() will be called in xino_do_trunc() */
34348+ args = kmalloc(sizeof(*args), GFP_NOFS);
34349+ if (unlikely(!args)) {
34350+ AuErr1("no memory\n");
34351+ goto out_args;
34352+ }
34353+
e49829fe 34354+ atomic_inc(&br->br_count);
1facf9fc 34355+ args->sb = sb;
34356+ args->br = br;
53392da6 34357+ wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0);
1facf9fc 34358+ if (!wkq_err)
34359+ return; /* success */
34360+
4a4d8108 34361+ pr_err("wkq %d\n", wkq_err);
e49829fe 34362+ atomic_dec(&br->br_count);
1facf9fc 34363+
4f0767ce 34364+out_args:
1facf9fc 34365+ kfree(args);
4f0767ce 34366+out:
e49829fe 34367+ atomic_dec(&br->br_xino_running);
1facf9fc 34368+}
34369+
34370+/* ---------------------------------------------------------------------- */
34371+
34372+static int au_xino_do_write(au_writef_t write, struct file *file,
34373+ ino_t h_ino, ino_t ino)
34374+{
34375+ loff_t pos;
34376+ ssize_t sz;
34377+
34378+ pos = h_ino;
34379+ if (unlikely(au_loff_max / sizeof(ino) - 1 < pos)) {
34380+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
34381+ return -EFBIG;
34382+ }
34383+ pos *= sizeof(ino);
34384+ sz = xino_fwrite(write, file, &ino, sizeof(ino), &pos);
34385+ if (sz == sizeof(ino))
34386+ return 0; /* success */
34387+
34388+ AuIOErr("write failed (%zd)\n", sz);
34389+ return -EIO;
34390+}
34391+
34392+/*
34393+ * write @ino to the xinofile for the specified branch{@sb, @bindex}
34394+ * at the position of @h_ino.
34395+ * even if @ino is zero, it is written to the xinofile and means no entry.
34396+ * if the size of the xino file on a specific filesystem exceeds the watermark,
34397+ * try truncating it.
34398+ */
34399+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
34400+ ino_t ino)
34401+{
34402+ int err;
34403+ unsigned int mnt_flags;
34404+ struct au_branch *br;
34405+
34406+ BUILD_BUG_ON(sizeof(long long) != sizeof(au_loff_max)
34407+ || ((loff_t)-1) > 0);
dece6358 34408+ SiMustAnyLock(sb);
1facf9fc 34409+
34410+ mnt_flags = au_mntflags(sb);
34411+ if (!au_opt_test(mnt_flags, XINO))
34412+ return 0;
34413+
34414+ br = au_sbr(sb, bindex);
34415+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
34416+ h_ino, ino);
34417+ if (!err) {
34418+ if (au_opt_test(mnt_flags, TRUNC_XINO)
86dc4139 34419+ && au_test_fs_trunc_xino(au_br_sb(br)))
1facf9fc 34420+ xino_try_trunc(sb, br);
34421+ return 0; /* success */
34422+ }
34423+
34424+ AuIOErr("write failed (%d)\n", err);
34425+ return -EIO;
34426+}
34427+
34428+/* ---------------------------------------------------------------------- */
34429+
34430+/* aufs inode number bitmap */
34431+
34432+static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE;
34433+static ino_t xib_calc_ino(unsigned long pindex, int bit)
34434+{
34435+ ino_t ino;
34436+
34437+ AuDebugOn(bit < 0 || page_bits <= bit);
34438+ ino = AUFS_FIRST_INO + pindex * page_bits + bit;
34439+ return ino;
34440+}
34441+
34442+static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit)
34443+{
34444+ AuDebugOn(ino < AUFS_FIRST_INO);
34445+ ino -= AUFS_FIRST_INO;
34446+ *pindex = ino / page_bits;
34447+ *bit = ino % page_bits;
34448+}
34449+
34450+static int xib_pindex(struct super_block *sb, unsigned long pindex)
34451+{
34452+ int err;
34453+ loff_t pos;
34454+ ssize_t sz;
34455+ struct au_sbinfo *sbinfo;
34456+ struct file *xib;
34457+ unsigned long *p;
34458+
34459+ sbinfo = au_sbi(sb);
34460+ MtxMustLock(&sbinfo->si_xib_mtx);
34461+ AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE
34462+ || !au_opt_test(sbinfo->si_mntflags, XINO));
34463+
34464+ if (pindex == sbinfo->si_xib_last_pindex)
34465+ return 0;
34466+
34467+ xib = sbinfo->si_xib;
34468+ p = sbinfo->si_xib_buf;
34469+ pos = sbinfo->si_xib_last_pindex;
34470+ pos *= PAGE_SIZE;
34471+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
34472+ if (unlikely(sz != PAGE_SIZE))
34473+ goto out;
34474+
34475+ pos = pindex;
34476+ pos *= PAGE_SIZE;
c06a8ce3 34477+ if (vfsub_f_size_read(xib) >= pos + PAGE_SIZE)
1facf9fc 34478+ sz = xino_fread(sbinfo->si_xread, xib, p, PAGE_SIZE, &pos);
34479+ else {
34480+ memset(p, 0, PAGE_SIZE);
34481+ sz = xino_fwrite(sbinfo->si_xwrite, xib, p, PAGE_SIZE, &pos);
34482+ }
34483+ if (sz == PAGE_SIZE) {
34484+ sbinfo->si_xib_last_pindex = pindex;
34485+ return 0; /* success */
34486+ }
34487+
4f0767ce 34488+out:
b752ccd1
AM
34489+ AuIOErr1("write failed (%zd)\n", sz);
34490+ err = sz;
34491+ if (sz >= 0)
34492+ err = -EIO;
34493+ return err;
34494+}
34495+
34496+/* ---------------------------------------------------------------------- */
34497+
34498+static void au_xib_clear_bit(struct inode *inode)
34499+{
34500+ int err, bit;
34501+ unsigned long pindex;
34502+ struct super_block *sb;
34503+ struct au_sbinfo *sbinfo;
34504+
34505+ AuDebugOn(inode->i_nlink);
34506+
34507+ sb = inode->i_sb;
34508+ xib_calc_bit(inode->i_ino, &pindex, &bit);
34509+ AuDebugOn(page_bits <= bit);
34510+ sbinfo = au_sbi(sb);
34511+ mutex_lock(&sbinfo->si_xib_mtx);
34512+ err = xib_pindex(sb, pindex);
34513+ if (!err) {
34514+ clear_bit(bit, sbinfo->si_xib_buf);
34515+ sbinfo->si_xib_next_bit = bit;
34516+ }
34517+ mutex_unlock(&sbinfo->si_xib_mtx);
34518+}
34519+
34520+/* for s_op->delete_inode() */
34521+void au_xino_delete_inode(struct inode *inode, const int unlinked)
34522+{
34523+ int err;
34524+ unsigned int mnt_flags;
34525+ aufs_bindex_t bindex, bend, bi;
34526+ unsigned char try_trunc;
34527+ struct au_iinfo *iinfo;
34528+ struct super_block *sb;
34529+ struct au_hinode *hi;
34530+ struct inode *h_inode;
34531+ struct au_branch *br;
34532+ au_writef_t xwrite;
34533+
34534+ sb = inode->i_sb;
34535+ mnt_flags = au_mntflags(sb);
34536+ if (!au_opt_test(mnt_flags, XINO)
34537+ || inode->i_ino == AUFS_ROOT_INO)
34538+ return;
34539+
34540+ if (unlinked) {
34541+ au_xigen_inc(inode);
34542+ au_xib_clear_bit(inode);
34543+ }
34544+
34545+ iinfo = au_ii(inode);
34546+ if (!iinfo)
34547+ return;
1facf9fc 34548+
b752ccd1
AM
34549+ bindex = iinfo->ii_bstart;
34550+ if (bindex < 0)
34551+ return;
1facf9fc 34552+
b752ccd1
AM
34553+ xwrite = au_sbi(sb)->si_xwrite;
34554+ try_trunc = !!au_opt_test(mnt_flags, TRUNC_XINO);
34555+ hi = iinfo->ii_hinode + bindex;
34556+ bend = iinfo->ii_bend;
34557+ for (; bindex <= bend; bindex++, hi++) {
34558+ h_inode = hi->hi_inode;
34559+ if (!h_inode
34560+ || (!unlinked && h_inode->i_nlink))
34561+ continue;
1facf9fc 34562+
b752ccd1
AM
34563+ /* inode may not be revalidated */
34564+ bi = au_br_index(sb, hi->hi_id);
34565+ if (bi < 0)
34566+ continue;
1facf9fc 34567+
b752ccd1
AM
34568+ br = au_sbr(sb, bi);
34569+ err = au_xino_do_write(xwrite, br->br_xino.xi_file,
34570+ h_inode->i_ino, /*ino*/0);
34571+ if (!err && try_trunc
86dc4139 34572+ && au_test_fs_trunc_xino(au_br_sb(br)))
b752ccd1 34573+ xino_try_trunc(sb, br);
1facf9fc 34574+ }
1facf9fc 34575+}
34576+
34577+/* get an unused inode number from bitmap */
34578+ino_t au_xino_new_ino(struct super_block *sb)
34579+{
34580+ ino_t ino;
34581+ unsigned long *p, pindex, ul, pend;
34582+ struct au_sbinfo *sbinfo;
34583+ struct file *file;
34584+ int free_bit, err;
34585+
34586+ if (!au_opt_test(au_mntflags(sb), XINO))
34587+ return iunique(sb, AUFS_FIRST_INO);
34588+
34589+ sbinfo = au_sbi(sb);
34590+ mutex_lock(&sbinfo->si_xib_mtx);
34591+ p = sbinfo->si_xib_buf;
34592+ free_bit = sbinfo->si_xib_next_bit;
34593+ if (free_bit < page_bits && !test_bit(free_bit, p))
34594+ goto out; /* success */
34595+ free_bit = find_first_zero_bit(p, page_bits);
34596+ if (free_bit < page_bits)
34597+ goto out; /* success */
34598+
34599+ pindex = sbinfo->si_xib_last_pindex;
34600+ for (ul = pindex - 1; ul < ULONG_MAX; ul--) {
34601+ err = xib_pindex(sb, ul);
34602+ if (unlikely(err))
34603+ goto out_err;
34604+ free_bit = find_first_zero_bit(p, page_bits);
34605+ if (free_bit < page_bits)
34606+ goto out; /* success */
34607+ }
34608+
34609+ file = sbinfo->si_xib;
c06a8ce3 34610+ pend = vfsub_f_size_read(file) / PAGE_SIZE;
1facf9fc 34611+ for (ul = pindex + 1; ul <= pend; ul++) {
34612+ err = xib_pindex(sb, ul);
34613+ if (unlikely(err))
34614+ goto out_err;
34615+ free_bit = find_first_zero_bit(p, page_bits);
34616+ if (free_bit < page_bits)
34617+ goto out; /* success */
34618+ }
34619+ BUG();
34620+
4f0767ce 34621+out:
1facf9fc 34622+ set_bit(free_bit, p);
7f207e10 34623+ sbinfo->si_xib_next_bit = free_bit + 1;
1facf9fc 34624+ pindex = sbinfo->si_xib_last_pindex;
34625+ mutex_unlock(&sbinfo->si_xib_mtx);
34626+ ino = xib_calc_ino(pindex, free_bit);
34627+ AuDbg("i%lu\n", (unsigned long)ino);
34628+ return ino;
4f0767ce 34629+out_err:
1facf9fc 34630+ mutex_unlock(&sbinfo->si_xib_mtx);
34631+ AuDbg("i0\n");
34632+ return 0;
34633+}
34634+
34635+/*
34636+ * read @ino from xinofile for the specified branch{@sb, @bindex}
34637+ * at the position of @h_ino.
34638+ * if no inode number is stored there, *@ino is set to zero.
34639+ */
34640+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
34641+ ino_t *ino)
34642+{
34643+ int err;
34644+ ssize_t sz;
34645+ loff_t pos;
34646+ struct file *file;
34647+ struct au_sbinfo *sbinfo;
34648+
34649+ *ino = 0;
34650+ if (!au_opt_test(au_mntflags(sb), XINO))
34651+ return 0; /* no xino */
34652+
34653+ err = 0;
34654+ sbinfo = au_sbi(sb);
34655+ pos = h_ino;
34656+ if (unlikely(au_loff_max / sizeof(*ino) - 1 < pos)) {
34657+ AuIOErr1("too large hi%lu\n", (unsigned long)h_ino);
34658+ return -EFBIG;
34659+ }
34660+ pos *= sizeof(*ino);
34661+
34662+ file = au_sbr(sb, bindex)->br_xino.xi_file;
c06a8ce3 34663+ if (vfsub_f_size_read(file) < pos + sizeof(*ino))
1facf9fc 34664+ return 0; /* no ino */
34665+
34666+ sz = xino_fread(sbinfo->si_xread, file, ino, sizeof(*ino), &pos);
34667+ if (sz == sizeof(*ino))
34668+ return 0; /* success */
34669+
34670+ err = sz;
34671+ if (unlikely(sz >= 0)) {
34672+ err = -EIO;
34673+ AuIOErr("xino read error (%zd)\n", sz);
34674+ }
34675+
34676+ return err;
34677+}
34678+
34679+/* ---------------------------------------------------------------------- */
34680+
34681+/* create and set a new xino file */
34682+
34683+struct file *au_xino_create(struct super_block *sb, char *fname, int silent)
34684+{
34685+ struct file *file;
34686+ struct dentry *h_parent, *d;
7f2ca4b1 34687+ struct inode *h_dir, *inode;
1facf9fc 34688+ int err;
34689+
34690+ /*
34691+ * at mount-time, and the xino file is the default path,
4a4d8108 34692+ * hnotify is disabled so we have no notify events to ignore.
1facf9fc 34693+ * when a user specified the xino, we cannot get au_hdir to be ignored.
34694+ */
7f207e10 34695+ file = vfsub_filp_open(fname, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
2cbb1c4b 34696+ /* | __FMODE_NONOTIFY */,
1facf9fc 34697+ S_IRUGO | S_IWUGO);
34698+ if (IS_ERR(file)) {
34699+ if (!silent)
4a4d8108 34700+ pr_err("open %s(%ld)\n", fname, PTR_ERR(file));
1facf9fc 34701+ return file;
34702+ }
34703+
34704+ /* keep file count */
7f2ca4b1
JR
34705+ err = 0;
34706+ inode = file_inode(file);
1facf9fc 34707+ h_parent = dget_parent(file->f_dentry);
34708+ h_dir = h_parent->d_inode;
34709+ mutex_lock_nested(&h_dir->i_mutex, AuLsc_I_PARENT);
34710+ /* mnt_want_write() is unnecessary here */
523b37e3 34711+ /* no delegation since it is just created */
7f2ca4b1
JR
34712+ if (inode->i_nlink)
34713+ err = vfsub_unlink(h_dir, &file->f_path, /*delegated*/NULL,
34714+ /*force*/0);
1facf9fc 34715+ mutex_unlock(&h_dir->i_mutex);
34716+ dput(h_parent);
34717+ if (unlikely(err)) {
34718+ if (!silent)
4a4d8108 34719+ pr_err("unlink %s(%d)\n", fname, err);
1facf9fc 34720+ goto out;
34721+ }
34722+
34723+ err = -EINVAL;
34724+ d = file->f_dentry;
34725+ if (unlikely(sb == d->d_sb)) {
34726+ if (!silent)
4a4d8108 34727+ pr_err("%s must be outside\n", fname);
1facf9fc 34728+ goto out;
34729+ }
34730+ if (unlikely(au_test_fs_bad_xino(d->d_sb))) {
34731+ if (!silent)
4a4d8108
AM
34732+ pr_err("xino doesn't support %s(%s)\n",
34733+ fname, au_sbtype(d->d_sb));
1facf9fc 34734+ goto out;
34735+ }
34736+ return file; /* success */
34737+
4f0767ce 34738+out:
1facf9fc 34739+ fput(file);
34740+ file = ERR_PTR(err);
34741+ return file;
34742+}
34743+
34744+/*
34745+ * find another branch which is on the same filesystem as the specified
34746+ * branch{@btgt}. search until @bend.
34747+ */
34748+static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt,
34749+ aufs_bindex_t bend)
34750+{
34751+ aufs_bindex_t bindex;
34752+ struct super_block *tgt_sb = au_sbr_sb(sb, btgt);
34753+
34754+ for (bindex = 0; bindex < btgt; bindex++)
34755+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
34756+ return bindex;
34757+ for (bindex++; bindex <= bend; bindex++)
34758+ if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
34759+ return bindex;
34760+ return -1;
34761+}
34762+
34763+/* ---------------------------------------------------------------------- */
34764+
34765+/*
34766+ * initialize the xinofile for the specified branch @br
34767+ * at the place/path where @base_file indicates.
34768+ * test whether another branch is on the same filesystem or not,
34769+ * if @do_test is true.
34770+ */
34771+int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t h_ino,
34772+ struct file *base_file, int do_test)
34773+{
34774+ int err;
34775+ ino_t ino;
34776+ aufs_bindex_t bend, bindex;
34777+ struct au_branch *shared_br, *b;
34778+ struct file *file;
34779+ struct super_block *tgt_sb;
34780+
34781+ shared_br = NULL;
34782+ bend = au_sbend(sb);
34783+ if (do_test) {
86dc4139 34784+ tgt_sb = au_br_sb(br);
1facf9fc 34785+ for (bindex = 0; bindex <= bend; bindex++) {
34786+ b = au_sbr(sb, bindex);
86dc4139 34787+ if (tgt_sb == au_br_sb(b)) {
1facf9fc 34788+ shared_br = b;
34789+ break;
34790+ }
34791+ }
34792+ }
34793+
34794+ if (!shared_br || !shared_br->br_xino.xi_file) {
34795+ struct au_xino_lock_dir ldir;
34796+
34797+ au_xino_lock_dir(sb, base_file, &ldir);
34798+ /* mnt_want_write() is unnecessary here */
34799+ file = au_xino_create2(base_file, NULL);
34800+ au_xino_unlock_dir(&ldir);
34801+ err = PTR_ERR(file);
34802+ if (IS_ERR(file))
34803+ goto out;
34804+ br->br_xino.xi_file = file;
34805+ } else {
34806+ br->br_xino.xi_file = shared_br->br_xino.xi_file;
34807+ get_file(br->br_xino.xi_file);
34808+ }
34809+
34810+ ino = AUFS_ROOT_INO;
34811+ err = au_xino_do_write(au_sbi(sb)->si_xwrite, br->br_xino.xi_file,
34812+ h_ino, ino);
b752ccd1
AM
34813+ if (unlikely(err)) {
34814+ fput(br->br_xino.xi_file);
34815+ br->br_xino.xi_file = NULL;
34816+ }
1facf9fc 34817+
4f0767ce 34818+out:
1facf9fc 34819+ return err;
34820+}
34821+
34822+/* ---------------------------------------------------------------------- */
34823+
34824+/* truncate a xino bitmap file */
34825+
34826+/* todo: slow */
34827+static int do_xib_restore(struct super_block *sb, struct file *file, void *page)
34828+{
34829+ int err, bit;
34830+ ssize_t sz;
34831+ unsigned long pindex;
34832+ loff_t pos, pend;
34833+ struct au_sbinfo *sbinfo;
34834+ au_readf_t func;
34835+ ino_t *ino;
34836+ unsigned long *p;
34837+
34838+ err = 0;
34839+ sbinfo = au_sbi(sb);
dece6358 34840+ MtxMustLock(&sbinfo->si_xib_mtx);
1facf9fc 34841+ p = sbinfo->si_xib_buf;
34842+ func = sbinfo->si_xread;
c06a8ce3 34843+ pend = vfsub_f_size_read(file);
1facf9fc 34844+ pos = 0;
34845+ while (pos < pend) {
34846+ sz = xino_fread(func, file, page, PAGE_SIZE, &pos);
34847+ err = sz;
34848+ if (unlikely(sz <= 0))
34849+ goto out;
34850+
34851+ err = 0;
34852+ for (ino = page; sz > 0; ino++, sz -= sizeof(ino)) {
34853+ if (unlikely(*ino < AUFS_FIRST_INO))
34854+ continue;
34855+
34856+ xib_calc_bit(*ino, &pindex, &bit);
34857+ AuDebugOn(page_bits <= bit);
34858+ err = xib_pindex(sb, pindex);
34859+ if (!err)
34860+ set_bit(bit, p);
34861+ else
34862+ goto out;
34863+ }
34864+ }
34865+
4f0767ce 34866+out:
1facf9fc 34867+ return err;
34868+}
34869+
34870+static int xib_restore(struct super_block *sb)
34871+{
34872+ int err;
34873+ aufs_bindex_t bindex, bend;
34874+ void *page;
34875+
34876+ err = -ENOMEM;
34877+ page = (void *)__get_free_page(GFP_NOFS);
34878+ if (unlikely(!page))
34879+ goto out;
34880+
34881+ err = 0;
34882+ bend = au_sbend(sb);
34883+ for (bindex = 0; !err && bindex <= bend; bindex++)
34884+ if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0)
34885+ err = do_xib_restore
34886+ (sb, au_sbr(sb, bindex)->br_xino.xi_file, page);
34887+ else
34888+ AuDbg("b%d\n", bindex);
34889+ free_page((unsigned long)page);
34890+
4f0767ce 34891+out:
1facf9fc 34892+ return err;
34893+}
34894+
34895+int au_xib_trunc(struct super_block *sb)
34896+{
34897+ int err;
34898+ ssize_t sz;
34899+ loff_t pos;
34900+ struct au_xino_lock_dir ldir;
34901+ struct au_sbinfo *sbinfo;
34902+ unsigned long *p;
34903+ struct file *file;
34904+
dece6358
AM
34905+ SiMustWriteLock(sb);
34906+
1facf9fc 34907+ err = 0;
34908+ sbinfo = au_sbi(sb);
34909+ if (!au_opt_test(sbinfo->si_mntflags, XINO))
34910+ goto out;
34911+
34912+ file = sbinfo->si_xib;
c06a8ce3 34913+ if (vfsub_f_size_read(file) <= PAGE_SIZE)
1facf9fc 34914+ goto out;
34915+
34916+ au_xino_lock_dir(sb, file, &ldir);
34917+ /* mnt_want_write() is unnecessary here */
34918+ file = au_xino_create2(sbinfo->si_xib, NULL);
34919+ au_xino_unlock_dir(&ldir);
34920+ err = PTR_ERR(file);
34921+ if (IS_ERR(file))
34922+ goto out;
34923+ fput(sbinfo->si_xib);
34924+ sbinfo->si_xib = file;
34925+
34926+ p = sbinfo->si_xib_buf;
34927+ memset(p, 0, PAGE_SIZE);
34928+ pos = 0;
34929+ sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xib, p, PAGE_SIZE, &pos);
34930+ if (unlikely(sz != PAGE_SIZE)) {
34931+ err = sz;
34932+ AuIOErr("err %d\n", err);
34933+ if (sz >= 0)
34934+ err = -EIO;
34935+ goto out;
34936+ }
34937+
34938+ mutex_lock(&sbinfo->si_xib_mtx);
34939+ /* mnt_want_write() is unnecessary here */
34940+ err = xib_restore(sb);
34941+ mutex_unlock(&sbinfo->si_xib_mtx);
34942+
34943+out:
34944+ return err;
34945+}
34946+
34947+/* ---------------------------------------------------------------------- */
34948+
34949+/*
34950+ * xino mount option handlers
34951+ */
34952+static au_readf_t find_readf(struct file *h_file)
34953+{
34954+ const struct file_operations *fop = h_file->f_op;
34955+
523b37e3
AM
34956+ if (fop->read)
34957+ return fop->read;
34958+ if (fop->aio_read)
34959+ return do_sync_read;
076b876e
AM
34960+ if (fop->read_iter)
34961+ return new_sync_read;
1facf9fc 34962+ return ERR_PTR(-ENOSYS);
34963+}
34964+
34965+static au_writef_t find_writef(struct file *h_file)
34966+{
34967+ const struct file_operations *fop = h_file->f_op;
34968+
523b37e3
AM
34969+ if (fop->write)
34970+ return fop->write;
34971+ if (fop->aio_write)
34972+ return do_sync_write;
076b876e
AM
34973+ if (fop->write_iter)
34974+ return new_sync_write;
1facf9fc 34975+ return ERR_PTR(-ENOSYS);
34976+}
34977+
34978+/* xino bitmap */
34979+static void xino_clear_xib(struct super_block *sb)
34980+{
34981+ struct au_sbinfo *sbinfo;
34982+
dece6358
AM
34983+ SiMustWriteLock(sb);
34984+
1facf9fc 34985+ sbinfo = au_sbi(sb);
34986+ sbinfo->si_xread = NULL;
34987+ sbinfo->si_xwrite = NULL;
34988+ if (sbinfo->si_xib)
34989+ fput(sbinfo->si_xib);
34990+ sbinfo->si_xib = NULL;
34991+ free_page((unsigned long)sbinfo->si_xib_buf);
34992+ sbinfo->si_xib_buf = NULL;
34993+}
34994+
34995+static int au_xino_set_xib(struct super_block *sb, struct file *base)
34996+{
34997+ int err;
34998+ loff_t pos;
34999+ struct au_sbinfo *sbinfo;
35000+ struct file *file;
35001+
dece6358
AM
35002+ SiMustWriteLock(sb);
35003+
1facf9fc 35004+ sbinfo = au_sbi(sb);
35005+ file = au_xino_create2(base, sbinfo->si_xib);
35006+ err = PTR_ERR(file);
35007+ if (IS_ERR(file))
35008+ goto out;
35009+ if (sbinfo->si_xib)
35010+ fput(sbinfo->si_xib);
35011+ sbinfo->si_xib = file;
35012+ sbinfo->si_xread = find_readf(file);
35013+ sbinfo->si_xwrite = find_writef(file);
35014+
35015+ err = -ENOMEM;
35016+ if (!sbinfo->si_xib_buf)
35017+ sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS);
35018+ if (unlikely(!sbinfo->si_xib_buf))
35019+ goto out_unset;
35020+
35021+ sbinfo->si_xib_last_pindex = 0;
35022+ sbinfo->si_xib_next_bit = 0;
c06a8ce3 35023+ if (vfsub_f_size_read(file) < PAGE_SIZE) {
1facf9fc 35024+ pos = 0;
35025+ err = xino_fwrite(sbinfo->si_xwrite, file, sbinfo->si_xib_buf,
35026+ PAGE_SIZE, &pos);
35027+ if (unlikely(err != PAGE_SIZE))
35028+ goto out_free;
35029+ }
35030+ err = 0;
35031+ goto out; /* success */
35032+
4f0767ce 35033+out_free:
1facf9fc 35034+ free_page((unsigned long)sbinfo->si_xib_buf);
b752ccd1
AM
35035+ sbinfo->si_xib_buf = NULL;
35036+ if (err >= 0)
35037+ err = -EIO;
4f0767ce 35038+out_unset:
b752ccd1
AM
35039+ fput(sbinfo->si_xib);
35040+ sbinfo->si_xib = NULL;
35041+ sbinfo->si_xread = NULL;
35042+ sbinfo->si_xwrite = NULL;
4f0767ce 35043+out:
b752ccd1 35044+ return err;
1facf9fc 35045+}
35046+
b752ccd1
AM
35047+/* xino for each branch */
35048+static void xino_clear_br(struct super_block *sb)
35049+{
35050+ aufs_bindex_t bindex, bend;
35051+ struct au_branch *br;
1facf9fc 35052+
b752ccd1
AM
35053+ bend = au_sbend(sb);
35054+ for (bindex = 0; bindex <= bend; bindex++) {
35055+ br = au_sbr(sb, bindex);
35056+ if (!br || !br->br_xino.xi_file)
35057+ continue;
35058+
35059+ fput(br->br_xino.xi_file);
35060+ br->br_xino.xi_file = NULL;
35061+ }
35062+}
35063+
35064+static int au_xino_set_br(struct super_block *sb, struct file *base)
1facf9fc 35065+{
35066+ int err;
b752ccd1
AM
35067+ ino_t ino;
35068+ aufs_bindex_t bindex, bend, bshared;
35069+ struct {
35070+ struct file *old, *new;
35071+ } *fpair, *p;
35072+ struct au_branch *br;
35073+ struct inode *inode;
35074+ au_writef_t writef;
1facf9fc 35075+
b752ccd1
AM
35076+ SiMustWriteLock(sb);
35077+
35078+ err = -ENOMEM;
35079+ bend = au_sbend(sb);
35080+ fpair = kcalloc(bend + 1, sizeof(*fpair), GFP_NOFS);
35081+ if (unlikely(!fpair))
1facf9fc 35082+ goto out;
35083+
b752ccd1
AM
35084+ inode = sb->s_root->d_inode;
35085+ ino = AUFS_ROOT_INO;
35086+ writef = au_sbi(sb)->si_xwrite;
35087+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
35088+ br = au_sbr(sb, bindex);
35089+ bshared = is_sb_shared(sb, bindex, bindex - 1);
35090+ if (bshared >= 0) {
35091+ /* shared xino */
35092+ *p = fpair[bshared];
35093+ get_file(p->new);
35094+ }
35095+
35096+ if (!p->new) {
35097+ /* new xino */
35098+ p->old = br->br_xino.xi_file;
35099+ p->new = au_xino_create2(base, br->br_xino.xi_file);
35100+ err = PTR_ERR(p->new);
35101+ if (IS_ERR(p->new)) {
35102+ p->new = NULL;
35103+ goto out_pair;
35104+ }
35105+ }
35106+
35107+ err = au_xino_do_write(writef, p->new,
35108+ au_h_iptr(inode, bindex)->i_ino, ino);
35109+ if (unlikely(err))
35110+ goto out_pair;
35111+ }
35112+
35113+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
35114+ br = au_sbr(sb, bindex);
35115+ if (br->br_xino.xi_file)
35116+ fput(br->br_xino.xi_file);
35117+ get_file(p->new);
35118+ br->br_xino.xi_file = p->new;
35119+ }
1facf9fc 35120+
4f0767ce 35121+out_pair:
b752ccd1
AM
35122+ for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++)
35123+ if (p->new)
35124+ fput(p->new);
35125+ else
35126+ break;
35127+ kfree(fpair);
4f0767ce 35128+out:
1facf9fc 35129+ return err;
35130+}
b752ccd1
AM
35131+
35132+void au_xino_clr(struct super_block *sb)
35133+{
35134+ struct au_sbinfo *sbinfo;
35135+
35136+ au_xigen_clr(sb);
35137+ xino_clear_xib(sb);
35138+ xino_clear_br(sb);
35139+ sbinfo = au_sbi(sb);
35140+ /* lvalue, do not call au_mntflags() */
35141+ au_opt_clr(sbinfo->si_mntflags, XINO);
35142+}
35143+
35144+int au_xino_set(struct super_block *sb, struct au_opt_xino *xino, int remount)
35145+{
35146+ int err, skip;
35147+ struct dentry *parent, *cur_parent;
35148+ struct qstr *dname, *cur_name;
35149+ struct file *cur_xino;
35150+ struct inode *dir;
35151+ struct au_sbinfo *sbinfo;
35152+
35153+ SiMustWriteLock(sb);
35154+
35155+ err = 0;
35156+ sbinfo = au_sbi(sb);
35157+ parent = dget_parent(xino->file->f_dentry);
35158+ if (remount) {
35159+ skip = 0;
35160+ dname = &xino->file->f_dentry->d_name;
35161+ cur_xino = sbinfo->si_xib;
35162+ if (cur_xino) {
35163+ cur_parent = dget_parent(cur_xino->f_dentry);
35164+ cur_name = &cur_xino->f_dentry->d_name;
35165+ skip = (cur_parent == parent
38d290e6 35166+ && au_qstreq(dname, cur_name));
b752ccd1
AM
35167+ dput(cur_parent);
35168+ }
35169+ if (skip)
35170+ goto out;
35171+ }
35172+
35173+ au_opt_set(sbinfo->si_mntflags, XINO);
35174+ dir = parent->d_inode;
35175+ mutex_lock_nested(&dir->i_mutex, AuLsc_I_PARENT);
35176+ /* mnt_want_write() is unnecessary here */
35177+ err = au_xino_set_xib(sb, xino->file);
35178+ if (!err)
35179+ err = au_xigen_set(sb, xino->file);
35180+ if (!err)
35181+ err = au_xino_set_br(sb, xino->file);
35182+ mutex_unlock(&dir->i_mutex);
35183+ if (!err)
35184+ goto out; /* success */
35185+
35186+ /* reset all */
35187+ AuIOErr("failed creating xino(%d).\n", err);
c1595e42
JR
35188+ au_xigen_clr(sb);
35189+ xino_clear_xib(sb);
b752ccd1 35190+
4f0767ce 35191+out:
b752ccd1
AM
35192+ dput(parent);
35193+ return err;
35194+}
35195+
35196+/* ---------------------------------------------------------------------- */
35197+
35198+/*
35199+ * create a xinofile at the default place/path.
35200+ */
35201+struct file *au_xino_def(struct super_block *sb)
35202+{
35203+ struct file *file;
35204+ char *page, *p;
35205+ struct au_branch *br;
35206+ struct super_block *h_sb;
35207+ struct path path;
35208+ aufs_bindex_t bend, bindex, bwr;
35209+
35210+ br = NULL;
35211+ bend = au_sbend(sb);
35212+ bwr = -1;
35213+ for (bindex = 0; bindex <= bend; bindex++) {
35214+ br = au_sbr(sb, bindex);
35215+ if (au_br_writable(br->br_perm)
86dc4139 35216+ && !au_test_fs_bad_xino(au_br_sb(br))) {
b752ccd1
AM
35217+ bwr = bindex;
35218+ break;
35219+ }
35220+ }
35221+
7f207e10
AM
35222+ if (bwr >= 0) {
35223+ file = ERR_PTR(-ENOMEM);
537831f9 35224+ page = (void *)__get_free_page(GFP_NOFS);
7f207e10
AM
35225+ if (unlikely(!page))
35226+ goto out;
86dc4139 35227+ path.mnt = au_br_mnt(br);
7f207e10
AM
35228+ path.dentry = au_h_dptr(sb->s_root, bwr);
35229+ p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME));
35230+ file = (void *)p;
35231+ if (!IS_ERR(p)) {
35232+ strcat(p, "/" AUFS_XINO_FNAME);
35233+ AuDbg("%s\n", p);
35234+ file = au_xino_create(sb, p, /*silent*/0);
35235+ if (!IS_ERR(file))
35236+ au_xino_brid_set(sb, br->br_id);
35237+ }
537831f9 35238+ free_page((unsigned long)page);
7f207e10
AM
35239+ } else {
35240+ file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0);
35241+ if (IS_ERR(file))
35242+ goto out;
35243+ h_sb = file->f_dentry->d_sb;
35244+ if (unlikely(au_test_fs_bad_xino(h_sb))) {
35245+ pr_err("xino doesn't support %s(%s)\n",
35246+ AUFS_XINO_DEFPATH, au_sbtype(h_sb));
35247+ fput(file);
35248+ file = ERR_PTR(-EINVAL);
35249+ }
35250+ if (!IS_ERR(file))
35251+ au_xino_brid_set(sb, -1);
35252+ }
0c5527e5 35253+
7f207e10
AM
35254+out:
35255+ return file;
35256+}
35257+
35258+/* ---------------------------------------------------------------------- */
35259+
35260+int au_xino_path(struct seq_file *seq, struct file *file)
35261+{
35262+ int err;
35263+
35264+ err = au_seq_path(seq, &file->f_path);
7f2ca4b1 35265+ if (unlikely(err))
7f207e10
AM
35266+ goto out;
35267+
7f207e10
AM
35268+#define Deleted "\\040(deleted)"
35269+ seq->count -= sizeof(Deleted) - 1;
35270+ AuDebugOn(memcmp(seq->buf + seq->count, Deleted,
35271+ sizeof(Deleted) - 1));
35272+#undef Deleted
35273+
35274+out:
35275+ return err;
35276+}
537831f9
AM
35277diff -urN /usr/share/empty/include/uapi/linux/aufs_type.h linux/include/uapi/linux/aufs_type.h
35278--- /usr/share/empty/include/uapi/linux/aufs_type.h 1970-01-01 01:00:00.000000000 +0100
7f2ca4b1 35279+++ linux/include/uapi/linux/aufs_type.h 2016-02-28 12:40:45.727721922 +0100
c1595e42 35280@@ -0,0 +1,419 @@
7f207e10 35281+/*
7f2ca4b1 35282+ * Copyright (C) 2005-2016 Junjiro R. Okajima
7f207e10
AM
35283+ *
35284+ * This program, aufs is free software; you can redistribute it and/or modify
35285+ * it under the terms of the GNU General Public License as published by
35286+ * the Free Software Foundation; either version 2 of the License, or
35287+ * (at your option) any later version.
35288+ *
35289+ * This program is distributed in the hope that it will be useful,
35290+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
35291+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35292+ * GNU General Public License for more details.
35293+ *
35294+ * You should have received a copy of the GNU General Public License
523b37e3 35295+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7f207e10
AM
35296+ */
35297+
35298+#ifndef __AUFS_TYPE_H__
35299+#define __AUFS_TYPE_H__
35300+
f6c5ef8b
AM
35301+#define AUFS_NAME "aufs"
35302+
9dbd164d 35303+#ifdef __KERNEL__
f6c5ef8b
AM
35304+/*
35305+ * define it before including all other headers.
35306+ * sched.h may use pr_* macros before defining "current", so define the
35307+ * no-current version first, and re-define later.
35308+ */
35309+#define pr_fmt(fmt) AUFS_NAME " %s:%d: " fmt, __func__, __LINE__
35310+#include <linux/sched.h>
35311+#undef pr_fmt
a2a7ad62
AM
35312+#define pr_fmt(fmt) \
35313+ AUFS_NAME " %s:%d:%.*s[%d]: " fmt, __func__, __LINE__, \
35314+ (int)sizeof(current->comm), current->comm, current->pid
9dbd164d
AM
35315+#else
35316+#include <stdint.h>
35317+#include <sys/types.h>
f6c5ef8b 35318+#endif /* __KERNEL__ */
7f207e10 35319+
f6c5ef8b
AM
35320+#include <linux/limits.h>
35321+
7f2ca4b1 35322+#define AUFS_VERSION "3.18.25+-20160223"
7f207e10
AM
35323+
35324+/* todo? move this to linux-2.6.19/include/magic.h */
35325+#define AUFS_SUPER_MAGIC ('a' << 24 | 'u' << 16 | 'f' << 8 | 's')
35326+
35327+/* ---------------------------------------------------------------------- */
35328+
35329+#ifdef CONFIG_AUFS_BRANCH_MAX_127
9dbd164d 35330+typedef int8_t aufs_bindex_t;
7f207e10
AM
35331+#define AUFS_BRANCH_MAX 127
35332+#else
9dbd164d 35333+typedef int16_t aufs_bindex_t;
7f207e10
AM
35334+#ifdef CONFIG_AUFS_BRANCH_MAX_511
35335+#define AUFS_BRANCH_MAX 511
35336+#elif defined(CONFIG_AUFS_BRANCH_MAX_1023)
35337+#define AUFS_BRANCH_MAX 1023
35338+#elif defined(CONFIG_AUFS_BRANCH_MAX_32767)
35339+#define AUFS_BRANCH_MAX 32767
35340+#endif
35341+#endif
35342+
35343+#ifdef __KERNEL__
35344+#ifndef AUFS_BRANCH_MAX
35345+#error unknown CONFIG_AUFS_BRANCH_MAX value
35346+#endif
35347+#endif /* __KERNEL__ */
35348+
35349+/* ---------------------------------------------------------------------- */
35350+
7f207e10
AM
35351+#define AUFS_FSTYPE AUFS_NAME
35352+
35353+#define AUFS_ROOT_INO 2
35354+#define AUFS_FIRST_INO 11
35355+
35356+#define AUFS_WH_PFX ".wh."
35357+#define AUFS_WH_PFX_LEN ((int)sizeof(AUFS_WH_PFX) - 1)
35358+#define AUFS_WH_TMP_LEN 4
86dc4139 35359+/* a limit for rmdir/rename a dir and copyup */
7f207e10
AM
35360+#define AUFS_MAX_NAMELEN (NAME_MAX \
35361+ - AUFS_WH_PFX_LEN * 2 /* doubly whiteouted */\
35362+ - 1 /* dot */\
35363+ - AUFS_WH_TMP_LEN) /* hex */
35364+#define AUFS_XINO_FNAME "." AUFS_NAME ".xino"
35365+#define AUFS_XINO_DEFPATH "/tmp/" AUFS_XINO_FNAME
392086de
AM
35366+#define AUFS_XINO_DEF_SEC 30 /* seconds */
35367+#define AUFS_XINO_DEF_TRUNC 45 /* percentage */
7f207e10
AM
35368+#define AUFS_DIRWH_DEF 3
35369+#define AUFS_RDCACHE_DEF 10 /* seconds */
027c5e7a 35370+#define AUFS_RDCACHE_MAX 3600 /* seconds */
7f207e10
AM
35371+#define AUFS_RDBLK_DEF 512 /* bytes */
35372+#define AUFS_RDHASH_DEF 32
35373+#define AUFS_WKQ_NAME AUFS_NAME "d"
027c5e7a
AM
35374+#define AUFS_MFS_DEF_SEC 30 /* seconds */
35375+#define AUFS_MFS_MAX_SEC 3600 /* seconds */
076b876e 35376+#define AUFS_FHSM_CACHE_DEF_SEC 30 /* seconds */
86dc4139 35377+#define AUFS_PLINK_WARN 50 /* number of plinks in a single bucket */
7f207e10
AM
35378+
35379+/* pseudo-link maintenance under /proc */
35380+#define AUFS_PLINK_MAINT_NAME "plink_maint"
35381+#define AUFS_PLINK_MAINT_DIR "fs/" AUFS_NAME
35382+#define AUFS_PLINK_MAINT_PATH AUFS_PLINK_MAINT_DIR "/" AUFS_PLINK_MAINT_NAME
35383+
35384+#define AUFS_DIROPQ_NAME AUFS_WH_PFX ".opq" /* whiteouted doubly */
35385+#define AUFS_WH_DIROPQ AUFS_WH_PFX AUFS_DIROPQ_NAME
35386+
35387+#define AUFS_BASE_NAME AUFS_WH_PFX AUFS_NAME
35388+#define AUFS_PLINKDIR_NAME AUFS_WH_PFX "plnk"
35389+#define AUFS_ORPHDIR_NAME AUFS_WH_PFX "orph"
35390+
35391+/* doubly whiteouted */
35392+#define AUFS_WH_BASE AUFS_WH_PFX AUFS_BASE_NAME
35393+#define AUFS_WH_PLINKDIR AUFS_WH_PFX AUFS_PLINKDIR_NAME
35394+#define AUFS_WH_ORPHDIR AUFS_WH_PFX AUFS_ORPHDIR_NAME
35395+
1e00d052 35396+/* branch permissions and attributes */
7f207e10
AM
35397+#define AUFS_BRPERM_RW "rw"
35398+#define AUFS_BRPERM_RO "ro"
35399+#define AUFS_BRPERM_RR "rr"
076b876e
AM
35400+#define AUFS_BRATTR_COO_REG "coo_reg"
35401+#define AUFS_BRATTR_COO_ALL "coo_all"
35402+#define AUFS_BRATTR_FHSM "fhsm"
35403+#define AUFS_BRATTR_UNPIN "unpin"
c1595e42
JR
35404+#define AUFS_BRATTR_ICEX "icex"
35405+#define AUFS_BRATTR_ICEX_SEC "icexsec"
35406+#define AUFS_BRATTR_ICEX_SYS "icexsys"
35407+#define AUFS_BRATTR_ICEX_TR "icextr"
35408+#define AUFS_BRATTR_ICEX_USR "icexusr"
35409+#define AUFS_BRATTR_ICEX_OTH "icexoth"
1e00d052
AM
35410+#define AUFS_BRRATTR_WH "wh"
35411+#define AUFS_BRWATTR_NLWH "nolwh"
076b876e
AM
35412+#define AUFS_BRWATTR_MOO "moo"
35413+
35414+#define AuBrPerm_RW 1 /* writable, hardlinkable wh */
35415+#define AuBrPerm_RO (1 << 1) /* readonly */
35416+#define AuBrPerm_RR (1 << 2) /* natively readonly */
35417+#define AuBrPerm_Mask (AuBrPerm_RW | AuBrPerm_RO | AuBrPerm_RR)
35418+
35419+#define AuBrAttr_COO_REG (1 << 3) /* copy-up on open */
35420+#define AuBrAttr_COO_ALL (1 << 4)
35421+#define AuBrAttr_COO_Mask (AuBrAttr_COO_REG | AuBrAttr_COO_ALL)
35422+
35423+#define AuBrAttr_FHSM (1 << 5) /* file-based hsm */
35424+#define AuBrAttr_UNPIN (1 << 6) /* rename-able top dir of
c1595e42
JR
35425+ branch. meaningless since
35426+ linux-3.18-rc1 */
35427+
35428+/* ignore error in copying XATTR */
35429+#define AuBrAttr_ICEX_SEC (1 << 7)
35430+#define AuBrAttr_ICEX_SYS (1 << 8)
35431+#define AuBrAttr_ICEX_TR (1 << 9)
35432+#define AuBrAttr_ICEX_USR (1 << 10)
35433+#define AuBrAttr_ICEX_OTH (1 << 11)
35434+#define AuBrAttr_ICEX (AuBrAttr_ICEX_SEC \
35435+ | AuBrAttr_ICEX_SYS \
35436+ | AuBrAttr_ICEX_TR \
35437+ | AuBrAttr_ICEX_USR \
35438+ | AuBrAttr_ICEX_OTH)
35439+
35440+#define AuBrRAttr_WH (1 << 12) /* whiteout-able */
076b876e
AM
35441+#define AuBrRAttr_Mask AuBrRAttr_WH
35442+
c1595e42
JR
35443+#define AuBrWAttr_NoLinkWH (1 << 13) /* un-hardlinkable whiteouts */
35444+#define AuBrWAttr_MOO (1 << 14) /* move-up on open */
076b876e
AM
35445+#define AuBrWAttr_Mask (AuBrWAttr_NoLinkWH | AuBrWAttr_MOO)
35446+
35447+#define AuBrAttr_CMOO_Mask (AuBrAttr_COO_Mask | AuBrWAttr_MOO)
35448+
c1595e42 35449+/* #warning test userspace */
076b876e
AM
35450+#ifdef __KERNEL__
35451+#ifndef CONFIG_AUFS_FHSM
35452+#undef AuBrAttr_FHSM
35453+#define AuBrAttr_FHSM 0
35454+#endif
c1595e42
JR
35455+#ifndef CONFIG_AUFS_XATTR
35456+#undef AuBrAttr_ICEX
35457+#define AuBrAttr_ICEX 0
35458+#undef AuBrAttr_ICEX_SEC
35459+#define AuBrAttr_ICEX_SEC 0
35460+#undef AuBrAttr_ICEX_SYS
35461+#define AuBrAttr_ICEX_SYS 0
35462+#undef AuBrAttr_ICEX_TR
35463+#define AuBrAttr_ICEX_TR 0
35464+#undef AuBrAttr_ICEX_USR
35465+#define AuBrAttr_ICEX_USR 0
35466+#undef AuBrAttr_ICEX_OTH
35467+#define AuBrAttr_ICEX_OTH 0
35468+#endif
076b876e
AM
35469+#endif
35470+
35471+/* the longest combination */
c1595e42
JR
35472+/* AUFS_BRATTR_ICEX and AUFS_BRATTR_ICEX_TR don't affect here */
35473+#define AuBrPermStrSz sizeof(AUFS_BRPERM_RW \
35474+ "+" AUFS_BRATTR_COO_REG \
35475+ "+" AUFS_BRATTR_FHSM \
35476+ "+" AUFS_BRATTR_UNPIN \
7f2ca4b1
JR
35477+ "+" AUFS_BRATTR_ICEX_SEC \
35478+ "+" AUFS_BRATTR_ICEX_SYS \
35479+ "+" AUFS_BRATTR_ICEX_USR \
35480+ "+" AUFS_BRATTR_ICEX_OTH \
076b876e
AM
35481+ "+" AUFS_BRWATTR_NLWH)
35482+
35483+typedef struct {
35484+ char a[AuBrPermStrSz];
35485+} au_br_perm_str_t;
35486+
35487+static inline int au_br_writable(int brperm)
35488+{
35489+ return brperm & AuBrPerm_RW;
35490+}
35491+
35492+static inline int au_br_whable(int brperm)
35493+{
35494+ return brperm & (AuBrPerm_RW | AuBrRAttr_WH);
35495+}
35496+
35497+static inline int au_br_wh_linkable(int brperm)
35498+{
35499+ return !(brperm & AuBrWAttr_NoLinkWH);
35500+}
35501+
35502+static inline int au_br_cmoo(int brperm)
35503+{
35504+ return brperm & AuBrAttr_CMOO_Mask;
35505+}
35506+
35507+static inline int au_br_fhsm(int brperm)
35508+{
35509+ return brperm & AuBrAttr_FHSM;
35510+}
7f207e10
AM
35511+
35512+/* ---------------------------------------------------------------------- */
35513+
35514+/* ioctl */
35515+enum {
35516+ /* readdir in userspace */
35517+ AuCtl_RDU,
35518+ AuCtl_RDU_INO,
35519+
076b876e
AM
35520+ AuCtl_WBR_FD, /* pathconf wrapper */
35521+ AuCtl_IBUSY, /* busy inode */
35522+ AuCtl_MVDOWN, /* move-down */
35523+ AuCtl_BR, /* info about branches */
35524+ AuCtl_FHSM_FD /* connection for fhsm */
7f207e10
AM
35525+};
35526+
35527+/* borrowed from linux/include/linux/kernel.h */
35528+#ifndef ALIGN
35529+#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
35530+#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
35531+#endif
35532+
35533+/* borrowed from linux/include/linux/compiler-gcc3.h */
35534+#ifndef __aligned
35535+#define __aligned(x) __attribute__((aligned(x)))
53392da6
AM
35536+#endif
35537+
35538+#ifdef __KERNEL__
35539+#ifndef __packed
7f207e10
AM
35540+#define __packed __attribute__((packed))
35541+#endif
53392da6 35542+#endif
7f207e10
AM
35543+
35544+struct au_rdu_cookie {
9dbd164d
AM
35545+ uint64_t h_pos;
35546+ int16_t bindex;
35547+ uint8_t flags;
35548+ uint8_t pad;
35549+ uint32_t generation;
7f207e10
AM
35550+} __aligned(8);
35551+
35552+struct au_rdu_ent {
9dbd164d
AM
35553+ uint64_t ino;
35554+ int16_t bindex;
35555+ uint8_t type;
35556+ uint8_t nlen;
35557+ uint8_t wh;
7f207e10
AM
35558+ char name[0];
35559+} __aligned(8);
35560+
35561+static inline int au_rdu_len(int nlen)
35562+{
35563+ /* include the terminating NULL */
35564+ return ALIGN(sizeof(struct au_rdu_ent) + nlen + 1,
9dbd164d 35565+ sizeof(uint64_t));
7f207e10
AM
35566+}
35567+
35568+union au_rdu_ent_ul {
35569+ struct au_rdu_ent __user *e;
9dbd164d 35570+ uint64_t ul;
7f207e10
AM
35571+};
35572+
35573+enum {
35574+ AufsCtlRduV_SZ,
35575+ AufsCtlRduV_End
35576+};
35577+
35578+struct aufs_rdu {
35579+ /* input */
35580+ union {
9dbd164d
AM
35581+ uint64_t sz; /* AuCtl_RDU */
35582+ uint64_t nent; /* AuCtl_RDU_INO */
7f207e10
AM
35583+ };
35584+ union au_rdu_ent_ul ent;
9dbd164d 35585+ uint16_t verify[AufsCtlRduV_End];
7f207e10
AM
35586+
35587+ /* input/output */
9dbd164d 35588+ uint32_t blk;
7f207e10
AM
35589+
35590+ /* output */
35591+ union au_rdu_ent_ul tail;
35592+ /* number of entries which were added in a single call */
9dbd164d
AM
35593+ uint64_t rent;
35594+ uint8_t full;
35595+ uint8_t shwh;
7f207e10
AM
35596+
35597+ struct au_rdu_cookie cookie;
35598+} __aligned(8);
35599+
1e00d052
AM
35600+/* ---------------------------------------------------------------------- */
35601+
35602+struct aufs_wbr_fd {
9dbd164d
AM
35603+ uint32_t oflags;
35604+ int16_t brid;
1e00d052
AM
35605+} __aligned(8);
35606+
35607+/* ---------------------------------------------------------------------- */
35608+
027c5e7a 35609+struct aufs_ibusy {
9dbd164d
AM
35610+ uint64_t ino, h_ino;
35611+ int16_t bindex;
027c5e7a
AM
35612+} __aligned(8);
35613+
1e00d052
AM
35614+/* ---------------------------------------------------------------------- */
35615+
392086de
AM
35616+/* error code for move-down */
35617+/* the actual message strings are implemented in aufs-util.git */
35618+enum {
35619+ EAU_MVDOWN_OPAQUE = 1,
35620+ EAU_MVDOWN_WHITEOUT,
35621+ EAU_MVDOWN_UPPER,
35622+ EAU_MVDOWN_BOTTOM,
35623+ EAU_MVDOWN_NOUPPER,
35624+ EAU_MVDOWN_NOLOWERBR,
35625+ EAU_Last
35626+};
35627+
c2b27bf2 35628+/* flags for move-down */
392086de
AM
35629+#define AUFS_MVDOWN_DMSG 1
35630+#define AUFS_MVDOWN_OWLOWER (1 << 1) /* overwrite lower */
35631+#define AUFS_MVDOWN_KUPPER (1 << 2) /* keep upper */
35632+#define AUFS_MVDOWN_ROLOWER (1 << 3) /* do even if lower is RO */
35633+#define AUFS_MVDOWN_ROLOWER_R (1 << 4) /* did on lower RO */
35634+#define AUFS_MVDOWN_ROUPPER (1 << 5) /* do even if upper is RO */
35635+#define AUFS_MVDOWN_ROUPPER_R (1 << 6) /* did on upper RO */
35636+#define AUFS_MVDOWN_BRID_UPPER (1 << 7) /* upper brid */
35637+#define AUFS_MVDOWN_BRID_LOWER (1 << 8) /* lower brid */
076b876e
AM
35638+#define AUFS_MVDOWN_FHSM_LOWER (1 << 9) /* find fhsm attr for lower */
35639+#define AUFS_MVDOWN_STFS (1 << 10) /* req. stfs */
35640+#define AUFS_MVDOWN_STFS_FAILED (1 << 11) /* output: stfs is unusable */
35641+#define AUFS_MVDOWN_BOTTOM (1 << 12) /* output: no more lowers */
c2b27bf2 35642+
076b876e 35643+/* index for move-down */
392086de
AM
35644+enum {
35645+ AUFS_MVDOWN_UPPER,
35646+ AUFS_MVDOWN_LOWER,
35647+ AUFS_MVDOWN_NARRAY
35648+};
35649+
076b876e
AM
35650+/*
35651+ * additional info of move-down
35652+ * number of free blocks and inodes.
35653+ * subset of struct kstatfs, but smaller and always 64bit.
35654+ */
35655+struct aufs_stfs {
35656+ uint64_t f_blocks;
35657+ uint64_t f_bavail;
35658+ uint64_t f_files;
35659+ uint64_t f_ffree;
35660+};
35661+
35662+struct aufs_stbr {
35663+ int16_t brid; /* optional input */
35664+ int16_t bindex; /* output */
35665+ struct aufs_stfs stfs; /* output when AUFS_MVDOWN_STFS set */
35666+} __aligned(8);
35667+
c2b27bf2 35668+struct aufs_mvdown {
076b876e
AM
35669+ uint32_t flags; /* input/output */
35670+ struct aufs_stbr stbr[AUFS_MVDOWN_NARRAY]; /* input/output */
35671+ int8_t au_errno; /* output */
35672+} __aligned(8);
35673+
35674+/* ---------------------------------------------------------------------- */
35675+
35676+union aufs_brinfo {
35677+ /* PATH_MAX may differ between kernel-space and user-space */
35678+ char _spacer[4096];
392086de 35679+ struct {
076b876e
AM
35680+ int16_t id;
35681+ int perm;
35682+ char path[0];
35683+ };
c2b27bf2
AM
35684+} __aligned(8);
35685+
35686+/* ---------------------------------------------------------------------- */
35687+
7f207e10
AM
35688+#define AuCtlType 'A'
35689+#define AUFS_CTL_RDU _IOWR(AuCtlType, AuCtl_RDU, struct aufs_rdu)
35690+#define AUFS_CTL_RDU_INO _IOWR(AuCtlType, AuCtl_RDU_INO, struct aufs_rdu)
1e00d052
AM
35691+#define AUFS_CTL_WBR_FD _IOW(AuCtlType, AuCtl_WBR_FD, \
35692+ struct aufs_wbr_fd)
027c5e7a 35693+#define AUFS_CTL_IBUSY _IOWR(AuCtlType, AuCtl_IBUSY, struct aufs_ibusy)
392086de
AM
35694+#define AUFS_CTL_MVDOWN _IOWR(AuCtlType, AuCtl_MVDOWN, \
35695+ struct aufs_mvdown)
076b876e
AM
35696+#define AUFS_CTL_BRINFO _IOW(AuCtlType, AuCtl_BR, union aufs_brinfo)
35697+#define AUFS_CTL_FHSM_FD _IOW(AuCtlType, AuCtl_FHSM_FD, int)
7f207e10
AM
35698+
35699+#endif /* __AUFS_TYPE_H__ */
7f2ca4b1 35700aufs3.18.25+ loopback patch
93a1a2a2
JR
35701
35702diff --git a/drivers/block/loop.c b/drivers/block/loop.c
076b876e 35703index 30efd68..77b31b4 100644
93a1a2a2
JR
35704--- a/drivers/block/loop.c
35705+++ b/drivers/block/loop.c
35706@@ -514,7 +514,7 @@ out:
35707 }
35708
35709 struct switch_request {
35710- struct file *file;
35711+ struct file *file, *virt_file;
35712 struct completion wait;
35713 };
35714
35715@@ -576,7 +576,8 @@ static int loop_thread(void *data)
35716 * First it needs to flush existing IO, it does this by sending a magic
35717 * BIO down the pipe. The completion of this BIO does the actual switch.
35718 */
35719-static int loop_switch(struct loop_device *lo, struct file *file)
35720+static int loop_switch(struct loop_device *lo, struct file *file,
35721+ struct file *virt_file)
35722 {
35723 struct switch_request w;
35724 struct bio *bio = bio_alloc(GFP_KERNEL, 0);
35725@@ -584,6 +585,7 @@ static int loop_switch(struct loop_device *lo, struct file *file)
35726 return -ENOMEM;
35727 init_completion(&w.wait);
35728 w.file = file;
35729+ w.virt_file = virt_file;
35730 bio->bi_private = &w;
35731 bio->bi_bdev = NULL;
35732 loop_make_request(lo->lo_queue, bio);
35733@@ -600,7 +602,7 @@ static int loop_flush(struct loop_device *lo)
35734 if (!lo->lo_thread)
35735 return 0;
35736
35737- return loop_switch(lo, NULL);
35738+ return loop_switch(lo, NULL, NULL);
35739 }
35740
35741 /*
35742@@ -619,6 +621,7 @@ static void do_loop_switch(struct loop_device *lo, struct switch_request *p)
35743 mapping = file->f_mapping;
35744 mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
35745 lo->lo_backing_file = file;
35746+ lo->lo_backing_virt_file = p->virt_file;
35747 lo->lo_blocksize = S_ISBLK(mapping->host->i_mode) ?
35748 mapping->host->i_bdev->bd_block_size : PAGE_SIZE;
35749 lo->old_gfp_mask = mapping_gfp_mask(mapping);
35750@@ -627,6 +630,13 @@ out:
35751 complete(&p->wait);
35752 }
35753
35754+static struct file *loop_real_file(struct file *file)
35755+{
35756+ struct file *f = NULL;
35757+ if (file->f_dentry->d_sb->s_op->real_loop)
35758+ f = file->f_dentry->d_sb->s_op->real_loop(file);
35759+ return f;
35760+}
35761
35762 /*
35763 * loop_change_fd switched the backing store of a loopback device to
35764@@ -640,6 +650,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
35765 unsigned int arg)
35766 {
35767 struct file *file, *old_file;
35768+ struct file *f, *virt_file = NULL, *old_virt_file;
35769 struct inode *inode;
35770 int error;
35771
35772@@ -656,9 +667,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
35773 file = fget(arg);
35774 if (!file)
35775 goto out;
35776+ f = loop_real_file(file);
35777+ if (f) {
35778+ virt_file = file;
35779+ file = f;
35780+ get_file(file);
35781+ }
35782
35783 inode = file->f_mapping->host;
35784 old_file = lo->lo_backing_file;
35785+ old_virt_file = lo->lo_backing_virt_file;
35786
35787 error = -EINVAL;
35788
35789@@ -670,17 +688,21 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
35790 goto out_putf;
35791
35792 /* and ... switch */
35793- error = loop_switch(lo, file);
35794+ error = loop_switch(lo, file, virt_file);
35795 if (error)
35796 goto out_putf;
35797
35798 fput(old_file);
35799+ if (old_virt_file)
35800+ fput(old_virt_file);
35801 if (lo->lo_flags & LO_FLAGS_PARTSCAN)
35802 ioctl_by_bdev(bdev, BLKRRPART, 0);
35803 return 0;
35804
35805 out_putf:
35806 fput(file);
35807+ if (virt_file)
35808+ fput(virt_file);
35809 out:
35810 return error;
35811 }
35812@@ -841,7 +863,7 @@ static void loop_config_discard(struct loop_device *lo)
35813 static int loop_set_fd(struct loop_device *lo, fmode_t mode,
35814 struct block_device *bdev, unsigned int arg)
35815 {
35816- struct file *file, *f;
35817+ struct file *file, *f, *virt_file = NULL;
35818 struct inode *inode;
35819 struct address_space *mapping;
35820 unsigned lo_blocksize;
35821@@ -856,6 +878,12 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
35822 file = fget(arg);
35823 if (!file)
35824 goto out;
35825+ f = loop_real_file(file);
35826+ if (f) {
35827+ virt_file = file;
35828+ file = f;
35829+ get_file(file);
35830+ }
35831
35832 error = -EBUSY;
35833 if (lo->lo_state != Lo_unbound)
35834@@ -904,6 +932,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
35835 lo->lo_device = bdev;
35836 lo->lo_flags = lo_flags;
35837 lo->lo_backing_file = file;
35838+ lo->lo_backing_virt_file = virt_file;
35839 lo->transfer = transfer_none;
35840 lo->ioctl = NULL;
35841 lo->lo_sizelimit = 0;
35842@@ -948,6 +977,7 @@ out_clr:
35843 lo->lo_thread = NULL;
35844 lo->lo_device = NULL;
35845 lo->lo_backing_file = NULL;
35846+ lo->lo_backing_virt_file = NULL;
35847 lo->lo_flags = 0;
35848 set_capacity(lo->lo_disk, 0);
35849 invalidate_bdev(bdev);
35850@@ -957,6 +987,8 @@ out_clr:
35851 lo->lo_state = Lo_unbound;
35852 out_putf:
35853 fput(file);
35854+ if (virt_file)
35855+ fput(virt_file);
35856 out:
35857 /* This is safe: open() is still holding a reference. */
35858 module_put(THIS_MODULE);
35859@@ -1003,6 +1035,7 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
35860 static int loop_clr_fd(struct loop_device *lo)
35861 {
35862 struct file *filp = lo->lo_backing_file;
35863+ struct file *virt_filp = lo->lo_backing_virt_file;
35864 gfp_t gfp = lo->old_gfp_mask;
35865 struct block_device *bdev = lo->lo_device;
35866
35867@@ -1036,6 +1069,7 @@ static int loop_clr_fd(struct loop_device *lo)
35868
35869 spin_lock_irq(&lo->lo_lock);
35870 lo->lo_backing_file = NULL;
35871+ lo->lo_backing_virt_file = NULL;
35872 spin_unlock_irq(&lo->lo_lock);
35873
35874 loop_release_xfer(lo);
35875@@ -1078,6 +1112,8 @@ static int loop_clr_fd(struct loop_device *lo)
35876 * bd_mutex which is usually taken before lo_ctl_mutex.
35877 */
35878 fput(filp);
35879+ if (virt_filp)
35880+ fput(virt_filp);
35881 return 0;
35882 }
35883
35884diff --git a/drivers/block/loop.h b/drivers/block/loop.h
35885index 90df5d6..cb91822 100644
35886--- a/drivers/block/loop.h
35887+++ b/drivers/block/loop.h
35888@@ -44,7 +44,7 @@ struct loop_device {
35889 int (*ioctl)(struct loop_device *, int cmd,
35890 unsigned long arg);
35891
35892- struct file * lo_backing_file;
35893+ struct file * lo_backing_file, *lo_backing_virt_file;
35894 struct block_device *lo_device;
35895 unsigned lo_blocksize;
35896 void *key_data;
35897diff --git a/fs/aufs/f_op.c b/fs/aufs/f_op.c
7f2ca4b1 35898index b0e14a9..e00e692 100644
93a1a2a2
JR
35899--- a/fs/aufs/f_op.c
35900+++ b/fs/aufs/f_op.c
7f2ca4b1
JR
35901@@ -408,7 +408,7 @@ static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
35902 if (IS_ERR(h_file))
35903 goto out;
35904
93a1a2a2
JR
35905- if (au_test_loopback_kthread()) {
35906+ if (0 && au_test_loopback_kthread()) {
35907 au_warn_loopback(h_file->f_dentry->d_sb);
35908 if (file->f_mapping != h_file->f_mapping) {
35909 file->f_mapping = h_file->f_mapping;
35910diff --git a/fs/aufs/loop.c b/fs/aufs/loop.c
7f2ca4b1 35911index c9d9659..4e19ced 100644
93a1a2a2
JR
35912--- a/fs/aufs/loop.c
35913+++ b/fs/aufs/loop.c
7f2ca4b1
JR
35914@@ -131,3 +131,19 @@ void au_loopback_fin(void)
35915 symbol_put(loop_backing_file);
93a1a2a2
JR
35916 kfree(au_warn_loopback_array);
35917 }
35918+
35919+/* ---------------------------------------------------------------------- */
35920+
35921+/* support the loopback block device inside aufs */
35922+
35923+struct file *aufs_real_loop(struct file *file)
35924+{
35925+ struct file *f;
35926+
35927+ BUG_ON(!au_test_aufs(file->f_dentry->d_sb));
35928+ fi_read_lock(file);
35929+ f = au_hf_top(file);
35930+ fi_read_unlock(file);
35931+ AuDebugOn(!f);
35932+ return f;
35933+}
35934diff --git a/fs/aufs/loop.h b/fs/aufs/loop.h
7f2ca4b1 35935index 48bf070..66afec7 100644
93a1a2a2
JR
35936--- a/fs/aufs/loop.h
35937+++ b/fs/aufs/loop.h
35938@@ -25,7 +25,11 @@ void au_warn_loopback(struct super_block *h_sb);
35939
35940 int au_loopback_init(void);
35941 void au_loopback_fin(void);
35942+
35943+struct file *aufs_real_loop(struct file *file);
35944 #else
35945+AuStub(struct file *, loop_backing_file, return NULL)
35946+
35947 AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
35948 struct dentry *h_adding)
35949 AuStubInt0(au_test_loopback_kthread, void)
35950@@ -33,6 +37,8 @@ AuStubVoid(au_warn_loopback, struct super_block *h_sb)
35951
35952 AuStubInt0(au_loopback_init, void)
35953 AuStubVoid(au_loopback_fin, void)
35954+
35955+AuStub(struct file *, aufs_real_loop, return NULL, struct file *file)
35956 #endif /* BLK_DEV_LOOP */
35957
35958 #endif /* __KERNEL__ */
35959diff --git a/fs/aufs/super.c b/fs/aufs/super.c
7f2ca4b1 35960index b9fa96e..eef5a01 100644
93a1a2a2
JR
35961--- a/fs/aufs/super.c
35962+++ b/fs/aufs/super.c
7f2ca4b1 35963@@ -834,7 +834,10 @@ static const struct super_operations aufs_sop = {
93a1a2a2
JR
35964 .statfs = aufs_statfs,
35965 .put_super = aufs_put_super,
35966 .sync_fs = aufs_sync_fs,
35967- .remount_fs = aufs_remount_fs
35968+ .remount_fs = aufs_remount_fs,
35969+#ifdef CONFIG_AUFS_BDEV_LOOP
35970+ .real_loop = aufs_real_loop
35971+#endif
35972 };
35973
35974 /* ---------------------------------------------------------------------- */
35975diff --git a/include/linux/fs.h b/include/linux/fs.h
7f2ca4b1 35976index e35dc41..1e9ea4a 100644
93a1a2a2
JR
35977--- a/include/linux/fs.h
35978+++ b/include/linux/fs.h
7f2ca4b1 35979@@ -1593,6 +1593,10 @@ struct super_operations {
93a1a2a2
JR
35980 int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
35981 long (*nr_cached_objects)(struct super_block *, int);
35982 long (*free_cached_objects)(struct super_block *, long, int);
35983+#if defined(CONFIG_BLK_DEV_LOOP) || defined(CONFIG_BLK_DEV_LOOP_MODULE)
35984+ /* and aufs */
35985+ struct file *(*real_loop)(struct file *);
35986+#endif
35987 };
35988
35989 /*
This page took 6.396846 seconds and 4 git commands to generate.