]> git.pld-linux.org Git - packages/kernel.git/blame_incremental - kernel-aufs.patch
leave ZLIB_*/LZO_* library routines to kconfig
[packages/kernel.git] / kernel-aufs.patch
... / ...
CommitLineData
1SPDX-License-Identifier: GPL-2.0
2aufs6.x-rcN kbuild patch
3
4diff --git a/fs/Kconfig b/fs/Kconfig
5index 2685a4d0d353..a2df3d4bb3aa 100644
6--- a/fs/Kconfig
7+++ b/fs/Kconfig
8@@ -319,6 +319,7 @@ source "fs/sysv/Kconfig"
9 source "fs/ufs/Kconfig"
10 source "fs/erofs/Kconfig"
11 source "fs/vboxsf/Kconfig"
12+source "fs/aufs/Kconfig"
13
14 endif # MISC_FILESYSTEMS
15
16diff --git a/fs/Makefile b/fs/Makefile
17index 4dea17840761..9795cbfdd0bf 100644
18--- a/fs/Makefile
19+++ b/fs/Makefile
20@@ -137,3 +137,4 @@ obj-$(CONFIG_EFIVAR_FS) += efivarfs/
21 obj-$(CONFIG_EROFS_FS) += erofs/
22 obj-$(CONFIG_VBOXSF_FS) += vboxsf/
23 obj-$(CONFIG_ZONEFS_FS) += zonefs/
24+obj-$(CONFIG_AUFS_FS) += aufs/
25SPDX-License-Identifier: GPL-2.0
26aufs6.x-rcN base patch
27
28diff --git a/MAINTAINERS b/MAINTAINERS
29index f61eb221415b..b192cff97244 100644
30--- a/MAINTAINERS
31+++ b/MAINTAINERS
32@@ -3522,6 +3522,19 @@ F: include/uapi/linux/audit.h
33 F: kernel/audit*
34 F: lib/*audit.c
35
36+AUFS (advanced multi layered unification filesystem) FILESYSTEM
37+M: "J. R. Okajima" <hooanon05g@gmail.com>
38+L: aufs-users@lists.sourceforge.net (members only)
39+L: linux-unionfs@vger.kernel.org
40+S: Supported
41+W: http://aufs.sourceforge.net
42+T: git://github.com/sfjro/aufs4-linux.git
43+F: Documentation/ABI/testing/debugfs-aufs
44+F: Documentation/ABI/testing/sysfs-aufs
45+F: Documentation/filesystems/aufs/
46+F: fs/aufs/
47+F: include/uapi/linux/aufs_type.h
48+
49 AUXILIARY DISPLAY DRIVERS
50 M: Miguel Ojeda <ojeda@kernel.org>
51 S: Maintained
52diff --git a/drivers/block/loop.c b/drivers/block/loop.c
53index 1518a6423279..7a9928c6db9d 100644
54--- a/drivers/block/loop.c
55+++ b/drivers/block/loop.c
56@@ -645,6 +645,24 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
57 goto done;
58 }
59
60+/*
61+ * for AUFS
62+ * no get/put for file.
63+ */
64+struct file *loop_backing_file(struct super_block *sb)
65+{
66+ struct file *ret;
67+ struct loop_device *l;
68+
69+ ret = NULL;
70+ if (MAJOR(sb->s_dev) == LOOP_MAJOR) {
71+ l = sb->s_bdev->bd_disk->private_data;
72+ ret = l->lo_backing_file;
73+ }
74+ return ret;
75+}
76+EXPORT_SYMBOL_GPL(loop_backing_file);
77+
78 /* loop sysfs attributes */
79
80 static ssize_t loop_attr_show(struct device *dev, char *page,
81diff --git a/fs/dcache.c b/fs/dcache.c
82index 52e6d5fdab6b..519321f32f95 100644
83--- a/fs/dcache.c
84+++ b/fs/dcache.c
85@@ -1345,7 +1345,7 @@ enum d_walk_ret {
86 *
87 * The @enter() callbacks are called with d_lock held.
88 */
89-static void d_walk(struct dentry *parent, void *data,
90+void d_walk(struct dentry *parent, void *data,
91 enum d_walk_ret (*enter)(void *, struct dentry *))
92 {
93 struct dentry *this_parent;
94diff --git a/fs/fcntl.c b/fs/fcntl.c
95index 146c9ab0cd4b..c980a0524118 100644
96--- a/fs/fcntl.c
97+++ b/fs/fcntl.c
98@@ -33,7 +33,7 @@
99
100 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
101
102-static int setfl(int fd, struct file * filp, unsigned long arg)
103+int setfl(int fd, struct file *filp, unsigned long arg)
104 {
105 struct inode * inode = file_inode(filp);
106 int error = 0;
107@@ -63,6 +63,8 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
108
109 if (filp->f_op->check_flags)
110 error = filp->f_op->check_flags(arg);
111+ if (!error && filp->f_op->setfl)
112+ error = filp->f_op->setfl(filp, arg);
113 if (error)
114 return error;
115
116diff --git a/fs/namespace.c b/fs/namespace.c
117index ab467ee58341..8cd34f760f63 100644
118--- a/fs/namespace.c
119+++ b/fs/namespace.c
120@@ -963,6 +963,12 @@ static inline int check_mnt(struct mount *mnt)
121 return mnt->mnt_ns == current->nsproxy->mnt_ns;
122 }
123
124+/* for aufs, CONFIG_AUFS_BR_FUSE */
125+int is_current_mnt_ns(struct vfsmount *mnt)
126+{
127+ return check_mnt(real_mount(mnt));
128+}
129+
130 /*
131 * vfsmount lock must be held for write
132 */
133diff --git a/fs/splice.c b/fs/splice.c
134index 5969b7a1d353..21db9b057d60 100644
135--- a/fs/splice.c
136+++ b/fs/splice.c
137@@ -756,8 +756,8 @@ static int warn_unsupported(struct file *file, const char *op)
138 /*
139 * Attempt to initiate a splice from pipe to file.
140 */
141-static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
142- loff_t *ppos, size_t len, unsigned int flags)
143+long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
144+ loff_t *ppos, size_t len, unsigned int flags)
145 {
146 if (unlikely(!out->f_op->splice_write))
147 return warn_unsupported(out, "write");
148@@ -767,9 +767,9 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
149 /*
150 * Attempt to initiate a splice from a file to a pipe.
151 */
152-static long do_splice_to(struct file *in, loff_t *ppos,
153- struct pipe_inode_info *pipe, size_t len,
154- unsigned int flags)
155+long do_splice_to(struct file *in, loff_t *ppos,
156+ struct pipe_inode_info *pipe, size_t len,
157+ unsigned int flags)
158 {
159 unsigned int p_space;
160 int ret;
161diff --git a/include/linux/fs.h b/include/linux/fs.h
162index 066555ad1bf8..2d30def9a580 100644
163--- a/include/linux/fs.h
164+++ b/include/linux/fs.h
165@@ -1383,6 +1383,7 @@ extern void fasync_free(struct fasync_struct *);
166 /* can be called from interrupts */
167 extern void kill_fasync(struct fasync_struct **, int, int);
168
169+extern int setfl(int fd, struct file *filp, unsigned long arg);
170 extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
171 extern int f_setown(struct file *filp, unsigned long arg, int force);
172 extern void f_delown(struct file *filp);
173@@ -2110,6 +2111,7 @@ struct file_operations {
174 ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
175 unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
176 int (*check_flags)(int);
177+ int (*setfl)(struct file *, unsigned long);
178 int (*flock) (struct file *, int, struct file_lock *);
179 ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
180 ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
181diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
182index 1f1099dac3f0..81d10b8e9dc0 100644
183--- a/include/linux/lockdep.h
184+++ b/include/linux/lockdep.h
185@@ -248,6 +248,8 @@ static inline int lockdep_match_key(struct lockdep_map *lock,
186 return lock->key == key;
187 }
188
189+struct lock_class *lockdep_hlock_class(struct held_lock *hlock);
190+
191 /*
192 * Acquire a lock.
193 *
194diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h
195index 8f882f5881e8..6b9808f09843 100644
196--- a/include/linux/mnt_namespace.h
197+++ b/include/linux/mnt_namespace.h
198@@ -7,12 +7,15 @@ struct mnt_namespace;
199 struct fs_struct;
200 struct user_namespace;
201 struct ns_common;
202+struct vfsmount;
203
204 extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *,
205 struct user_namespace *, struct fs_struct *);
206 extern void put_mnt_ns(struct mnt_namespace *ns);
207 extern struct ns_common *from_mnt_ns(struct mnt_namespace *);
208
209+extern int is_current_mnt_ns(struct vfsmount *mnt);
210+
211 extern const struct file_operations proc_mounts_operations;
212 extern const struct file_operations proc_mountinfo_operations;
213 extern const struct file_operations proc_mountstats_operations;
214diff --git a/include/linux/splice.h b/include/linux/splice.h
215index a55179fd60fc..8e21c53cf883 100644
216--- a/include/linux/splice.h
217+++ b/include/linux/splice.h
218@@ -93,4 +93,10 @@ extern void splice_shrink_spd(struct splice_pipe_desc *);
219
220 extern const struct pipe_buf_operations page_cache_pipe_buf_ops;
221 extern const struct pipe_buf_operations default_pipe_buf_ops;
222+
223+extern long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
224+ loff_t *ppos, size_t len, unsigned int flags);
225+extern long do_splice_to(struct file *in, loff_t *ppos,
226+ struct pipe_inode_info *pipe, size_t len,
227+ unsigned int flags);
228 #endif
229diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
230index e3375bc40dad..c1f0aadde539 100644
231--- a/kernel/locking/lockdep.c
232+++ b/kernel/locking/lockdep.c
233@@ -217,7 +217,7 @@ unsigned long max_lock_class_idx;
234 struct lock_class lock_classes[MAX_LOCKDEP_KEYS];
235 DECLARE_BITMAP(lock_classes_in_use, MAX_LOCKDEP_KEYS);
236
237-static inline struct lock_class *hlock_class(struct held_lock *hlock)
238+inline struct lock_class *lockdep_hlock_class(struct held_lock *hlock)
239 {
240 unsigned int class_idx = hlock->class_idx;
241
242@@ -238,6 +238,7 @@ static inline struct lock_class *hlock_class(struct held_lock *hlock)
243 */
244 return lock_classes + class_idx;
245 }
246+#define hlock_class(hlock) lockdep_hlock_class(hlock)
247
248 #ifdef CONFIG_LOCK_STAT
249 static DEFINE_PER_CPU(struct lock_class_stats[MAX_LOCKDEP_KEYS], cpu_lock_stats);
250SPDX-License-Identifier: GPL-2.0
251aufs6.x-rcN mmap patch
252
253diff --git a/fs/proc/base.c b/fs/proc/base.c
254index 9e479d7d202b..986c2ae12f8b 100644
255--- a/fs/proc/base.c
256+++ b/fs/proc/base.c
257@@ -2218,7 +2218,7 @@ static int map_files_get_link(struct dentry *dentry, struct path *path)
258 rc = -ENOENT;
259 vma = find_exact_vma(mm, vm_start, vm_end);
260 if (vma && vma->vm_file) {
261- *path = vma->vm_file->f_path;
262+ *path = vma_pr_or_file(vma)->f_path;
263 path_get(path);
264 rc = 0;
265 }
266diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c
267index 4d3493579458..42edd9a42c78 100644
268--- a/fs/proc/nommu.c
269+++ b/fs/proc/nommu.c
270@@ -39,7 +39,10 @@ static int nommu_region_show(struct seq_file *m, struct vm_region *region)
271 file = region->vm_file;
272
273 if (file) {
274- struct inode *inode = file_inode(region->vm_file);
275+ struct inode *inode;
276+
277+ file = vmr_pr_or_file(region);
278+ inode = file_inode(file);
279 dev = inode->i_sb->s_dev;
280 ino = inode->i_ino;
281 }
282diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
283index e35a0398db63..d9080329fd7a 100644
284--- a/fs/proc/task_mmu.c
285+++ b/fs/proc/task_mmu.c
286@@ -285,7 +285,10 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
287 const char *name = NULL;
288
289 if (file) {
290- struct inode *inode = file_inode(vma->vm_file);
291+ struct inode *inode;
292+
293+ file = vma_pr_or_file(vma);
294+ inode = file_inode(file);
295 dev = inode->i_sb->s_dev;
296 ino = inode->i_ino;
297 pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
298@@ -1942,7 +1945,7 @@ static int show_numa_map(struct seq_file *m, void *v)
299 struct proc_maps_private *proc_priv = &numa_priv->proc_maps;
300 struct vm_area_struct *vma = v;
301 struct numa_maps *md = &numa_priv->md;
302- struct file *file = vma->vm_file;
303+ struct file *file = vma_pr_or_file(vma);
304 struct mm_struct *mm = vma->vm_mm;
305 struct mempolicy *pol;
306 char buffer[64];
307diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
308index 2fd06f52b6a4..b01030da424f 100644
309--- a/fs/proc/task_nommu.c
310+++ b/fs/proc/task_nommu.c
311@@ -150,7 +150,10 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma)
312 file = vma->vm_file;
313
314 if (file) {
315- struct inode *inode = file_inode(vma->vm_file);
316+ struct inode *inode;
317+
318+ file = vma_pr_or_file(vma);
319+ inode = file_inode(file);
320 dev = inode->i_sb->s_dev;
321 ino = inode->i_ino;
322 pgoff = (loff_t)vma->vm_pgoff << PAGE_SHIFT;
323diff --git a/include/linux/mm.h b/include/linux/mm.h
324index f3f196e4d66d..831812fd4d07 100644
325--- a/include/linux/mm.h
326+++ b/include/linux/mm.h
327@@ -2059,6 +2059,43 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
328 unmap_mapping_range(mapping, holebegin, holelen, 0);
329 }
330
331+#if IS_ENABLED(CONFIG_AUFS_FS)
332+extern void vma_do_file_update_time(struct vm_area_struct *, const char[], int);
333+extern struct file *vma_do_pr_or_file(struct vm_area_struct *, const char[],
334+ int);
335+extern void vma_do_get_file(struct vm_area_struct *, const char[], int);
336+extern void vma_do_fput(struct vm_area_struct *, const char[], int);
337+
338+#define vma_file_update_time(vma) vma_do_file_update_time(vma, __func__, \
339+ __LINE__)
340+#define vma_pr_or_file(vma) vma_do_pr_or_file(vma, __func__, \
341+ __LINE__)
342+#define vma_get_file(vma) vma_do_get_file(vma, __func__, __LINE__)
343+#define vma_fput(vma) vma_do_fput(vma, __func__, __LINE__)
344+
345+#ifndef CONFIG_MMU
346+extern struct file *vmr_do_pr_or_file(struct vm_region *, const char[], int);
347+extern void vmr_do_fput(struct vm_region *, const char[], int);
348+
349+#define vmr_pr_or_file(region) vmr_do_pr_or_file(region, __func__, \
350+ __LINE__)
351+#define vmr_fput(region) vmr_do_fput(region, __func__, __LINE__)
352+#endif /* !CONFIG_MMU */
353+
354+#else
355+
356+#define vma_file_update_time(vma) file_update_time((vma)->vm_file)
357+#define vma_pr_or_file(vma) (vma)->vm_file
358+#define vma_get_file(vma) get_file((vma)->vm_file)
359+#define vma_fput(vma) fput((vma)->vm_file)
360+
361+#ifndef CONFIG_MMU
362+#define vmr_pr_or_file(region) (region)->vm_file
363+#define vmr_fput(region) fput((region)->vm_file)
364+#endif /* !CONFIG_MMU */
365+
366+#endif /* CONFIG_AUFS_FS */
367+
368 extern int access_process_vm(struct task_struct *tsk, unsigned long addr,
369 void *buf, int len, unsigned int gup_flags);
370 extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
371diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
372index 3b8475007734..7a2de3f818bc 100644
373--- a/include/linux/mm_types.h
374+++ b/include/linux/mm_types.h
375@@ -504,6 +504,9 @@ struct vm_region {
376 unsigned long vm_top; /* region allocated to here */
377 unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */
378 struct file *vm_file; /* the backing file or NULL */
379+#if IS_ENABLED(CONFIG_AUFS_FS)
380+ struct file *vm_prfile; /* the virtual backing file or NULL */
381+#endif
382
383 int vm_usage; /* region usage count (access under nommu_region_sem) */
384 bool vm_icache_flushed : 1; /* true if the icache has been flushed for
385@@ -575,6 +578,9 @@ struct vm_area_struct {
386 unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE
387 units */
388 struct file * vm_file; /* File we map to (can be NULL). */
389+#if IS_ENABLED(CONFIG_AUFS_FS)
390+ struct file *vm_prfile; /* shadow of vm_file */
391+#endif
392 void * vm_private_data; /* was vm_pte (shared mem) */
393
394 #ifdef CONFIG_ANON_VMA_NAME
395diff --git a/kernel/fork.c b/kernel/fork.c
396index 9f7fe3541897..f9e44ca88a7d 100644
397--- a/kernel/fork.c
398+++ b/kernel/fork.c
399@@ -664,7 +664,7 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
400 if (file) {
401 struct address_space *mapping = file->f_mapping;
402
403- get_file(file);
404+ vma_get_file(tmp);
405 i_mmap_lock_write(mapping);
406 if (tmp->vm_flags & VM_SHARED)
407 mapping_allow_writable(mapping);
408diff --git a/mm/Makefile b/mm/Makefile
409index 8e105e5b3e29..c218d042dbce 100644
410--- a/mm/Makefile
411+++ b/mm/Makefile
412@@ -138,3 +138,4 @@ obj-$(CONFIG_IO_MAPPING) += io-mapping.o
413 obj-$(CONFIG_HAVE_BOOTMEM_INFO_NODE) += bootmem_info.o
414 obj-$(CONFIG_GENERIC_IOREMAP) += ioremap.o
415 obj-$(CONFIG_SHRINKER_DEBUG) += shrinker_debug.o
416+obj-$(CONFIG_AUFS_FS:m=y) += prfile.o
417diff --git a/mm/filemap.c b/mm/filemap.c
418index c4d4ace9cc70..257da81286d1 100644
419--- a/mm/filemap.c
420+++ b/mm/filemap.c
421@@ -3437,7 +3437,7 @@ vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf)
422 vm_fault_t ret = VM_FAULT_LOCKED;
423
424 sb_start_pagefault(mapping->host->i_sb);
425- file_update_time(vmf->vma->vm_file);
426+ vma_file_update_time(vmf->vma);
427 folio_lock(folio);
428 if (folio->mapping != mapping) {
429 folio_unlock(folio);
430diff --git a/mm/mmap.c b/mm/mmap.c
431index 87d929316d57..fac904cd41cd 100644
432--- a/mm/mmap.c
433+++ b/mm/mmap.c
434@@ -139,7 +139,7 @@ static void remove_vma(struct vm_area_struct *vma)
435 if (vma->vm_ops && vma->vm_ops->close)
436 vma->vm_ops->close(vma);
437 if (vma->vm_file)
438- fput(vma->vm_file);
439+ vma_fput(vma);
440 mpol_put(vma_policy(vma));
441 vm_area_free(vma);
442 }
443@@ -589,7 +589,7 @@ inline int vma_expand(struct ma_state *mas, struct vm_area_struct *vma,
444 if (remove_next) {
445 if (file) {
446 uprobe_munmap(next, next->vm_start, next->vm_end);
447- fput(file);
448+ vma_fput(vma);
449 }
450 if (next->anon_vma)
451 anon_vma_merge(vma, next);
452@@ -830,7 +830,7 @@ int __vma_adjust(struct vm_area_struct *vma, unsigned long start,
453 again:
454 if (file) {
455 uprobe_munmap(next, next->vm_start, next->vm_end);
456- fput(file);
457+ vma_fput(vma);
458 }
459 if (next->anon_vma)
460 anon_vma_merge(vma, next);
461@@ -2224,7 +2224,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
462 goto out_free_mpol;
463
464 if (new->vm_file)
465- get_file(new->vm_file);
466+ vma_get_file(new);
467
468 if (new->vm_ops && new->vm_ops->open)
469 new->vm_ops->open(new);
470@@ -2246,7 +2246,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
471 if (new->vm_ops && new->vm_ops->close)
472 new->vm_ops->close(new);
473 if (new->vm_file)
474- fput(new->vm_file);
475+ vma_fput(new);
476 unlink_anon_vmas(new);
477 out_free_mpol:
478 mpol_put(vma_policy(new));
479@@ -2746,7 +2746,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
480 if (vma->vm_ops && vma->vm_ops->close)
481 vma->vm_ops->close(vma);
482 unmap_and_free_vma:
483- fput(vma->vm_file);
484+ vma_fput(vma);
485 vma->vm_file = NULL;
486
487 /* Undo any partial mapping done by a device driver. */
488@@ -2813,6 +2813,9 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
489 unsigned long populate = 0;
490 unsigned long ret = -EINVAL;
491 struct file *file;
492+#if IS_ENABLED(CONFIG_AUFS_FS)
493+ struct file *prfile;
494+#endif
495
496 pr_warn_once("%s (%d) uses deprecated remap_file_pages() syscall. See Documentation/mm/remap_file_pages.rst.\n",
497 current->comm, current->pid);
498@@ -2871,10 +2874,34 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
499 if (vma->vm_flags & VM_LOCKED)
500 flags |= MAP_LOCKED;
501
502+#if IS_ENABLED(CONFIG_AUFS_FS)
503+ vma_get_file(vma);
504+ file = vma->vm_file;
505+ prfile = vma->vm_prfile;
506+ ret = do_mmap(vma->vm_file, start, size,
507+ prot, flags, pgoff, &populate, NULL);
508+ if (!IS_ERR_VALUE(ret) && file && prfile) {
509+ struct vm_area_struct *new_vma;
510+
511+ new_vma = find_vma(mm, ret);
512+ if (!new_vma->vm_prfile)
513+ new_vma->vm_prfile = prfile;
514+ if (new_vma != vma)
515+ get_file(prfile);
516+ }
517+ /*
518+ * two fput()s instead of vma_fput(vma),
519+ * coz vma may not be available anymore.
520+ */
521+ fput(file);
522+ if (prfile)
523+ fput(prfile);
524+#else
525 file = get_file(vma->vm_file);
526 ret = do_mmap(vma->vm_file, start, size,
527 prot, flags, pgoff, &populate, NULL);
528 fput(file);
529+#endif /* CONFIG_AUFS_FS */
530 out:
531 mmap_write_unlock(mm);
532 if (populate)
533@@ -3222,7 +3249,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
534 if (anon_vma_clone(new_vma, vma))
535 goto out_free_mempol;
536 if (new_vma->vm_file)
537- get_file(new_vma->vm_file);
538+ vma_get_file(new_vma);
539 if (new_vma->vm_ops && new_vma->vm_ops->open)
540 new_vma->vm_ops->open(new_vma);
541 if (vma_link(mm, new_vma))
542diff --git a/mm/nommu.c b/mm/nommu.c
543index 214c70e1d059..dccbb939d2a9 100644
544--- a/mm/nommu.c
545+++ b/mm/nommu.c
546@@ -523,7 +523,7 @@ static void __put_nommu_region(struct vm_region *region)
547 up_write(&nommu_region_sem);
548
549 if (region->vm_file)
550- fput(region->vm_file);
551+ vmr_fput(region);
552
553 /* IO memory and memory shared directly out of the pagecache
554 * from ramfs/tmpfs mustn't be released here */
555@@ -653,7 +653,7 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
556 if (vma->vm_ops && vma->vm_ops->close)
557 vma->vm_ops->close(vma);
558 if (vma->vm_file)
559- fput(vma->vm_file);
560+ vma_fput(vma);
561 put_nommu_region(vma->vm_region);
562 vm_area_free(vma);
563 }
564@@ -1164,7 +1164,7 @@ unsigned long do_mmap(struct file *file,
565 goto error_just_free;
566 }
567 }
568- fput(region->vm_file);
569+ vmr_fput(region);
570 kmem_cache_free(vm_region_jar, region);
571 region = pregion;
572 result = start;
573@@ -1242,10 +1242,10 @@ error_just_free:
574 error:
575 mas_destroy(&mas);
576 if (region->vm_file)
577- fput(region->vm_file);
578+ vmr_fput(region);
579 kmem_cache_free(vm_region_jar, region);
580 if (vma->vm_file)
581- fput(vma->vm_file);
582+ vma_fput(vma);
583 vm_area_free(vma);
584 return ret;
585
586diff --git a/mm/prfile.c b/mm/prfile.c
587new file mode 100644
588index 000000000000..8f820a235364
589--- /dev/null
590+++ b/mm/prfile.c
591@@ -0,0 +1,86 @@
592+// SPDX-License-Identifier: GPL-2.0
593+/*
594+ * Mainly for aufs which mmap(2) different file and wants to print different
595+ * path in /proc/PID/maps.
596+ * Call these functions via macros defined in linux/mm.h.
597+ *
598+ * See Documentation/filesystems/aufs/design/06mmap.txt
599+ *
600+ * Copyright (c) 2014-2022 Junjro R. Okajima
601+ * Copyright (c) 2014 Ian Campbell
602+ */
603+
604+#include <linux/mm.h>
605+#include <linux/file.h>
606+#include <linux/fs.h>
607+
608+/* #define PRFILE_TRACE */
609+static inline void prfile_trace(struct file *f, struct file *pr,
610+ const char func[], int line, const char func2[])
611+{
612+#ifdef PRFILE_TRACE
613+ if (pr)
614+ pr_info("%s:%d: %s, %pD2\n", func, line, func2, f);
615+#endif
616+}
617+
618+void vma_do_file_update_time(struct vm_area_struct *vma, const char func[],
619+ int line)
620+{
621+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
622+
623+ prfile_trace(f, pr, func, line, __func__);
624+ file_update_time(f);
625+ if (f && pr)
626+ file_update_time(pr);
627+}
628+
629+struct file *vma_do_pr_or_file(struct vm_area_struct *vma, const char func[],
630+ int line)
631+{
632+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
633+
634+ prfile_trace(f, pr, func, line, __func__);
635+ return (f && pr) ? pr : f;
636+}
637+
638+void vma_do_get_file(struct vm_area_struct *vma, const char func[], int line)
639+{
640+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
641+
642+ prfile_trace(f, pr, func, line, __func__);
643+ get_file(f);
644+ if (f && pr)
645+ get_file(pr);
646+}
647+
648+void vma_do_fput(struct vm_area_struct *vma, const char func[], int line)
649+{
650+ struct file *f = vma->vm_file, *pr = vma->vm_prfile;
651+
652+ prfile_trace(f, pr, func, line, __func__);
653+ fput(f);
654+ if (f && pr)
655+ fput(pr);
656+}
657+
658+#ifndef CONFIG_MMU
659+struct file *vmr_do_pr_or_file(struct vm_region *region, const char func[],
660+ int line)
661+{
662+ struct file *f = region->vm_file, *pr = region->vm_prfile;
663+
664+ prfile_trace(f, pr, func, line, __func__);
665+ return (f && pr) ? pr : f;
666+}
667+
668+void vmr_do_fput(struct vm_region *region, const char func[], int line)
669+{
670+ struct file *f = region->vm_file, *pr = region->vm_prfile;
671+
672+ prfile_trace(f, pr, func, line, __func__);
673+ fput(f);
674+ if (f && pr)
675+ fput(pr);
676+}
677+#endif /* !CONFIG_MMU */
678SPDX-License-Identifier: GPL-2.0
679aufs6.x-rcN standalone patch
680
681diff --git a/fs/dcache.c b/fs/dcache.c
682index 519321f32f95..267e0c65914a 100644
683--- a/fs/dcache.c
684+++ b/fs/dcache.c
685@@ -1450,6 +1450,7 @@ void d_walk(struct dentry *parent, void *data,
686 seq = 1;
687 goto again;
688 }
689+EXPORT_SYMBOL_GPL(d_walk);
690
691 struct check_mount {
692 struct vfsmount *mnt;
693@@ -3052,6 +3053,7 @@ void d_exchange(struct dentry *dentry1, struct dentry *dentry2)
694
695 write_sequnlock(&rename_lock);
696 }
697+EXPORT_SYMBOL_GPL(d_exchange);
698
699 /**
700 * d_ancestor - search for an ancestor
701diff --git a/fs/exec.c b/fs/exec.c
702index ab913243a367..d84461fddf7d 100644
703--- a/fs/exec.c
704+++ b/fs/exec.c
705@@ -111,6 +111,7 @@ bool path_noexec(const struct path *path)
706 return (path->mnt->mnt_flags & MNT_NOEXEC) ||
707 (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC);
708 }
709+EXPORT_SYMBOL_GPL(path_noexec);
710
711 #ifdef CONFIG_USELIB
712 /*
713diff --git a/fs/fcntl.c b/fs/fcntl.c
714index c980a0524118..40087d11a974 100644
715--- a/fs/fcntl.c
716+++ b/fs/fcntl.c
717@@ -86,6 +86,7 @@ int setfl(int fd, struct file *filp, unsigned long arg)
718 out:
719 return error;
720 }
721+EXPORT_SYMBOL_GPL(setfl);
722
723 static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
724 int force)
725diff --git a/fs/file_table.c b/fs/file_table.c
726index dd88701e54a9..ecdb7817b074 100644
727--- a/fs/file_table.c
728+++ b/fs/file_table.c
729@@ -198,6 +198,7 @@ struct file *alloc_empty_file(int flags, const struct cred *cred)
730 }
731 return ERR_PTR(-ENFILE);
732 }
733+EXPORT_SYMBOL_GPL(alloc_empty_file);
734
735 /*
736 * Variant of alloc_empty_file() that doesn't check and modify nr_files.
737diff --git a/fs/namespace.c b/fs/namespace.c
738index 8cd34f760f63..799a94205906 100644
739--- a/fs/namespace.c
740+++ b/fs/namespace.c
741@@ -571,6 +571,7 @@ void __mnt_drop_write(struct vfsmount *mnt)
742 mnt_dec_writers(real_mount(mnt));
743 preempt_enable();
744 }
745+EXPORT_SYMBOL_GPL(__mnt_drop_write);
746
747 /**
748 * mnt_drop_write - give up write access to a mount
749@@ -968,6 +969,7 @@ int is_current_mnt_ns(struct vfsmount *mnt)
750 {
751 return check_mnt(real_mount(mnt));
752 }
753+EXPORT_SYMBOL_GPL(is_current_mnt_ns);
754
755 /*
756 * vfsmount lock must be held for write
757@@ -2165,6 +2167,7 @@ int iterate_mounts(int (*f)(struct vfsmount *, void *), void *arg,
758 }
759 return 0;
760 }
761+EXPORT_SYMBOL_GPL(iterate_mounts);
762
763 static void lock_mnt_tree(struct mount *mnt)
764 {
765diff --git a/fs/notify/group.c b/fs/notify/group.c
766index 1de6631a3925..3008eb37a18d 100644
767--- a/fs/notify/group.c
768+++ b/fs/notify/group.c
769@@ -100,6 +100,7 @@ void fsnotify_get_group(struct fsnotify_group *group)
770 {
771 refcount_inc(&group->refcnt);
772 }
773+EXPORT_SYMBOL_GPL(fsnotify_get_group);
774
775 /*
776 * Drop a reference to a group. Free it if it's through.
777diff --git a/fs/open.c b/fs/open.c
778index 82c1a28b3308..e7bd6f1c9474 100644
779--- a/fs/open.c
780+++ b/fs/open.c
781@@ -66,6 +66,7 @@ int do_truncate(struct user_namespace *mnt_userns, struct dentry *dentry,
782 inode_unlock(dentry->d_inode);
783 return ret;
784 }
785+EXPORT_SYMBOL_GPL(do_truncate);
786
787 long vfs_truncate(const struct path *path, loff_t length)
788 {
789diff --git a/fs/read_write.c b/fs/read_write.c
790index 7a2ff6157eda..76f5d4d08687 100644
791--- a/fs/read_write.c
792+++ b/fs/read_write.c
793@@ -477,6 +477,7 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
794 inc_syscr(current);
795 return ret;
796 }
797+EXPORT_SYMBOL_GPL(vfs_read);
798
799 static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
800 {
801@@ -592,6 +593,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
802 file_end_write(file);
803 return ret;
804 }
805+EXPORT_SYMBOL_GPL(vfs_write);
806
807 /* file_ppos returns &file->f_pos or NULL if file is stream */
808 static inline loff_t *file_ppos(struct file *file)
809diff --git a/fs/splice.c b/fs/splice.c
810index 21db9b057d60..abcd14f52c1e 100644
811--- a/fs/splice.c
812+++ b/fs/splice.c
813@@ -763,6 +763,7 @@ long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
814 return warn_unsupported(out, "write");
815 return out->f_op->splice_write(pipe, out, ppos, len, flags);
816 }
817+EXPORT_SYMBOL_GPL(do_splice_from);
818
819 /*
820 * Attempt to initiate a splice from a file to a pipe.
821@@ -792,6 +793,7 @@ long do_splice_to(struct file *in, loff_t *ppos,
822 return warn_unsupported(in, "read");
823 return in->f_op->splice_read(in, ppos, pipe, len, flags);
824 }
825+EXPORT_SYMBOL_GPL(do_splice_to);
826
827 /**
828 * splice_direct_to_actor - splices data directly between two non-pipes
829diff --git a/fs/xattr.c b/fs/xattr.c
830index adab9a70b536..b2dc4fc0b25b 100644
831--- a/fs/xattr.c
832+++ b/fs/xattr.c
833@@ -407,6 +407,7 @@ vfs_getxattr_alloc(struct user_namespace *mnt_userns, struct dentry *dentry,
834 *xattr_value = value;
835 return error;
836 }
837+EXPORT_SYMBOL_GPL(vfs_getxattr_alloc);
838
839 ssize_t
840 __vfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name,
841diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
842index c1f0aadde539..278e0a322d20 100644
843--- a/kernel/locking/lockdep.c
844+++ b/kernel/locking/lockdep.c
845@@ -238,6 +238,7 @@ inline struct lock_class *lockdep_hlock_class(struct held_lock *hlock)
846 */
847 return lock_classes + class_idx;
848 }
849+EXPORT_SYMBOL_GPL(lockdep_hlock_class);
850 #define hlock_class(hlock) lockdep_hlock_class(hlock)
851
852 #ifdef CONFIG_LOCK_STAT
853diff --git a/kernel/task_work.c b/kernel/task_work.c
854index 065e1ef8fc8d..c623c6f0c645 100644
855--- a/kernel/task_work.c
856+++ b/kernel/task_work.c
857@@ -182,3 +182,4 @@ void task_work_run(void)
858 } while (work);
859 }
860 }
861+EXPORT_SYMBOL_GPL(task_work_run);
862diff --git a/security/security.c b/security/security.c
863index d1571900a8c7..c8333d54e590 100644
864--- a/security/security.c
865+++ b/security/security.c
866@@ -1195,6 +1195,7 @@ int security_path_rmdir(const struct path *dir, struct dentry *dentry)
867 return 0;
868 return call_int_hook(path_rmdir, 0, dir, dentry);
869 }
870+EXPORT_SYMBOL_GPL(security_path_rmdir);
871
872 int security_path_unlink(const struct path *dir, struct dentry *dentry)
873 {
874@@ -1211,6 +1212,7 @@ int security_path_symlink(const struct path *dir, struct dentry *dentry,
875 return 0;
876 return call_int_hook(path_symlink, 0, dir, dentry, old_name);
877 }
878+EXPORT_SYMBOL_GPL(security_path_symlink);
879
880 int security_path_link(struct dentry *old_dentry, const struct path *new_dir,
881 struct dentry *new_dentry)
882@@ -1219,6 +1221,7 @@ int security_path_link(struct dentry *old_dentry, const struct path *new_dir,
883 return 0;
884 return call_int_hook(path_link, 0, old_dentry, new_dir, new_dentry);
885 }
886+EXPORT_SYMBOL_GPL(security_path_link);
887
888 int security_path_rename(const struct path *old_dir, struct dentry *old_dentry,
889 const struct path *new_dir, struct dentry *new_dentry,
890@@ -1239,6 +1242,7 @@ int security_path_truncate(const struct path *path)
891 return 0;
892 return call_int_hook(path_truncate, 0, path);
893 }
894+EXPORT_SYMBOL_GPL(security_path_truncate);
895
896 int security_path_chmod(const struct path *path, umode_t mode)
897 {
898@@ -1246,6 +1250,7 @@ int security_path_chmod(const struct path *path, umode_t mode)
899 return 0;
900 return call_int_hook(path_chmod, 0, path, mode);
901 }
902+EXPORT_SYMBOL_GPL(security_path_chmod);
903
904 int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid)
905 {
906@@ -1253,6 +1258,7 @@ int security_path_chown(const struct path *path, kuid_t uid, kgid_t gid)
907 return 0;
908 return call_int_hook(path_chown, 0, path, uid, gid);
909 }
910+EXPORT_SYMBOL_GPL(security_path_chown);
911
912 int security_path_chroot(const struct path *path)
913 {
914@@ -1353,6 +1359,7 @@ int security_inode_permission(struct inode *inode, int mask)
915 return 0;
916 return call_int_hook(inode_permission, 0, inode, mask);
917 }
918+EXPORT_SYMBOL_GPL(security_inode_permission);
919
920 int security_inode_setattr(struct user_namespace *mnt_userns,
921 struct dentry *dentry, struct iattr *attr)
922@@ -1593,6 +1600,7 @@ int security_file_permission(struct file *file, int mask)
923
924 return fsnotify_perm(file, mask);
925 }
926+EXPORT_SYMBOL_GPL(security_file_permission);
927
928 int security_file_alloc(struct file *file)
929 {
930@@ -1726,6 +1734,7 @@ int security_file_truncate(struct file *file)
931 {
932 return call_int_hook(file_truncate, 0, file);
933 }
934+EXPORT_SYMBOL_GPL(security_file_truncate);
935
936 int security_task_alloc(struct task_struct *task, unsigned long clone_flags)
937 {
938diff -urN /usr/share/empty/Documentation/ABI/testing/debugfs-aufs linux/Documentation/ABI/testing/debugfs-aufs
939--- /usr/share/empty/Documentation/ABI/testing/debugfs-aufs 1970-01-01 01:00:00.000000000 +0100
940+++ linux/Documentation/ABI/testing/debugfs-aufs 2022-11-05 23:02:18.955889283 +0100
941@@ -0,0 +1,55 @@
942+What: /debug/aufs/si_<id>/
943+Date: March 2009
944+Contact: J. R. Okajima <hooanon05g@gmail.com>
945+Description:
946+ Under /debug/aufs, a directory named si_<id> is created
947+ per aufs mount, where <id> is a unique id generated
948+ internally.
949+
950+What: /debug/aufs/si_<id>/plink
951+Date: Apr 2013
952+Contact: J. R. Okajima <hooanon05g@gmail.com>
953+Description:
954+ It has three lines and shows the information about the
955+ pseudo-link. The first line is a single number
956+ representing a number of buckets. The second line is a
957+ number of pseudo-links per bucket (separated by a
958+ blank). The last line is a single number representing a
959+ total number of pseudo-links.
960+ When the aufs mount option 'noplink' is specified, it
961+ will show "1\n0\n0\n".
962+
963+What: /debug/aufs/si_<id>/xib
964+Date: March 2009
965+Contact: J. R. Okajima <hooanon05g@gmail.com>
966+Description:
967+ It shows the consumed blocks by xib (External Inode Number
968+ Bitmap), its block size and file size.
969+ When the aufs mount option 'noxino' is specified, it
970+ will be empty. About XINO files, see the aufs manual.
971+
972+What: /debug/aufs/si_<id>/xi<branch-index>
973+Date: March 2009
974+Contact: J. R. Okajima <hooanon05g@gmail.com>
975+Description:
976+ It shows the consumed blocks by xino (External Inode Number
977+ Translation Table), its link count, block size and file
978+ size.
979+ Due to the file size limit, there may exist multiple
980+ xino files per branch. In this case, "-N" is added to
981+ the filename and it corresponds to the index of the
982+ internal xino array. "-0" is omitted.
983+ When the aufs mount option 'noxino' is specified, those
984+ entries won't exist. About XINO files, see the aufs
985+ manual.
986+
987+What: /debug/aufs/si_<id>/xigen
988+Date: March 2009
989+Contact: J. R. Okajima <hooanon05g@gmail.com>
990+Description:
991+ It shows the consumed blocks by xigen (External Inode
992+ Generation Table), its block size and file size.
993+ If CONFIG_AUFS_EXPORT is disabled, this entry will not
994+ be created.
995+ When the aufs mount option 'noxino' is specified, it
996+ will be empty. About XINO files, see the aufs manual.
997diff -urN /usr/share/empty/Documentation/ABI/testing/sysfs-aufs linux/Documentation/ABI/testing/sysfs-aufs
998--- /usr/share/empty/Documentation/ABI/testing/sysfs-aufs 1970-01-01 01:00:00.000000000 +0100
999+++ linux/Documentation/ABI/testing/sysfs-aufs 2022-11-05 23:02:18.955889283 +0100
1000@@ -0,0 +1,31 @@
1001+What: /sys/fs/aufs/si_<id>/
1002+Date: March 2009
1003+Contact: J. R. Okajima <hooanon05g@gmail.com>
1004+Description:
1005+ Under /sys/fs/aufs, a directory named si_<id> is created
1006+ per aufs mount, where <id> is a unique id generated
1007+ internally.
1008+
1009+What: /sys/fs/aufs/si_<id>/br<idx>
1010+Date: March 2009
1011+Contact: J. R. Okajima <hooanon05g@gmail.com>
1012+Description:
1013+ It shows the absolute path of a member directory (which
1014+ is called branch) in aufs, and its permission.
1015+
1016+What: /sys/fs/aufs/si_<id>/brid<idx>
1017+Date: July 2013
1018+Contact: J. R. Okajima <hooanon05g@gmail.com>
1019+Description:
1020+ It shows the id of a member directory (which is called
1021+ branch) in aufs.
1022+
1023+What: /sys/fs/aufs/si_<id>/xi_path
1024+Date: March 2009
1025+Contact: J. R. Okajima <hooanon05g@gmail.com>
1026+Description:
1027+ It shows the absolute path of XINO (External Inode Number
1028+ Bitmap, Translation Table and Generation Table) file
1029+ even if it is the default path.
1030+ When the aufs mount option 'noxino' is specified, it
1031+ will be empty. About XINO files, see the aufs manual.
1032diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/01intro.txt linux/Documentation/filesystems/aufs/design/01intro.txt
1033--- /usr/share/empty/Documentation/filesystems/aufs/design/01intro.txt 1970-01-01 01:00:00.000000000 +0100
1034+++ linux/Documentation/filesystems/aufs/design/01intro.txt 2022-11-05 23:02:18.955889283 +0100
1035@@ -0,0 +1,171 @@
1036+
1037+# Copyright (C) 2005-2022 Junjiro R. Okajima
1038+#
1039+# This program is free software; you can redistribute it and/or modify
1040+# it under the terms of the GNU General Public License as published by
1041+# the Free Software Foundation; either version 2 of the License, or
1042+# (at your option) any later version.
1043+#
1044+# This program is distributed in the hope that it will be useful,
1045+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1046+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1047+# GNU General Public License for more details.
1048+#
1049+# You should have received a copy of the GNU General Public License
1050+# along with this program. If not, see <http://www.gnu.org/licenses/>.
1051+
1052+Introduction
1053+----------------------------------------
1054+
1055+aufs [ei ju: ef es] | /ey-yoo-ef-es/ | [a u f s]
1056+1. abbrev. for "advanced multi-layered unification filesystem".
1057+2. abbrev. for "another unionfs".
1058+3. abbrev. for "auf das" in German which means "on the" in English.
1059+ Ex. "Butter aufs Brot"(G) means "butter onto bread"(E).
1060+ But "Filesystem aufs Filesystem" is hard to understand.
1061+4. abbrev. for "African Urban Fashion Show".
1062+
1063+AUFS is a filesystem with features:
1064+- multi layered stackable unification filesystem, the member directory
1065+ is called as a branch.
1066+- branch permission and attribute, 'readonly', 'real-readonly',
1067+ 'readwrite', 'whiteout-able', 'link-able whiteout', etc. and their
1068+ combination.
1069+- internal "file copy-on-write".
1070+- logical deletion, whiteout.
1071+- dynamic branch manipulation, adding, deleting and changing permission.
1072+- allow bypassing aufs, user's direct branch access.
1073+- external inode number translation table and bitmap which maintains the
1074+ persistent aufs inode number.
1075+- seekable directory, including NFS readdir.
1076+- file mapping, mmap and sharing pages.
1077+- pseudo-link, hardlink over branches.
1078+- loopback mounted filesystem as a branch.
1079+- several policies to select one among multiple writable branches.
1080+- revert a single systemcall when an error occurs in aufs.
1081+- and more...
1082+
1083+
1084+Multi Layered Stackable Unification Filesystem
1085+----------------------------------------------------------------------
1086+Most people already know what it is.
1087+It is a filesystem which unifies several directories and provides a
1088+merged single directory. When users access a file, the access will be
1089+passed/re-directed/converted (sorry, I am not sure which English word is
1090+correct) to the real file on the member filesystem. The member
1091+filesystem is called 'lower filesystem' or 'branch' and has a mode
1092+'readonly' and 'readwrite.' And the deletion for a file on the lower
1093+readonly branch is handled by creating 'whiteout' on the upper writable
1094+branch.
1095+
1096+On LKML, there have been discussions about UnionMount (Jan Blunck,
1097+Bharata B Rao and Valerie Aurora) and Unionfs (Erez Zadok). They took
1098+different approaches to implement the merged-view.
1099+The former tries putting it into VFS, and the latter implements as a
1100+separate filesystem.
1101+(If I misunderstand about these implementations, please let me know and
1102+I shall correct it. Because it is a long time ago when I read their
1103+source files last time).
1104+
1105+UnionMount's approach will be able to small, but may be hard to share
1106+branches between several UnionMount since the whiteout in it is
1107+implemented in the inode on branch filesystem and always
1108+shared. According to Bharata's post, readdir does not seem to be
1109+finished yet.
1110+There are several missing features known in this implementation such as
1111+- for users, the inode number may change silently. eg. copy-up.
1112+- link(2) may break by copy-up.
1113+- read(2) may get an obsoleted filedata (fstat(2) too).
1114+- fcntl(F_SETLK) may be broken by copy-up.
1115+- unnecessary copy-up may happen, for example mmap(MAP_PRIVATE) after
1116+ open(O_RDWR).
1117+
1118+In linux-3.18, "overlay" filesystem (formerly known as "overlayfs") was
1119+merged into mainline. This is another implementation of UnionMount as a
1120+separated filesystem. All the limitations and known problems which
1121+UnionMount has are equally inherited by the "overlay" filesystem.
1122+
1123+Unionfs has a longer history. When I started implementing a stackable
1124+filesystem (Aug 2005), it already existed. It has virtual super_block,
1125+inode, dentry and file objects and they have an array pointing lower
1126+same kind objects. After contributing many patches for Unionfs, I
1127+re-started my project AUFS (Jun 2006).
1128+
1129+In AUFS, the structure of filesystem resembles to Unionfs, but I
1130+implemented my own ideas, approaches and enhancements and it became
1131+totally different one.
1132+
1133+Comparing DM snapshot and fs based implementation
1134+- the number of bytes to be copied between devices is much smaller.
1135+- the type of filesystem must be one and only.
1136+- the fs must be writable, no readonly fs, even for the lower original
1137+ device. so the compression fs will not be usable. but if we use
1138+ loopback mount, we may address this issue.
1139+ for instance,
1140+ mount /cdrom/squashfs.img /sq
1141+ losetup /sq/ext2.img
1142+ losetup /somewhere/cow
1143+ dmsetup "snapshot /dev/loop0 /dev/loop1 ..."
1144+- it will be difficult (or needs more operations) to extract the
1145+ difference between the original device and COW.
1146+- DM snapshot-merge may help a lot when users try merging. in the
1147+ fs-layer union, users will use rsync(1).
1148+
1149+You may want to read my old paper "Filesystems in LiveCD"
1150+(http://aufs.sourceforge.net/aufs2/report/sq/sq.pdf).
1151+
1152+
1153+Several characters/aspects/persona of aufs
1154+----------------------------------------------------------------------
1155+
1156+Aufs has several characters, aspects or persona.
1157+1. a filesystem, callee of VFS helper
1158+2. sub-VFS, caller of VFS helper for branches
1159+3. a virtual filesystem which maintains persistent inode number
1160+4. reader/writer of files on branches such like an application
1161+
1162+1. Callee of VFS Helper
1163+As an ordinary linux filesystem, aufs is a callee of VFS. For instance,
1164+unlink(2) from an application reaches sys_unlink() kernel function and
1165+then vfs_unlink() is called. vfs_unlink() is one of the VFS helpers and it
1166+calls filesystem specific unlink operation. Actually aufs implements the
1167+unlink operation but it behaves like a redirector.
1168+
1169+2. Caller of VFS Helper for Branches
1170+aufs_unlink() passes the unlink request to the branch filesystem as if
1171+it were called from VFS. So the called unlink operation of the branch
1172+filesystem acts as usual. As a caller of VFS helper, aufs should handle
1173+every necessary pre/post operation for the branch filesystem.
1174+- acquire the lock for the parent dir on a branch
1175+- lookup in a branch
1176+- revalidate dentry on a branch
1177+- mnt_want_write() for a branch
1178+- vfs_unlink() for a branch
1179+- mnt_drop_write() for a branch
1180+- release the lock on a branch
1181+
1182+3. Persistent Inode Number
1183+One of the most important issues for a filesystem is to maintain inode
1184+numbers. This is particularly important to support exporting a
1185+filesystem via NFS. Aufs is a virtual filesystem which doesn't have a
1186+backend block device for its own. But some storage is necessary to
1187+keep and maintain the inode numbers. It may be a large space and may not
1188+suit to keep in memory. Aufs rents some space from its first writable
1189+branch filesystem (by default) and creates file(s) on it. These files
1190+are created by aufs internally and removed soon (currently) keeping
1191+opened.
1192+Note: Because these files are removed, they are totally gone after
1193+ unmounting aufs. It means the inode numbers are not persistent
1194+ across unmount or reboot. I have a plan to make them really
1195+ persistent which will be important for aufs on NFS server.
1196+
1197+4. Read/Write Files Internally (copy-on-write)
1198+Because a branch can be readonly, when you write a file on it, aufs will
1199+"copy-up" it to the upper writable branch internally. And then write the
1200+originally requested thing to the file. Generally kernel doesn't
1201+open/read/write file actively. In aufs, even a single write may cause an
1202+internal "file copy". This behaviour is very similar to cp(1) command.
1203+
1204+Some people may think it is better to pass such work to user space
1205+helper, instead of doing in kernel space. Actually I am still thinking
1206+about it. But currently I have implemented it in kernel space.
1207diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/02struct.txt linux/Documentation/filesystems/aufs/design/02struct.txt
1208--- /usr/share/empty/Documentation/filesystems/aufs/design/02struct.txt 1970-01-01 01:00:00.000000000 +0100
1209+++ linux/Documentation/filesystems/aufs/design/02struct.txt 2022-11-05 23:02:18.955889283 +0100
1210@@ -0,0 +1,258 @@
1211+
1212+# Copyright (C) 2005-2022 Junjiro R. Okajima
1213+#
1214+# This program is free software; you can redistribute it and/or modify
1215+# it under the terms of the GNU General Public License as published by
1216+# the Free Software Foundation; either version 2 of the License, or
1217+# (at your option) any later version.
1218+#
1219+# This program is distributed in the hope that it will be useful,
1220+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1221+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1222+# GNU General Public License for more details.
1223+#
1224+# You should have received a copy of the GNU General Public License
1225+# along with this program. If not, see <http://www.gnu.org/licenses/>.
1226+
1227+Basic Aufs Internal Structure
1228+
1229+Superblock/Inode/Dentry/File Objects
1230+----------------------------------------------------------------------
1231+As like an ordinary filesystem, aufs has its own
1232+superblock/inode/dentry/file objects. All these objects have a
1233+dynamically allocated array and store the same kind of pointers to the
1234+lower filesystem, branch.
1235+For example, when you build a union with one readwrite branch and one
1236+readonly, mounted /au, /rw and /ro respectively.
1237+- /au = /rw + /ro
1238+- /ro/fileA exists but /rw/fileA does not
1239+
1240+Aufs lookup operation finds /ro/fileA and gets dentry for that. These
1241+pointers are stored in a aufs dentry. The array in aufs dentry will be,
1242+- [0] = NULL (because /rw/fileA doesn't exist)
1243+- [1] = /ro/fileA
1244+
1245+This style of an array is essentially same to the aufs
1246+superblock/inode/dentry/file objects.
1247+
1248+Because aufs supports manipulating branches, ie. add/delete/change
1249+branches dynamically, these objects have their own generation. When
1250+branches are changed, the generation in aufs superblock is
1251+incremented. And a generation in other object are compared when it is
1252+accessed. When a generation in other objects are obsoleted, aufs
1253+refreshes the internal array.
1254+
1255+
1256+Superblock
1257+----------------------------------------------------------------------
1258+Additionally aufs superblock has some data for policies to select one
1259+among multiple writable branches, XIB files, pseudo-links and kobject.
1260+See below in detail.
1261+About the policies which supports copy-down a directory, see
1262+wbr_policy.txt too.
1263+
1264+
1265+Branch and XINO(External Inode Number Translation Table)
1266+----------------------------------------------------------------------
1267+Every branch has its own xino (external inode number translation table)
1268+file. The xino file is created and unlinked by aufs internally. When two
1269+members of a union exist on the same filesystem, they share the single
1270+xino file.
1271+The struct of a xino file is simple, just a sequence of aufs inode
1272+numbers which is indexed by the lower inode number.
1273+In the above sample, assume the inode number of /ro/fileA is i111 and
1274+aufs assigns the inode number i999 for fileA. Then aufs writes 999 as
1275+4(8) bytes at 111 * 4(8) bytes offset in the xino file.
1276+
1277+When the inode numbers are not contiguous, the xino file will be sparse
1278+which has a hole in it and doesn't consume as much disk space as it
1279+might appear. If your branch filesystem consumes disk space for such
1280+holes, then you should specify 'xino=' option at mounting aufs.
1281+
1282+Aufs has a mount option to free the disk blocks for such holes in XINO
1283+files on tmpfs or ramdisk. But it is not so effective actually. If you
1284+meet a problem of disk shortage due to XINO files, then you should try
1285+"tmpfs-ino.patch" (and "vfs-ino.patch" too) in aufs4-standalone.git.
1286+The patch localizes the assignment inumbers per tmpfs-mount and avoid
1287+the holes in XINO files.
1288+
1289+Also a writable branch has three kinds of "whiteout bases". All these
1290+exist when the branch is joined to aufs, and their names are
1291+whiteout-ed doubly, so that users will never see their names in aufs
1292+hierarchy.
1293+1. a regular file which will be hardlinked to all whiteouts.
1294+2. a directory to store a pseudo-link.
1295+3. a directory to store an "orphan"-ed file temporarily.
1296+
1297+1. Whiteout Base
1298+ When you remove a file on a readonly branch, aufs handles it as a
1299+ logical deletion and creates a whiteout on the upper writable branch
1300+ as a hardlink of this file in order not to consume inode on the
1301+ writable branch.
1302+2. Pseudo-link Dir
1303+ See below, Pseudo-link.
1304+3. Step-Parent Dir
1305+ When "fileC" exists on the lower readonly branch only and it is
1306+ opened and removed with its parent dir, and then user writes
1307+ something into it, then aufs copies-up fileC to this
1308+ directory. Because there is no other dir to store fileC. After
1309+ creating a file under this dir, the file is unlinked.
1310+
1311+Because aufs supports manipulating branches, ie. add/delete/change
1312+dynamically, a branch has its own id. When the branch order changes,
1313+aufs finds the new index by searching the branch id.
1314+
1315+
1316+Pseudo-link
1317+----------------------------------------------------------------------
1318+Assume "fileA" exists on the lower readonly branch only and it is
1319+hardlinked to "fileB" on the branch. When you write something to fileA,
1320+aufs copies-up it to the upper writable branch. Additionally aufs
1321+creates a hardlink under the Pseudo-link Directory of the writable
1322+branch. The inode of a pseudo-link is kept in aufs super_block as a
1323+simple list. If fileB is read after unlinking fileA, aufs returns
1324+filedata from the pseudo-link instead of the lower readonly
1325+branch. Because the pseudo-link is based upon the inode, to keep the
1326+inode number by xino (see above) is essentially necessary.
1327+
1328+All the hardlinks under the Pseudo-link Directory of the writable branch
1329+should be restored in a proper location later. Aufs provides a utility
1330+to do this. The userspace helpers are executed at remounting and unmounting
1331+aufs by default.
1332+While this utility is running, it puts aufs into the pseudo-link
1333+maintenance mode. In this mode, only the process which began the
1334+maintenance mode (and its child processes) is allowed to operate in
1335+aufs. Some other processes which are not related to the pseudo-link will
1336+be allowed to run too, but the rest have to return an error or wait
1337+until the maintenance mode ends. If a process already acquires an inode
1338+mutex (in VFS), it has to return an error.
1339+
1340+
1341+XIB(external inode number bitmap)
1342+----------------------------------------------------------------------
1343+In addition to the xino file per branch, aufs has an external inode number
1344+bitmap in a superblock object. It is also an internal file such like a
1345+xino file.
1346+It is a simple bitmap to mark whether the aufs inode number is in-use or
1347+not.
1348+To reduce the file I/O, aufs prepares a single memory page to cache xib.
1349+
1350+As well as XINO files, aufs has a feature to truncate/refresh XIB to
1351+reduce the number of consumed disk blocks for these files.
1352+
1353+
1354+Virtual or Vertical Dir, and Readdir in Userspace
1355+----------------------------------------------------------------------
1356+In order to support multiple layers (branches), aufs readdir operation
1357+constructs a virtual dir block on memory. For readdir, aufs calls
1358+vfs_readdir() internally for each dir on branches, merges their entries
1359+with eliminating the whiteout-ed ones, and sets it to file (dir)
1360+object. So the file object has its entry list until it is closed. The
1361+entry list will be updated when the file position is zero and becomes
1362+obsoleted. This decision is made in aufs automatically.
1363+
1364+The dynamically allocated memory block for the name of entries has a
1365+unit of 512 bytes (by default) and stores the names contiguously (no
1366+padding). Another block for each entry is handled by kmem_cache too.
1367+During building dir blocks, aufs creates hash list and judging whether
1368+the entry is whiteouted by its upper branch or already listed.
1369+The merged result is cached in the corresponding inode object and
1370+maintained by a customizable life-time option.
1371+
1372+Some people may call it can be a security hole or invite DoS attack
1373+since the opened and once readdir-ed dir (file object) holds its entry
1374+list and becomes a pressure for system memory. But I'd say it is similar
1375+to files under /proc or /sys. The virtual files in them also holds a
1376+memory page (generally) while they are opened. When an idea to reduce
1377+memory for them is introduced, it will be applied to aufs too.
1378+For those who really hate this situation, I've developed readdir(3)
1379+library which operates this merging in userspace. You just need to set
1380+LD_PRELOAD environment variable, and aufs will not consume any memory in
1381+kernel space for readdir(3).
1382+
1383+
1384+Workqueue
1385+----------------------------------------------------------------------
1386+Aufs sometimes requires privilege access to a branch. For instance,
1387+in copy-up/down operation. When a user process is going to make changes
1388+to a file which exists in the lower readonly branch only, and the mode
1389+of one of ancestor directories may not be writable by a user
1390+process. Here aufs copy-up the file with its ancestors and they may
1391+require privilege to set its owner/group/mode/etc.
1392+This is a typical case of an application character of aufs (see
1393+Introduction).
1394+
1395+Aufs uses workqueue synchronously for this case. It creates its own
1396+workqueue. The workqueue is a kernel thread and has privilege. Aufs
1397+passes the request to call mkdir or write (for example), and wait for
1398+its completion. This approach solves a problem of a signal handler
1399+simply.
1400+If aufs didn't adopt the workqueue and changed the privilege of the
1401+process, then the process may receive the unexpected SIGXFSZ or other
1402+signals.
1403+
1404+Also aufs uses the system global workqueue ("events" kernel thread) too
1405+for asynchronous tasks, such like handling inotify/fsnotify, re-creating a
1406+whiteout base and etc. This is unrelated to a privilege.
1407+Most of aufs operation tries acquiring a rw_semaphore for aufs
1408+superblock at the beginning, at the same time waits for the completion
1409+of all queued asynchronous tasks.
1410+
1411+
1412+Whiteout
1413+----------------------------------------------------------------------
1414+The whiteout in aufs is very similar to Unionfs's. That is represented
1415+by its filename. UnionMount takes an approach of a file mode, but I am
1416+afraid several utilities (find(1) or something) will have to support it.
1417+
1418+Basically the whiteout represents "logical deletion" which stops aufs to
1419+lookup further, but also it represents "dir is opaque" which also stops
1420+further lookup.
1421+
1422+In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively.
1423+In order to make several functions in a single systemcall to be
1424+revertible, aufs adopts an approach to rename a directory to a temporary
1425+unique whiteouted name.
1426+For example, in rename(2) dir where the target dir already existed, aufs
1427+renames the target dir to a temporary unique whiteouted name before the
1428+actual rename on a branch, and then handles other actions (make it opaque,
1429+update the attributes, etc). If an error happens in these actions, aufs
1430+simply renames the whiteouted name back and returns an error. If all are
1431+succeeded, aufs registers a function to remove the whiteouted unique
1432+temporary name completely and asynchronously to the system global
1433+workqueue.
1434+
1435+
1436+Copy-up
1437+----------------------------------------------------------------------
1438+It is a well-known feature or concept.
1439+When a user modifies a file on a readonly branch, aufs operates "copy-up"
1440+internally and makes change to the new file on the upper writable branch.
1441+When the trigger systemcall does not update the timestamps of the parent
1442+dir, aufs reverts it after copy-up.
1443+
1444+
1445+Move-down (aufs3.9 and later)
1446+----------------------------------------------------------------------
1447+"Copy-up" is one of the essential features in aufs. It copies a file from
1448+the lower readonly branch to the upper writable branch when a user
1449+changes something about the file.
1450+"Move-down" is an opposite action of copy-up. Basically this action is
1451+run manually instead of automatically and internally.
1452+For design and implementation, aufs has to consider these issues.
1453+- whiteout for the file may exist on the lower branch.
1454+- ancestor directories may not exist on the lower branch.
1455+- diropq for the ancestor directories may exist on the upper branch.
1456+- free space on the lower branch will reduce.
1457+- another access to the file may happen during moving-down, including
1458+ UDBA (see "Revalidate Dentry and UDBA").
1459+- the file should not be hard-linked nor pseudo-linked. they should be
1460+ handled by auplink utility later.
1461+
1462+Sometimes users want to move-down a file from the upper writable branch
1463+to the lower readonly or writable branch. For instance,
1464+- the free space of the upper writable branch is going to run out.
1465+- create a new intermediate branch between the upper and lower branch.
1466+- etc.
1467+
1468+For this purpose, use "aumvdown" command in aufs-util.git.
1469diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/03atomic_open.txt linux/Documentation/filesystems/aufs/design/03atomic_open.txt
1470--- /usr/share/empty/Documentation/filesystems/aufs/design/03atomic_open.txt 1970-01-01 01:00:00.000000000 +0100
1471+++ linux/Documentation/filesystems/aufs/design/03atomic_open.txt 2022-11-05 23:02:18.955889283 +0100
1472@@ -0,0 +1,85 @@
1473+
1474+# Copyright (C) 2015-2022 Junjiro R. Okajima
1475+#
1476+# This program is free software; you can redistribute it and/or modify
1477+# it under the terms of the GNU General Public License as published by
1478+# the Free Software Foundation; either version 2 of the License, or
1479+# (at your option) any later version.
1480+#
1481+# This program is distributed in the hope that it will be useful,
1482+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1483+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1484+# GNU General Public License for more details.
1485+#
1486+# You should have received a copy of the GNU General Public License
1487+# along with this program. If not, see <http://www.gnu.org/licenses/>.
1488+
1489+Support for a branch who has its ->atomic_open()
1490+----------------------------------------------------------------------
1491+The filesystems who implement its ->atomic_open() are not majority. For
1492+example NFSv4 does, and aufs should call NFSv4 ->atomic_open,
1493+particularly for open(O_CREAT|O_EXCL, 0400) case. Other than
1494+->atomic_open(), NFSv4 returns an error for this open(2). While I am not
1495+sure whether all filesystems who have ->atomic_open() behave like this,
1496+but NFSv4 surely returns the error.
1497+
1498+In order to support ->atomic_open() for aufs, there are a few
1499+approaches.
1500+
1501+A. Introduce aufs_atomic_open()
1502+ - calls one of VFS:do_last(), lookup_open() or atomic_open() for
1503+ branch fs.
1504+B. Introduce aufs_atomic_open() calling create, open and chmod. this is
1505+ an aufs user Pip Cet's approach
1506+ - calls aufs_create(), VFS finish_open() and notify_change().
1507+ - pass fake-mode to finish_open(), and then correct the mode by
1508+ notify_change().
1509+C. Extend aufs_open() to call branch fs's ->atomic_open()
1510+ - no aufs_atomic_open().
1511+ - aufs_lookup() registers the TID to an aufs internal object.
1512+ - aufs_create() does nothing when the matching TID is registered, but
1513+ registers the mode.
1514+ - aufs_open() calls branch fs's ->atomic_open() when the matching
1515+ TID is registered.
1516+D. Extend aufs_open() to re-try branch fs's ->open() with superuser's
1517+ credential
1518+ - no aufs_atomic_open().
1519+ - aufs_create() registers the TID to an internal object. this info
1520+ represents "this process created this file just now."
1521+ - when aufs gets EACCES from branch fs's ->open(), then confirm the
1522+ registered TID and re-try open() with superuser's credential.
1523+
1524+Pros and cons for each approach.
1525+
1526+A.
1527+ - straightforward but highly depends upon VFS internal.
1528+ - the atomic behaviour is kept.
1529+ - some of parameters such as nameidata are hard to reproduce for
1530+ branch fs.
1531+ - large overhead.
1532+B.
1533+ - easy to implement.
1534+ - the atomic behaviour is lost.
1535+C.
1536+ - the atomic behaviour is kept.
1537+ - dirty and tricky.
1538+ - VFS checks whether the file is created correctly after calling
1539+ ->create(), which means this approach doesn't work.
1540+D.
1541+ - easy to implement.
1542+ - the atomic behaviour is lost.
1543+ - to open a file with superuser's credential and give it to a user
1544+ process is a bad idea, since the file object keeps the credential
1545+ in it. It may affect LSM or something. This approach doesn't work
1546+ either.
1547+
1548+The approach A is ideal, but it is hard to implement. So here is a
1549+variation of A, which is to be implemented.
1550+
1551+A-1. Introduce aufs_atomic_open()
1552+ - calls branch fs ->atomic_open() if exists. otherwise calls
1553+ vfs_create() and finish_open().
1554+ - the demerit is that the several checks after branch fs
1555+ ->atomic_open() are lost. in the ordinary case, the checks are
1556+ done by VFS:do_last(), lookup_open() and atomic_open(). some can
1557+ be implemented in aufs, but not all I am afraid.
1558diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/03lookup.txt linux/Documentation/filesystems/aufs/design/03lookup.txt
1559--- /usr/share/empty/Documentation/filesystems/aufs/design/03lookup.txt 1970-01-01 01:00:00.000000000 +0100
1560+++ linux/Documentation/filesystems/aufs/design/03lookup.txt 2022-11-05 23:02:18.959222617 +0100
1561@@ -0,0 +1,113 @@
1562+
1563+# Copyright (C) 2005-2022 Junjiro R. Okajima
1564+#
1565+# This program is free software; you can redistribute it and/or modify
1566+# it under the terms of the GNU General Public License as published by
1567+# the Free Software Foundation; either version 2 of the License, or
1568+# (at your option) any later version.
1569+#
1570+# This program is distributed in the hope that it will be useful,
1571+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1572+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1573+# GNU General Public License for more details.
1574+#
1575+# You should have received a copy of the GNU General Public License
1576+# along with this program. If not, see <http://www.gnu.org/licenses/>.
1577+
1578+Lookup in a Branch
1579+----------------------------------------------------------------------
1580+Since aufs has a character of sub-VFS (see Introduction), it operates
1581+lookup for branches as VFS does. It may be a heavy work. But almost all
1582+lookup operation in aufs is the simplest case, ie. lookup only an entry
1583+directly connected to its parent. Digging down the directory hierarchy
1584+is unnecessary. VFS has a function lookup_one_len() for that use, and
1585+aufs calls it.
1586+
1587+When a branch is a remote filesystem, aufs basically relies upon its
1588+->d_revalidate(), also aufs forces the hardest revalidate tests for
1589+them.
1590+For d_revalidate, aufs implements three levels of revalidate tests. See
1591+"Revalidate Dentry and UDBA" in detail.
1592+
1593+
1594+Test Only the Highest One for the Directory Permission (dirperm1 option)
1595+----------------------------------------------------------------------
1596+Let's try a case study.
1597+- aufs has two branches, upper readwrite and lower readonly.
1598+ /au = /rw + /ro
1599+- "dirA" exists under /ro, but not under /rw, and its mode is 0700.
1600+- user invoked "chmod a+rx /au/dirA"
1601+- the internal copy-up is activated and "/rw/dirA" is created and its
1602+ permission bits are set to world readable.
1603+- then "/au/dirA" becomes world readable?
1604+
1605+In this case, /ro/dirA is still 0700 since it exists in readonly branch,
1606+or it may be a natively readonly filesystem. If aufs respects the lower
1607+branch, it should not respond readdir request from other users. But user
1608+allowed it by chmod. Should aufs really reject showing the entries
1609+under /ro/dirA?
1610+
1611+To be honest, I don't have a good solution for this case. So aufs
1612+implements 'dirperm1' and 'nodirperm1' mount options, and leave it to
1613+users.
1614+When dirperm1 is specified, aufs checks only the highest one for the
1615+directory permission, and shows the entries. Otherwise, as usual, checks
1616+every dir existing on all branches and rejects the request.
1617+
1618+As a side effect, dirperm1 option improves the performance of aufs
1619+because the number of permission checks is reduced when the number of
1620+branches is large.
1621+
1622+
1623+Revalidate Dentry and UDBA (User's Direct Branch Access)
1624+----------------------------------------------------------------------
1625+Generally VFS helpers re-validate a dentry as a part of lookup.
1626+0. digging down the directory hierarchy.
1627+1. lock the parent dir by its i_mutex.
1628+2. lookup the final (child) entry.
1629+3. revalidate it.
1630+4. call the actual operation (create, unlink, etc.)
1631+5. unlock the parent dir
1632+
1633+If the filesystem implements its ->d_revalidate() (step 3), then it is
1634+called. Actually aufs implements it and checks the dentry on a branch is
1635+still valid.
1636+But it is not enough. Because aufs has to release the lock for the
1637+parent dir on a branch at the end of ->lookup() (step 2) and
1638+->d_revalidate() (step 3) while the i_mutex of the aufs dir is still
1639+held by VFS.
1640+If the file on a branch is changed directly, eg. bypassing aufs, after
1641+aufs released the lock, then the subsequent operation may cause
1642+some unpleasant result.
1643+
1644+This situation is a result of VFS architecture, ->lookup() and
1645+->d_revalidate() are separated. But I never say it is wrong. It is a good
1646+design from VFS's point of view. It is just not suitable for sub-VFS
1647+character in aufs.
1648+
1649+Aufs supports such case by three levels of revalidation which are
1650+selectable by user.
1651+1. Simple Revalidate
1652+ In addition to the native flow in VFS, confirm the child-parent
1653+ relationship on the branch just after locking the parent dir on the
1654+ branch in the "actual operation" (step 4). When this validation
1655+ fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still
1656+ checks the validation of the dentry on branches.
1657+2. Monitor Changes Internally by Inotify/Fsnotify
1658+ In addition to the above, in the "actual operation" (step 4) aufs looks up
1659+ the dentry on the branch again, and returns EBUSY if it finds a different
1660+ dentry.
1661+ Additionally, aufs sets the inotify/fsnotify watch for every dir on branches
1662+ while it is in cache. When the event is notified, aufs registers a
1663+ function to kernel 'events' thread by schedule_work(). And the
1664+ function sets some special status to the cached aufs dentry and inode
1665+ private data. If they are not cached, then aufs has nothing to
1666+ do. When the same file is accessed through aufs (step 0-3) later,
1667+ aufs will detect the status and refresh all necessary data.
1668+ In this mode, aufs has to ignore the event which is fired by aufs
1669+ itself.
1670+3. No Extra Validation
1671+ This is the simplest test and doesn't add any additional revalidation
1672+ test, and skip the revalidation in step 4. It is useful and improves
1673+ aufs performance when system surely hide the aufs branches from user,
1674+ by over-mounting something (or another method).
1675diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/04branch.txt linux/Documentation/filesystems/aufs/design/04branch.txt
1676--- /usr/share/empty/Documentation/filesystems/aufs/design/04branch.txt 1970-01-01 01:00:00.000000000 +0100
1677+++ linux/Documentation/filesystems/aufs/design/04branch.txt 2022-11-05 23:02:18.959222617 +0100
1678@@ -0,0 +1,74 @@
1679+
1680+# Copyright (C) 2005-2022 Junjiro R. Okajima
1681+#
1682+# This program is free software; you can redistribute it and/or modify
1683+# it under the terms of the GNU General Public License as published by
1684+# the Free Software Foundation; either version 2 of the License, or
1685+# (at your option) any later version.
1686+#
1687+# This program is distributed in the hope that it will be useful,
1688+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1689+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1690+# GNU General Public License for more details.
1691+#
1692+# You should have received a copy of the GNU General Public License
1693+# along with this program. If not, see <http://www.gnu.org/licenses/>.
1694+
1695+Branch Manipulation
1696+
1697+Since aufs supports dynamic branch manipulation, ie. add/remove a branch
1698+and changing its permission/attribute, there are a lot of works to do.
1699+
1700+
1701+Add a Branch
1702+----------------------------------------------------------------------
1703+o Confirm the adding dir exists outside of aufs, including loopback
1704+ mount, and its various attributes.
1705+o Initialize the xino file and whiteout bases if necessary.
1706+ See struct.txt.
1707+
1708+o Check the owner/group/mode of the directory
1709+ When the owner/group/mode of the adding directory differs from the
1710+ existing branch, aufs issues a warning because it may impose a
1711+ security risk.
1712+ For example, when a upper writable branch has a world writable empty
1713+ top directory, a malicious user can create any files on the writable
1714+ branch directly, like copy-up and modify manually. If something like
1715+ /etc/{passwd,shadow} exists on the lower readonly branch but the upper
1716+ writable branch, and the writable branch is world-writable, then a
1717+ malicious guy may create /etc/passwd on the writable branch directly
1718+ and the infected file will be valid in aufs.
1719+ I am afraid it can be a security issue, but aufs can do nothing except
1720+ producing a warning.
1721+
1722+
1723+Delete a Branch
1724+----------------------------------------------------------------------
1725+o Confirm the deleting branch is not busy
1726+ To be general, there is one merit to adopt "remount" interface to
1727+ manipulate branches. It is to discard caches. At deleting a branch,
1728+ aufs checks the still cached (and connected) dentries and inodes. If
1729+ there are any, then they are all in-use. An inode without its
1730+ corresponding dentry can be alive alone (for example, inotify/fsnotify case).
1731+
1732+ For the cached one, aufs checks whether the same named entry exists on
1733+ other branches.
1734+ If the cached one is a directory, because aufs provides a merged view
1735+ to users, as long as one dir is left on any branch aufs can show the
1736+ dir to users. In this case, the branch can be removed from aufs.
1737+ Otherwise aufs rejects deleting the branch.
1738+
1739+ If any file on the deleting branch is opened by aufs, then aufs
1740+ rejects deleting.
1741+
1742+
1743+Modify the Permission of a Branch
1744+----------------------------------------------------------------------
1745+o Re-initialize or remove the xino file and whiteout bases if necessary.
1746+ See struct.txt.
1747+
1748+o rw --> ro: Confirm the modifying branch is not busy
1749+ Aufs rejects the request if any of these conditions are true.
1750+ - a file on the branch is mmap-ed.
1751+ - a regular file on the branch is opened for write and there is no
1752+ same named entry on the upper branch.
1753diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/05wbr_policy.txt linux/Documentation/filesystems/aufs/design/05wbr_policy.txt
1754--- /usr/share/empty/Documentation/filesystems/aufs/design/05wbr_policy.txt 1970-01-01 01:00:00.000000000 +0100
1755+++ linux/Documentation/filesystems/aufs/design/05wbr_policy.txt 2022-11-05 23:02:18.959222617 +0100
1756@@ -0,0 +1,64 @@
1757+
1758+# Copyright (C) 2005-2022 Junjiro R. Okajima
1759+#
1760+# This program is free software; you can redistribute it and/or modify
1761+# it under the terms of the GNU General Public License as published by
1762+# the Free Software Foundation; either version 2 of the License, or
1763+# (at your option) any later version.
1764+#
1765+# This program is distributed in the hope that it will be useful,
1766+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1767+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1768+# GNU General Public License for more details.
1769+#
1770+# You should have received a copy of the GNU General Public License
1771+# along with this program. If not, see <http://www.gnu.org/licenses/>.
1772+
1773+Policies to Select One among Multiple Writable Branches
1774+----------------------------------------------------------------------
1775+When the number of writable branches is more than one, aufs has to decide
1776+the target branch for file creation or copy-up. By default, the highest
1777+writable branch which has the parent (or ancestor) dir of the target
1778+file is chosen (top-down-parent policy).
1779+By user's request, aufs implements some other policies to select the
1780+writable branch, for file creation several policies, round-robin,
1781+most-free-space, and other policies. For copy-up, top-down-parent,
1782+bottom-up-parent, bottom-up and others.
1783+
1784+As expected, the round-robin policy selects the branch in circular order. When
1785+you have two writable branches and create 10 new files, 5 files will be
1786+created for each branch. mkdir(2) systemcall is an exception. When you
1787+create 10 new directories, all will be created on the same branch.
1788+And the most-free-space policy selects the one which has most free
1789+space among the writable branches. The amount of free space will be
1790+checked by aufs internally, and users can specify its time interval.
1791+
1792+The policies for copy-up are simpler,
1793+top-down-parent is equivalent to the same named one in create policy,
1794+bottom-up-parent selects the writable branch where the parent dir
1795+exists and the nearest upper one from the copyup-source,
1796+bottom-up selects the nearest upper writable branch from the
1797+copyup-source, regardless the existence of the parent dir.
1798+
1799+There are some rules or exceptions to apply these policies.
1800+- If there is a readonly branch above the policy-selected branch and
1801+ the parent dir is marked as opaque (a variation of whiteout), or the
1802+ target (creating) file is whiteout-ed on the upper readonly branch,
1803+ then the result of the policy is ignored and the target file will be
1804+ created on the nearest upper writable branch than the readonly branch.
1805+- If there is a writable branch above the policy-selected branch and
1806+ the parent dir is marked as opaque or the target file is whiteouted
1807+ on the branch, then the result of the policy is ignored and the target
1808+ file will be created on the highest one among the upper writable
1809+ branches who has diropq or whiteout. In case of whiteout, aufs removes
1810+ it as usual.
1811+- link(2) and rename(2) systemcalls are exceptions in every policy.
1812+ They try selecting the branch where the source exists as possible
1813+ since copyup a large file will take long time. If it can't be,
1814+ ie. the branch where the source exists is readonly, then they will
1815+ follow the copyup policy.
1816+- There is an exception for rename(2) when the target exists.
1817+ If the rename target exists, aufs compares the index of the branches
1818+ where the source and the target exists and selects the higher
1819+ one. If the selected branch is readonly, then aufs follows the
1820+ copyup policy.
1821diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06dirren.dot linux/Documentation/filesystems/aufs/design/06dirren.dot
1822--- /usr/share/empty/Documentation/filesystems/aufs/design/06dirren.dot 1970-01-01 01:00:00.000000000 +0100
1823+++ linux/Documentation/filesystems/aufs/design/06dirren.dot 2022-11-05 23:02:18.959222617 +0100
1824@@ -0,0 +1,44 @@
1825+
1826+// to view this graph, run dot(1) command in GRAPHVIZ.
1827+//
1828+// This program is free software; you can redistribute it and/or modify
1829+// it under the terms of the GNU General Public License as published by
1830+// the Free Software Foundation; either version 2 of the License, or
1831+// (at your option) any later version.
1832+//
1833+// This program is distributed in the hope that it will be useful,
1834+// but WITHOUT ANY WARRANTY; without even the implied warranty of
1835+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1836+// GNU General Public License for more details.
1837+//
1838+// You should have received a copy of the GNU General Public License
1839+// along with this program. If not, see <http://www.gnu.org/licenses/>.
1840+
1841+digraph G {
1842+node [shape=box];
1843+whinfo [label="detailed info file\n(lower_brid_root-hinum, h_inum, namelen, old name)"];
1844+
1845+node [shape=oval];
1846+
1847+aufs_rename -> whinfo [label="store/remove"];
1848+
1849+node [shape=oval];
1850+inode_list [label="h_inum list in branch\ncache"];
1851+
1852+node [shape=box];
1853+whinode [label="h_inum list file"];
1854+
1855+node [shape=oval];
1856+brmgmt [label="br_add/del/mod/umount"];
1857+
1858+brmgmt -> inode_list [label="create/remove"];
1859+brmgmt -> whinode [label="load/store"];
1860+
1861+inode_list -> whinode [style=dashed,dir=both];
1862+
1863+aufs_rename -> inode_list [label="add/del"];
1864+
1865+aufs_lookup -> inode_list [label="search"];
1866+
1867+aufs_lookup -> whinfo [label="load/remove"];
1868+}
1869diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06dirren.txt linux/Documentation/filesystems/aufs/design/06dirren.txt
1870--- /usr/share/empty/Documentation/filesystems/aufs/design/06dirren.txt 1970-01-01 01:00:00.000000000 +0100
1871+++ linux/Documentation/filesystems/aufs/design/06dirren.txt 2022-11-05 23:02:18.959222617 +0100
1872@@ -0,0 +1,102 @@
1873+
1874+# Copyright (C) 2017-2022 Junjiro R. Okajima
1875+#
1876+# This program is free software; you can redistribute it and/or modify
1877+# it under the terms of the GNU General Public License as published by
1878+# the Free Software Foundation; either version 2 of the License, or
1879+# (at your option) any later version.
1880+#
1881+# This program is distributed in the hope that it will be useful,
1882+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1883+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1884+# GNU General Public License for more details.
1885+#
1886+# You should have received a copy of the GNU General Public License
1887+# along with this program. If not, see <http://www.gnu.org/licenses/>.
1888+
1889+Special handling for renaming a directory (DIRREN)
1890+----------------------------------------------------------------------
1891+First, let's assume we have a simple usecase.
1892+
1893+- /u = /rw + /ro
1894+- /rw/dirA exists
1895+- /ro/dirA and /ro/dirA/file exist too
1896+- there is no dirB on both branches
1897+- a user issues rename("dirA", "dirB")
1898+
1899+Now, how should aufs behave against this rename(2)?
1900+There are a few possible cases.
1901+
1902+A. returns EROFS.
1903+ since dirA exists on a readonly branch which cannot be renamed.
1904+B. returns EXDEV.
1905+ it is possible to copy-up dirA (only the dir itself), but the child
1906+ entries ("file" in this case) should not be. it must be a bad
1907+ approach to copy-up recursively.
1908+C. returns a success.
1909+ even the branch /ro is readonly, aufs tries renaming it. Obviously it
1910+ is a violation of aufs' policy.
1911+D. construct an extra information which indicates that /ro/dirA should
1912+ be handled as the name of dirB.
1913+ overlayfs has a similar feature called REDIRECT.
1914+
1915+Until now, aufs implements the case B only which returns EXDEV, and
1916+expects the userspace application behaves like mv(1) which tries
1917+issuing rename(2) recursively.
1918+
1919+A new aufs feature called DIRREN is introduced which implements the case
1920+D. There are several "extra information" added.
1921+
1922+1. detailed info per renamed directory
1923+ path: /rw/dirB/$AUFS_WH_DR_INFO_PFX.<lower branch-id>
1924+2. the inode-number list of directories on a branch
1925+ path: /rw/dirB/$AUFS_WH_DR_BRHINO
1926+
1927+The filename of "detailed info per directory" represents the lower
1928+branch, and its format is
1929+- a type of the branch id
1930+ one of these.
1931+ + uuid (not implemented yet)
1932+ + fsid
1933+ + dev
1934+- the inode-number of the branch root dir
1935+
1936+And it contains these info in a single regular file.
1937+- magic number
1938+- branch's inode-number of the logically renamed dir
1939+- the name of the before-renamed dir
1940+
1941+The "detailed info per directory" file is created in aufs rename(2), and
1942+loaded in any lookup.
1943+The info is considered in lookup for the matching case only. Here
1944+"matching" means that the root of branch (in the info filename) is same
1945+to the current looking-up branch. After looking-up the before-renamed
1946+name, the inode-number is compared. And the matched dentry is used.
1947+
1948+The "inode-number list of directories" is a regular file which contains
1949+simply the inode-numbers on the branch. The file is created or updated
1950+in removing the branch, and loaded in adding the branch. Its lifetime is
1951+equal to the branch.
1952+The list is referred in lookup, and when the current target inode is
1953+found in the list, the aufs tries loading the "detailed info per
1954+directory" and get the changed and valid name of the dir.
1955+
1956+Theoretically these "extra information" may be able to be put into XATTR
1957+in the dir inode. But aufs doesn't choose this way because
1958+1. XATTR may not be supported by the branch (or its configuration)
1959+2. XATTR may have its size limit.
1960+3. XATTR may be less easy to convert than a regular file, when the
1961+ format of the info is changed in the future.
1962+At the same time, I agree that the regular file approach is much slower
1963+than XATTR approach. So, in the future, aufs may take the XATTR or other
1964+better approach.
1965+
1966+This DIRREN feature is enabled by aufs configuration, and is activated
1967+by a new mount option.
1968+
1969+For the more complicated case, there is a work with UDBA option, which
1970+is to detect the direct access to the branches (by-passing aufs) and to
1971+maintain the caches in aufs. Since a single cached aufs dentry may
1972+contain two names, before- and after-rename, the name comparison in
1973+UDBA handler may not work correctly. In this case, the behaviour will be
1974+equivalent to udba=reval case.
1975diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06fhsm.txt linux/Documentation/filesystems/aufs/design/06fhsm.txt
1976--- /usr/share/empty/Documentation/filesystems/aufs/design/06fhsm.txt 1970-01-01 01:00:00.000000000 +0100
1977+++ linux/Documentation/filesystems/aufs/design/06fhsm.txt 2022-11-05 23:02:18.959222617 +0100
1978@@ -0,0 +1,118 @@
1979+
1980+# Copyright (C) 2011-2022 Junjiro R. Okajima
1981+#
1982+# This program is free software; you can redistribute it and/or modify
1983+# it under the terms of the GNU General Public License as published by
1984+# the Free Software Foundation; either version 2 of the License, or
1985+# (at your option) any later version.
1986+#
1987+# This program is distributed in the hope that it will be useful,
1988+# but WITHOUT ANY WARRANTY; without even the implied warranty of
1989+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1990+# GNU General Public License for more details.
1991+#
1992+# You should have received a copy of the GNU General Public License
1993+# along with this program. If not, see <http://www.gnu.org/licenses/>.
1994+
1995+File-based Hierarchical Storage Management (FHSM)
1996+----------------------------------------------------------------------
1997+Hierarchical Storage Management (or HSM) is a well-known feature in the
1998+storage world. Aufs provides this feature as file-based with multiple
1999+writable branches, based upon the principle of "Colder, the Lower".
2000+Here the word "colder" means that the less used files, and "lower" means
2001+that the position in the order of the stacked branches vertically.
2002+These multiple writable branches are prioritized, ie. the topmost one
2003+should be the fastest drive and be used heavily.
2004+
2005+o Characters in aufs FHSM story
2006+- aufs itself and a new branch attribute.
2007+- a new ioctl interface to move-down and to establish a connection with
2008+ the daemon ("move-down" is a converse of "copy-up").
2009+- userspace tool and daemon.
2010+
2011+The userspace daemon establishes a connection with aufs and waits for
2012+the notification. The notified information is very similar to struct
2013+statfs containing the number of consumed blocks and inodes.
2014+When the consumed blocks/inodes of a branch exceeds the user-specified
2015+upper watermark, the daemon activates its move-down process until the
2016+consumed blocks/inodes reaches the user-specified lower watermark.
2017+
2018+The actual move-down is done by aufs based upon the request from
2019+user-space since we need to maintain the inode number and the internal
2020+pointer arrays in aufs.
2021+
2022+Currently aufs FHSM handles the regular files only. Additionally they
2023+must not be hard-linked nor pseudo-linked.
2024+
2025+
2026+o Cowork of aufs and the user-space daemon
2027+ During the userspace daemon established the connection, aufs sends a
2028+ small notification to it whenever aufs writes something into the
2029+ writable branch. But it may cost high since aufs issues statfs(2)
2030+ internally. So user can specify a new option to cache the
2031+ info. Actually the notification is controlled by these factors.
2032+ + the specified cache time.
2033+ + classified as "force" by aufs internally.
2034+ Until the specified time expires, aufs doesn't send the info
2035+ except the forced cases. When aufs decide forcing, the info is always
2036+ notified to userspace.
2037+ For example, the number of free inodes is generally large enough and
2038+ the shortage of it happens rarely. So aufs doesn't force the
2039+ notification when creating a new file, directory and others. This is
2040+ the typical case which aufs doesn't force.
2041+ When aufs writes the actual filedata and the file consumes any new
2042+ blocks, aufs forces notifying.
2043+
2044+
2045+o Interfaces in aufs
2046+- New branch attribute.
2047+ + fhsm
2048+ Specifies that the branch is managed by FHSM feature. In other words,
2049+ participant in the FHSM.
2050+ When nofhsm is set to the branch, it will not be the source/target
2051+ branch of the move-down operation. This attribute is set
2052+ independently from coo and moo attributes, and if you want full
2053+ FHSM, you should specify them as well.
2054+- New mount option.
2055+ + fhsm_sec
2056+ Specifies a second to suppress many less important info to be
2057+ notified.
2058+- New ioctl.
2059+ + AUFS_CTL_FHSM_FD
2060+ create a new file descriptor which userspace can read the notification
2061+ (a subset of struct statfs) from aufs.
2062+- Module parameter 'brs'
2063+ It has to be set to 1. Otherwise the new mount option 'fhsm' will not
2064+ be set.
2065+- mount helpers /sbin/mount.aufs and /sbin/umount.aufs
2066+ When there are two or more branches with fhsm attributes,
2067+ /sbin/mount.aufs invokes the user-space daemon and /sbin/umount.aufs
2068+ terminates it. As a result of remounting and branch-manipulation, the
2069+ number of branches with fhsm attribute can be one. In this case,
2070+ /sbin/mount.aufs will terminate the user-space daemon.
2071+
2072+
2073+Finally the operation is done as these steps in kernel-space.
2074+- make sure that,
2075+ + no one else is using the file.
2076+ + the file is not hard-linked.
2077+ + the file is not pseudo-linked.
2078+ + the file is a regular file.
2079+ + the parent dir is not opaqued.
2080+- find the target writable branch.
2081+- make sure the file is not whiteout-ed by the upper (than the target)
2082+ branch.
2083+- make the parent dir on the target branch.
2084+- mutex lock the inode on the branch.
2085+- unlink the whiteout on the target branch (if exists).
2086+- lookup and create the whiteout-ed temporary name on the target branch.
2087+- copy the file as the whiteout-ed temporary name on the target branch.
2088+- rename the whiteout-ed temporary name to the original name.
2089+- unlink the file on the source branch.
2090+- maintain the internal pointer array and the external inode number
2091+ table (XINO).
2092+- maintain the timestamps and other attributes of the parent dir and the
2093+ file.
2094+
2095+And of course, in every step, an error may happen. So the operation
2096+should restore the original file state after an error happens.
2097diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06mmap.txt linux/Documentation/filesystems/aufs/design/06mmap.txt
2098--- /usr/share/empty/Documentation/filesystems/aufs/design/06mmap.txt 1970-01-01 01:00:00.000000000 +0100
2099+++ linux/Documentation/filesystems/aufs/design/06mmap.txt 2022-11-05 23:02:18.959222617 +0100
2100@@ -0,0 +1,72 @@
2101+
2102+# Copyright (C) 2005-2022 Junjiro R. Okajima
2103+#
2104+# This program is free software; you can redistribute it and/or modify
2105+# it under the terms of the GNU General Public License as published by
2106+# the Free Software Foundation; either version 2 of the License, or
2107+# (at your option) any later version.
2108+#
2109+# This program is distributed in the hope that it will be useful,
2110+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2111+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2112+# GNU General Public License for more details.
2113+#
2114+# You should have received a copy of the GNU General Public License
2115+# along with this program. If not, see <http://www.gnu.org/licenses/>.
2116+
2117+mmap(2) -- File Memory Mapping
2118+----------------------------------------------------------------------
2119+In aufs, the file-mapped pages are handled by a branch fs directly, no
2120+interaction with aufs. It means aufs_mmap() calls the branch fs's
2121+->mmap().
2122+This approach is simple and good, but there is one problem.
2123+Under /proc, several entries show the mmapped files by its path (with
2124+device and inode number), and the printed path will be the path on the
2125+branch fs's instead of virtual aufs's.
2126+This is not a problem in most cases, but some utilities lsof(1) (and its
2127+user) may expect the path on aufs.
2128+
2129+To address this issue, aufs adds a new member called vm_prfile in struct
2130+vm_area_struct (and struct vm_region). The original vm_file points to
2131+the file on the branch fs in order to handle everything correctly as
2132+usual. The new vm_prfile points to a virtual file in aufs, and the
2133+show-functions in procfs refers to vm_prfile if it is set.
2134+Also we need to maintain several other places where touching vm_file
2135+such like
2136+- fork()/clone() copies vma and the reference count of vm_file is
2137+ incremented.
2138+- merging vma maintains the ref count too.
2139+
2140+This is not a good approach. It just fakes the printed path. But it
2141+leaves all behaviour around f_mapping unchanged. This is surely an
2142+advantage.
2143+Actually aufs had adopted another complicated approach which calls
2144+generic_file_mmap() and handles struct vm_operations_struct. In this
2145+approach, aufs met a hard problem and I could not solve it without
2146+switching the approach.
2147+
2148+There may be one more approach, which is
2149+- bind-mount the branch-root onto the aufs-root internally
2150+- grab the new vfsmount (ie. struct mount)
2151+- lazy-umount the branch-root internally
2152+- in open(2) the aufs-file, open the branch-file with the hidden
2153+ vfsmount (instead of the original branch's vfsmount)
2154+- ideally this "bind-mount and lazy-umount" should be done atomically,
2155+ but it may be possible from userspace by the mount helper.
2156+
2157+Adding the internal hidden vfsmount and using it in opening a file, the
2158+file path under /proc will be printed correctly. This approach looks
2159+smarter, but is not possible I am afraid.
2160+- aufs-root may be bind-mount later. when it happens, another hidden
2161+ vfsmount will be required.
2162+- it is hard to get the chance to bind-mount and lazy-umount
2163+ + in kernel-space, FS can have vfsmount in open(2) via
2164+ file->f_path, and aufs can know its vfsmount. But several locks are
2165+ already acquired, and if aufs tries to bind-mount and lazy-umount
2166+ here, then it may cause a deadlock.
2167+ + in user-space, bind-mount doesn't invoke the mount helper.
2168+- since /proc shows dev and ino, aufs has to give vma these info. it
2169+ means a new member vm_prinode will be necessary. this is essentially
2170+ equivalent to vm_prfile described above.
2171+
2172+I have to give up this "looks-smarter" approach.
2173diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/06xattr.txt linux/Documentation/filesystems/aufs/design/06xattr.txt
2174--- /usr/share/empty/Documentation/filesystems/aufs/design/06xattr.txt 1970-01-01 01:00:00.000000000 +0100
2175+++ linux/Documentation/filesystems/aufs/design/06xattr.txt 2022-11-05 23:02:18.959222617 +0100
2176@@ -0,0 +1,94 @@
2177+
2178+# Copyright (C) 2014-2022 Junjiro R. Okajima
2179+#
2180+# This program is free software; you can redistribute it and/or modify
2181+# it under the terms of the GNU General Public License as published by
2182+# the Free Software Foundation; either version 2 of the License, or
2183+# (at your option) any later version.
2184+#
2185+# This program is distributed in the hope that it will be useful,
2186+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2187+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2188+# GNU General Public License for more details.
2189+#
2190+# You should have received a copy of the GNU General Public License
2191+# along with this program. If not, see <http://www.gnu.org/licenses/>.
2192+
2193+Listing XATTR/EA and getting the value
2194+----------------------------------------------------------------------
2195+For the inode standard attributes (owner, group, timestamps, etc.), aufs
2196+shows the values from the topmost existing file. This behaviour is good
2197+for the non-dir entries since the behaviour exactly matches the shown
2198+information. But for the directories, aufs considers all the same named
2199+entries on the lower branches. Which means, if one of the lower entry
2200+rejects readdir call, then aufs returns an error even if the topmost
2201+entry allows it. This behaviour is necessary to respect the branch fs's
2202+security, but can make users confused since the user-visible standard
2203+attributes don't match the behaviour.
2204+To address this issue, aufs has a mount option called dirperm1 which
2205+checks the permission for the topmost entry only, and ignores the lower
2206+entry's permission.
2207+
2208+A similar issue can happen around XATTR.
2209+getxattr(2) and listxattr(2) families behave as if dirperm1 option is
2210+always set. Otherwise these very unpleasant situations would happen.
2211+- listxattr(2) may return the duplicated entries.
2212+- users may not be able to remove or reset the XATTR forever,
2213+
2214+
2215+XATTR/EA support in the internal (copy,move)-(up,down)
2216+----------------------------------------------------------------------
2217+Generally the extended attributes of inode are categorized as these.
2218+- "security" for LSM and capability.
2219+- "system" for posix ACL, 'acl' mount option is required for the branch
2220+ fs generally.
2221+- "trusted" for userspace, CAP_SYS_ADMIN is required.
2222+- "user" for userspace, 'user_xattr' mount option is required for the
2223+ branch fs generally.
2224+
2225+Moreover there are some other categories. Aufs handles these rather
2226+unpopular categories as the ordinary ones, ie. there is no special
2227+condition nor exception.
2228+
2229+In copy-up, the support for XATTR on the dst branch may differ from the
2230+src branch. In this case, the copy-up operation will get an error and
2231+the original user operation which triggered the copy-up will fail. It
2232+can happen that even all copy-up will fail.
2233+When both of src and dst branches support XATTR and if an error occurs
2234+during copying XATTR, then the copy-up should fail obviously. That is a
2235+good reason and aufs should return an error to userspace. But when only
2236+the src branch supports that XATTR, aufs should not return an error.
2237+For example, the src branch supports ACL but the dst branch doesn't
2238+because the dst branch may natively un-support it or temporarily
2239+un-support it due to "noacl" mount option. Of course, the dst branch fs
2240+may NOT return an error even if the XATTR is not supported. It is
2241+totally up to the branch fs.
2242+
2243+Anyway when the aufs internal copy-up gets an error from the dst branch
2244+fs, then aufs tries removing the just copied entry and returns the error
2245+to the userspace. The worst case of this situation will be all copy-up
2246+will fail.
2247+
2248+For the copy-up operation, there are two basic approaches.
2249+- copy the specified XATTR only (by category above), and return the
2250+ error unconditionally if it happens.
2251+- copy all XATTR, and ignore the error on the specified category only.
2252+
2253+In order to support XATTR and to implement the correct behaviour, aufs
2254+chooses the latter approach and introduces some new branch attributes,
2255+"icexsec", "icexsys", "icextr", "icexusr", and "icexoth".
2256+They correspond to the XATTR namespaces (see above). Additionally, to be
2257+convenient, "icex" is also provided which means all "icex*" attributes
2258+are set (here the word "icex" stands for "ignore copy-error on XATTR").
2259+
2260+The meaning of these attributes is to ignore the error from setting
2261+XATTR on that branch.
2262+Note that aufs tries copying all XATTR unconditionally, and ignores the
2263+error from the dst branch according to the specified attributes.
2264+
2265+Some XATTR may have its default value. The default value may come from
2266+the parent dir or the environment. If the default value is set at the
2267+file creating-time, it will be overwritten by copy-up.
2268+Some contradiction may happen I am afraid.
2269+Do we need another attribute to stop copying XATTR? I am unsure. For
2270+now, aufs implements the branch attributes to ignore the error.
2271diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/07export.txt linux/Documentation/filesystems/aufs/design/07export.txt
2272--- /usr/share/empty/Documentation/filesystems/aufs/design/07export.txt 1970-01-01 01:00:00.000000000 +0100
2273+++ linux/Documentation/filesystems/aufs/design/07export.txt 2022-11-05 23:02:18.959222617 +0100
2274@@ -0,0 +1,58 @@
2275+
2276+# Copyright (C) 2005-2022 Junjiro R. Okajima
2277+#
2278+# This program is free software; you can redistribute it and/or modify
2279+# it under the terms of the GNU General Public License as published by
2280+# the Free Software Foundation; either version 2 of the License, or
2281+# (at your option) any later version.
2282+#
2283+# This program is distributed in the hope that it will be useful,
2284+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2285+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2286+# GNU General Public License for more details.
2287+#
2288+# You should have received a copy of the GNU General Public License
2289+# along with this program. If not, see <http://www.gnu.org/licenses/>.
2290+
2291+Export Aufs via NFS
2292+----------------------------------------------------------------------
2293+Here is an approach.
2294+- like xino/xib, add a new file 'xigen' which stores aufs inode
2295+ generation.
2296+- iget_locked(): initialize aufs inode generation for a new inode, and
2297+ store it in xigen file.
2298+- destroy_inode(): increment aufs inode generation and store it in xigen
2299+ file. it is necessary even if it is not unlinked, because any data of
2300+ inode may be changed by UDBA.
2301+- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise
2302+ build file handle by
2303+ + branch id (4 bytes)
2304+ + superblock generation (4 bytes)
2305+ + inode number (4 or 8 bytes)
2306+ + parent dir inode number (4 or 8 bytes)
2307+ + inode generation (4 bytes)
2308+ + return value of exportfs_encode_fh() for the parent on a branch (4
2309+ bytes)
2310+ + file handle for a branch (by exportfs_encode_fh())
2311+- fh_to_dentry():
2312+ + find the index of a branch from its id in handle, and check it
2313+ still exists in aufs.
2314+ + 1st level: get the inode number from handle and search it in cache.
2315+ + 2nd level: if not found in cache, get the parent inode number from
2316+ the handle and search it in cache. and then open the found parent
2317+ dir, find the matching inode number by vfs_readdir() and get its
2318+ name, and call lookup_one_len() for the target dentry.
2319+ + 3rd level: if the parent dir is not cached, call
2320+ exportfs_decode_fh() for a branch and get the parent on a branch,
2321+ build a pathname of it, convert it into a pathname in aufs, call
2322+ path_lookup(). now aufs gets a parent dir dentry, then handle it as
2323+ the 2nd level.
2324+ + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount
2325+ for every branch, but not itself. to get this, (currently) aufs
2326+ searches in current->nsproxy->mnt_ns list. it may not be a good
2327+ idea, but I didn't get other approach.
2328+ + test the generation of the gotten inode.
2329+- every inode operation: they may get EBUSY due to UDBA. in this case,
2330+ convert it into ESTALE for NFSD.
2331+- readdir(): call lockdep_on/off() because filldir in NFSD calls
2332+ lookup_one_len(), vfs_getattr(), encode_fh() and others.
2333diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/08shwh.txt linux/Documentation/filesystems/aufs/design/08shwh.txt
2334--- /usr/share/empty/Documentation/filesystems/aufs/design/08shwh.txt 1970-01-01 01:00:00.000000000 +0100
2335+++ linux/Documentation/filesystems/aufs/design/08shwh.txt 2022-11-05 23:02:18.959222617 +0100
2336@@ -0,0 +1,52 @@
2337+
2338+# Copyright (C) 2005-2022 Junjiro R. Okajima
2339+#
2340+# This program is free software; you can redistribute it and/or modify
2341+# it under the terms of the GNU General Public License as published by
2342+# the Free Software Foundation; either version 2 of the License, or
2343+# (at your option) any later version.
2344+#
2345+# This program is distributed in the hope that it will be useful,
2346+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2347+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2348+# GNU General Public License for more details.
2349+#
2350+# You should have received a copy of the GNU General Public License
2351+# along with this program. If not, see <http://www.gnu.org/licenses/>.
2352+
2353+Show Whiteout Mode (shwh)
2354+----------------------------------------------------------------------
2355+Generally aufs hides the name of whiteouts. But in some cases, to show
2356+them is very useful for users. For instance, creating a new middle layer
2357+(branch) by merging existing layers.
2358+
2359+(borrowing aufs1 HOW-TO from a user, Michael Towers)
2360+When you have three branches,
2361+- Bottom: 'system', squashfs (underlying base system), read-only
2362+- Middle: 'mods', squashfs, read-only
2363+- Top: 'overlay', ram (tmpfs), read-write
2364+
2365+The top layer is loaded at boot time and saved at shutdown, to preserve
2366+the changes made to the system during the session.
2367+When larger changes have been made, or smaller changes have accumulated,
2368+the size of the saved top layer data grows. At this point, it would be
2369+nice to be able to merge the two overlay branches ('mods' and 'overlay')
2370+and rewrite the 'mods' squashfs, clearing the top layer and thus
2371+restoring save and load speed.
2372+
2373+This merging is simplified by the use of another aufs mount, of just the
2374+two overlay branches using the 'shwh' option.
2375+# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \
2376+ aufs /livesys/merge_union
2377+
2378+A merged view of these two branches is then available at
2379+/livesys/merge_union, and the new feature is that the whiteouts are
2380+visible!
2381+Note that in 'shwh' mode the aufs mount must be 'ro', which will disable
2382+writing to all branches. Also the default mode for all branches is 'ro'.
2383+It is now possible to save the combined contents of the two overlay
2384+branches to a new squashfs, e.g.:
2385+# mksquashfs /livesys/merge_union /path/to/newmods.squash
2386+
2387+This new squashfs archive can be stored on the boot device and the
2388+initramfs will use it to replace the old one at the next boot.
2389diff -urN /usr/share/empty/Documentation/filesystems/aufs/design/10dynop.txt linux/Documentation/filesystems/aufs/design/10dynop.txt
2390--- /usr/share/empty/Documentation/filesystems/aufs/design/10dynop.txt 1970-01-01 01:00:00.000000000 +0100
2391+++ linux/Documentation/filesystems/aufs/design/10dynop.txt 2022-11-05 23:02:18.959222617 +0100
2392@@ -0,0 +1,47 @@
2393+
2394+# Copyright (C) 2010-2022 Junjiro R. Okajima
2395+#
2396+# This program is free software; you can redistribute it and/or modify
2397+# it under the terms of the GNU General Public License as published by
2398+# the Free Software Foundation; either version 2 of the License, or
2399+# (at your option) any later version.
2400+#
2401+# This program is distributed in the hope that it will be useful,
2402+# but WITHOUT ANY WARRANTY; without even the implied warranty of
2403+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2404+# GNU General Public License for more details.
2405+#
2406+# You should have received a copy of the GNU General Public License
2407+# along with this program. If not, see <http://www.gnu.org/licenses/>.
2408+
2409+Dynamically customizable FS operations
2410+----------------------------------------------------------------------
2411+Generally FS operations (struct inode_operations, struct
2412+address_space_operations, struct file_operations, etc.) are defined as
2413+"static const", but it never means that FS have only one set of
2414+operation. Some FS have multiple sets of them. For instance, ext2 has
2415+three sets, one for XIP, for NOBH, and for normal.
2416+Since aufs overrides and redirects these operations, sometimes aufs has
2417+to change its behaviour according to the branch FS type. More importantly
2418+VFS acts differently if a function (member in the struct) is set or
2419+not. It means aufs should have several sets of operations and select one
2420+among them according to the branch FS definition.
2421+
2422+In order to solve this problem and not to affect the behaviour of VFS,
2423+aufs defines these operations dynamically. For instance, aufs defines
2424+dummy direct_IO function for struct address_space_operations, but it may
2425+not be set to the address_space_operations actually. When the branch FS
2426+doesn't have it, aufs doesn't set it to its address_space_operations
2427+while the function definition itself is still alive. So the behaviour
2428+itself will not change, and it will return an error when direct_IO is
2429+not set.
2430+
2431+The lifetime of these dynamically generated operation objects is
2432+maintained by aufs branch object. When the branch is removed from aufs,
2433+the reference counter of the object is decremented. When it reaches
2434+zero, the dynamically generated operation object will be freed.
2435+
2436+This approach is designed to support AIO (io_submit), Direct I/O and
2437+XIP (DAX) mainly.
2438+Currently this approach is applied to address_space_operations for
2439+regular files only.
2440diff -urN /usr/share/empty/Documentation/filesystems/aufs/README linux/Documentation/filesystems/aufs/README
2441--- /usr/share/empty/Documentation/filesystems/aufs/README 1970-01-01 01:00:00.000000000 +0100
2442+++ linux/Documentation/filesystems/aufs/README 2023-02-20 21:05:51.959693785 +0100
2443@@ -0,0 +1,408 @@
2444+
2445+Aufs6 -- advanced multi layered unification filesystem version 6.x
2446+http://aufs.sf.net
2447+Junjiro R. Okajima
2448+
2449+
2450+0. Introduction
2451+----------------------------------------
2452+In the early days, aufs was entirely re-designed and re-implemented
2453+Unionfs Version 1.x series. Adding many original ideas, approaches,
2454+improvements and implementations, it became totally different from
2455+Unionfs while keeping the basic features.
2456+Later, Unionfs Version 2.x series began taking some of the same
2457+approaches as aufs1's.
2458+Unionfs was being developed by Professor Erez Zadok at Stony Brook
2459+University and his team.
2460+
2461+Aufs6 supports linux-v6.0 and later, try aufs6.0 branch in
2462+aufs-linux.git or aufs-standalone.git.
2463+If you want older kernel version support,
2464+- for linux-v5.x series, try aufs-linux.git or aufs-standalone.git
2465+- for linux-v4.x series, try aufs4-linux.git or aufs4-standalone.git
2466+- for linux-v3.x series, try aufs3-linux.git or aufs3-standalone.git
2467+- for linux-v2.6.16 and later, try aufs2-2.6.git, aufs2-standalone.git
2468+ or aufs1 from CVS on SourceForge.
2469+
2470+Note: the name of aufs5-linux.git and aufs5-standalone.git on github
2471+ were changed. Now they are aufs-linux.git and
2472+ aufs-standalone.git and they contain aufs5 and later branches.
2473+
2474+Note: it becomes clear that "Aufs was rejected. Let's give it up."
2475+ According to Christoph Hellwig, linux rejects all union-type
2476+ filesystems but UnionMount.
2477+<http://marc.info/?l=linux-kernel&m=123938533724484&w=2>
2478+
2479+PS. Al Viro seems to have a plan to merge aufs as well as overlayfs and
2480+ UnionMount, and he pointed out an issue around a directory mutex
2481+ lock and aufs addressed it. But it is still unsure whether aufs will
2482+ be merged (or any other union solution).
2483+<http://marc.info/?l=linux-kernel&m=136312705029295&w=1>
2484+
2485+
2486+1. Features
2487+----------------------------------------
2488+- unite several directories into a single virtual filesystem. The member
2489+ directory is called as a branch.
2490+- you can specify the permission flags to the branch, which are 'readonly',
2491+ 'readwrite' and 'whiteout-able.'
2492+- by upper writable branch, internal copyup and whiteout, files/dirs on
2493+ readonly branch are modifiable logically.
2494+- dynamic branch manipulation, add, del.
2495+- etc...
2496+
2497+Also there are many enhancements in aufs, such as:
2498+- test only the highest one for the directory permission (dirperm1)
2499+- copyup on open (coo=)
2500+- 'move' policy for copy-up between two writable branches, after
2501+ checking free space.
2502+- xattr, acl
2503+- readdir(3) in userspace.
2504+- keep inode number by external inode number table
2505+- keep the timestamps of file/dir in internal copyup operation
2506+- seekable directory, supporting NFS readdir.
2507+- whiteout is hardlinked in order to reduce the consumption of inodes
2508+ on branch
2509+- do not copyup, nor create a whiteout when it is unnecessary
2510+- revert a single systemcall when an error occurs in aufs
2511+- remount interface instead of ioctl
2512+- maintain /etc/mtab by an external command, /sbin/mount.aufs.
2513+- loopback mounted filesystem as a branch
2514+- kernel thread for removing the dir which has plenty of whiteouts
2515+- support copyup sparse file (a file which has a 'hole' in it)
2516+- default permission flags for branches
2517+- selectable permission flags for ro branch, whether whiteout can
2518+ exist or not
2519+- export via NFS.
2520+- support <sysfs>/fs/aufs and <debugfs>/aufs.
2521+- support multiple writable branches, some policies to select one
2522+ among multiple writable branches.
2523+- a new semantics for link(2) and rename(2) to support multiple
2524+ writable branches.
2525+- no glibc changes are required.
2526+- pseudo hardlink (hardlink over branches)
2527+- allow a direct access manually to a file on branch, e.g. bypassing aufs.
2528+ including NFS or remote filesystem branch.
2529+- userspace wrapper for pathconf(3)/fpathconf(3) with _PC_LINK_MAX.
2530+- and more...
2531+
2532+Currently these features are dropped temporarily from aufs6.
2533+See design/08plan.txt in detail.
2534+- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs
2535+ (robr)
2536+- statistics of aufs thread (/sys/fs/aufs/stat)
2537+
2538+Features or just an idea in the future (see also design/*.txt),
2539+- reorder the branch index without del/re-add.
2540+- permanent xino files for NFSD
2541+- an option for refreshing the opened files after add/del branches
2542+- light version, without branch manipulation. (unnecessary?)
2543+- copyup in userspace
2544+- inotify in userspace
2545+- readv/writev
2546+
2547+
2548+2. Download
2549+----------------------------------------
2550+There are three GIT trees for aufs6, aufs-linux.git,
2551+aufs-standalone.git, and aufs-util.git.
2552+While the aufs-util is always necessary, you need either of aufs-linux
2553+or aufs-standalone.
2554+
2555+The aufs-linux tree includes the whole linux mainline GIT tree,
2556+git://git.kernel.org/.../torvalds/linux.git.
2557+And you cannot select CONFIG_AUFS_FS=m for this version, eg. you cannot
2558+build aufs6 as an external kernel module.
2559+Several extra patches are not included in this tree. Only
2560+aufs-standalone tree contains them. They are described in the later
2561+section "Configuration and Compilation."
2562+
2563+On the other hand, the aufs-standalone tree has only aufs source files
2564+and necessary patches, and you can select CONFIG_AUFS_FS=m.
2565+But you need to apply all aufs patches manually.
2566+
2567+You will find GIT branches whose name is in form of "aufs6.x" where "x"
2568+represents the linux kernel version, "linux-6.x". For instance,
2569+"aufs6.0" is for linux-6.0. For latest "linux-6.x-rcN", use
2570+"aufs6.x-rcN" branch.
2571+
2572+o aufs-linux tree
2573+$ git clone --reference /your/linux/git/tree \
2574+ git://github.com/sfjro/aufs-linux.git aufs-linux.git
2575+- if you don't have linux GIT tree, then remove "--reference ..."
2576+$ cd aufs-linux.git
2577+$ git checkout origin/aufs6.0
2578+
2579+Or You may want to directly git-pull aufs into your linux GIT tree, and
2580+leave the patch-work to GIT.
2581+$ cd /your/linux/git/tree
2582+$ git remote add aufs git://github.com/sfjro/aufs-linux.git
2583+$ git fetch aufs
2584+$ git checkout -b my6.0 v6.0
2585+$ (add your local change...)
2586+$ git pull aufs aufs6.0
2587+- now you have v6.0 + your_changes + aufs6.0 in your my6.0 branch.
2588+- you may need to solve some conflicts between your_changes and
2589+ aufs6.0. in this case, git-rerere is recommended so that you can
2590+ solve the similar conflicts automatically when you upgrade to 6.1 or
2591+ later in the future.
2592+
2593+o aufs-standalone tree
2594+$ git clone git://github.com/sfjro/aufs-standalone.git aufs-standalone.git
2595+$ cd aufs-standalone.git
2596+$ git checkout origin/aufs6.0
2597+
2598+o aufs-util tree
2599+$ git clone git://git.code.sf.net/p/aufs/aufs-util aufs-util.git
2600+- note that the public aufs-util.git is on SourceForge instead of
2601+ GitHUB.
2602+$ cd aufs-util.git
2603+$ git checkout origin/aufs6.0
2604+
2605+Note: The 6.x-rcN branch is to be used with `rc' kernel versions ONLY.
2606+The minor version number, 'x' in '6.x', of aufs may not always
2607+follow the minor version number of the kernel.
2608+Because changes in the kernel that cause the use of a new
2609+minor version number do not always require changes to aufs-util.
2610+
2611+Since aufs-util has its own minor version number, you may not be
2612+able to find a GIT branch in aufs-util for your kernel's
2613+exact minor version number.
2614+In this case, you should git-checkout the branch for the
2615+nearest lower number.
2616+
2617+For (an unreleased) example:
2618+If you are using "linux-6.10" and the "aufs6.10" branch
2619+does not exist in aufs-util repository, then "aufs6.9", "aufs6.8"
2620+or something numerically smaller is the branch for your kernel.
2621+
2622+Also you can view all branches by
2623+ $ git branch -a
2624+
2625+
2626+3. Configuration and Compilation
2627+----------------------------------------
2628+Make sure you have git-checkout'ed the correct branch.
2629+
2630+For aufs-linux tree,
2631+- enable CONFIG_AUFS_FS.
2632+- set other aufs configurations if necessary.
2633+- for aufs5.13 and later
2634+ Because aufs is not only an ordinary filesystem (callee of VFS), but
2635+ also a caller of VFS functions for branch filesystems, subclassing of
2636+ the internal locks for LOCKDEP is necessary. LOCKDEP is a debugging
2637+ feature of linux kernel. If you enable CONFIG_LOCKDEP, then you will
2638+ need to customize some LOCKDEP numbers. Here are what I use on my
2639+ test environment.
2640+ CONFIG_LOCKDEP_BITS=21
2641+ CONFIG_LOCKDEP_CHAINS_BITS=21
2642+ CONFIG_LOCKDEP_STACK_TRACE_BITS=24
2643+ Also you will need to expand some constant values in LOCKDEP. Refer
2644+ to lockdep-debug.patch in aufs-standalone.git.
2645+
2646+For aufs-standalone tree,
2647+There are several ways to build.
2648+
2649+1.
2650+- apply ./aufs6-kbuild.patch to your kernel source files.
2651+- apply ./aufs6-base.patch too.
2652+- apply ./aufs6-mmap.patch too.
2653+- apply ./aufs6-standalone.patch too, if you have a plan to set
2654+ CONFIG_AUFS_FS=m. otherwise you don't need ./aufs6-standalone.patch.
2655+- copy ./{Documentation,fs,include/uapi/linux/aufs_type.h} files to your
2656+ kernel source tree. Never copy $PWD/include/uapi/linux/Kbuild.
2657+- enable CONFIG_AUFS_FS, you can select either
2658+ =m or =y.
2659+- and build your kernel as usual.
2660+- install the built kernel.
2661+- install the header files too by "make headers_install" to the
2662+ directory where you specify. By default, it is $PWD/usr.
2663+ "make help" shows a brief note for headers_install.
2664+- and reboot your system.
2665+
2666+2.
2667+- module only (CONFIG_AUFS_FS=m).
2668+- apply ./aufs6-base.patch to your kernel source files.
2669+- apply ./aufs6-mmap.patch too.
2670+- apply ./aufs6-standalone.patch too.
2671+- build your kernel, don't forget "make headers_install", and reboot.
2672+- edit ./config.mk and set other aufs configurations if necessary.
2673+ Note: You should read $PWD/fs/aufs/Kconfig carefully which describes
2674+ every aufs configurations.
2675+- build the module by simple "make".
2676+- you can specify ${KDIR} make variable which points to your kernel
2677+ source tree.
2678+- install the files
2679+ + run "make install" to install the aufs module, or copy the built
2680+ $PWD/aufs.ko to /lib/modules/... and run depmod -a (or reboot simply).
2681+ + run "make install_headers" (instead of headers_install) to install
2682+ the modified aufs header file (you can specify DESTDIR which is
2683+ available in aufs standalone version's Makefile only), or copy
2684+ $PWD/usr/include/linux/aufs_type.h to /usr/include/linux or wherever
2685+ you like manually. By default, the target directory is $PWD/usr.
2686+- no need to apply aufs6-kbuild.patch, nor copying source files to your
2687+ kernel source tree.
2688+
2689+Note: The header file aufs_type.h is necessary to build aufs-util
2690+ as well as "make headers_install" in the kernel source tree.
2691+ headers_install is subject to be forgotten, but it is essentially
2692+ necessary, not only for building aufs-util.
2693+ You may not meet problems without headers_install in some older
2694+ version though.
2695+
2696+And then,
2697+- read README in aufs-util, build and install it
2698+- note that your distribution may contain an obsoleted version of
2699+ aufs_type.h in /usr/include/linux or something. When you build aufs
2700+ utilities, make sure that your compiler refers the correct aufs header
2701+ file which is built by "make headers_install."
2702+- if you want to use readdir(3) in userspace or pathconf(3) wrapper,
2703+ then run "make install_ulib" too. And refer to the aufs manual in
2704+ detail.
2705+
2706+There are several other patches in aufs-standalone.git. They are all
2707+optional. When you meet some problems, they will help you.
2708+- aufs6-loopback.patch
2709+ Supports a nested loopback mount in a branch-fs. This patch is
2710+ unnecessary until aufs produces a message like "you may want to try
2711+ another patch for loopback file".
2712+- vfs-ino.patch
2713+ Modifies a system global kernel internal function get_next_ino() in
2714+ order to stop assigning 0 for an inode-number. Not directly related to
2715+ aufs, but recommended generally.
2716+- tmpfs-idr.patch
2717+ Keeps the tmpfs inode number as the lowest value. Effective to reduce
2718+ the size of aufs XINO files for tmpfs branch. Also it prevents the
2719+ duplication of inode number, which is important for backup tools and
2720+ other utilities. When you find aufs XINO files for tmpfs branch
2721+ growing too much, try this patch.
2722+- lockdep-debug.patch
2723+ Similar to some kernel configurations for LOCKDEP (see the top of
2724+ this section), you will need to expand some constants in LOCKDEP for
2725+ aufs if you enable CONFIG_LOCKDEP.
2726+
2727+
2728+4. Usage
2729+----------------------------------------
2730+At first, make sure aufs-util is installed, and please read the aufs
2731+manual, aufs.5 in aufs-util.git tree.
2732+$ man -l aufs.5
2733+
2734+And then,
2735+$ mkdir /tmp/rw /tmp/aufs
2736+# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs
2737+
2738+Here is another example. The result is equivalent.
2739+# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs
2740+ Or
2741+# mount -t aufs -o br:/tmp/rw none /tmp/aufs
2742+# mount -o remount,append:${HOME} /tmp/aufs
2743+
2744+Then, you can see whole tree of your home dir through /tmp/aufs. If
2745+you modify a file under /tmp/aufs, the one on your home directory is
2746+not affected, instead the same named file will be newly created under
2747+/tmp/rw. And all of your modification to a file will be applied to
2748+the one under /tmp/rw. This is called the file based Copy on Write
2749+(COW) method.
2750+Aufs mount options are described in aufs.5.
2751+If you run chroot or something and make your aufs as a root directory,
2752+then you need to customize the shutdown script. See the aufs manual in
2753+detail.
2754+
2755+Additionally, there are some sample usages of aufs which are a
2756+diskless system with network booting, and LiveCD over NFS.
2757+See sample dir in CVS tree on SourceForge.
2758+
2759+
2760+5. Contact
2761+----------------------------------------
2762+When you have any problems or strange behaviour in aufs, please let me
2763+know with:
2764+- /proc/mounts (instead of the output of mount(8))
2765+- /sys/module/aufs/*
2766+- /sys/fs/aufs/* (if you have them)
2767+- /debug/aufs/* (if you have them)
2768+- linux kernel version
2769+ if your kernel is not plain, for example modified by distributor,
2770+ the url where i can download its source is necessary too.
2771+- aufs version which was printed at loading the module or booting the
2772+ system, instead of the date you downloaded.
2773+- configuration (define/undefine CONFIG_AUFS_xxx)
2774+- kernel configuration or /proc/config.gz (if you have it)
2775+- LSM (linux security module, if you are using)
2776+- behaviour which you think to be incorrect
2777+- actual operation, reproducible one is better
2778+- mailto: aufs-users at lists.sourceforge.net
2779+
2780+Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches,
2781+and Feature Requests) on SourceForge. Please join and write to
2782+aufs-users ML.
2783+
2784+
2785+6. Acknowledgements
2786+----------------------------------------
2787+Thanks to everyone who have tried and are using aufs, whoever
2788+have reported a bug or any feedback.
2789+
2790+Especially donators:
2791+Tomas Matejicek(slax.org) made a donation (much more than once).
2792+ Since Apr 2010, Tomas M (the author of Slax and Linux Live
2793+ scripts) is making "doubling" donations.
2794+ Unfortunately I cannot list all of the donators, but I really
2795+ appreciate.
2796+ It ends Aug 2010, but the ordinary donation URL is still available.
2797+ <http://sourceforge.net/donate/index.php?group_id=167503>
2798+Dai Itasaka made a donation (2007/8).
2799+Chuck Smith made a donation (2008/4, 10 and 12).
2800+Henk Schoneveld made a donation (2008/9).
2801+Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10).
2802+Francois Dupoux made a donation (2008/11).
2803+Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public
2804+ aufs2 GIT tree (2009/2).
2805+William Grant made a donation (2009/3).
2806+Patrick Lane made a donation (2009/4).
2807+The Mail Archive (mail-archive.com) made donations (2009/5).
2808+Nippy Networks (Ed Wildgoose) made a donation (2009/7).
2809+New Dream Network, LLC (www.dreamhost.com) made a donation (2009/11).
2810+Pavel Pronskiy made a donation (2011/2).
2811+Iridium and Inmarsat satellite phone retailer (www.mailasail.com), Nippy
2812+ Networks (Ed Wildgoose) made a donation for hardware (2011/3).
2813+Max Lekomcev (DOM-TV project) made a donation (2011/7, 12, 2012/3, 6 and
2814+11).
2815+Sam Liddicott made a donation (2011/9).
2816+Era Scarecrow made a donation (2013/4).
2817+Bor Ratajc made a donation (2013/4).
2818+Alessandro Gorreta made a donation (2013/4).
2819+POIRETTE Marc made a donation (2013/4).
2820+Alessandro Gorreta made a donation (2013/4).
2821+lauri kasvandik made a donation (2013/5).
2822+"pemasu from Finland" made a donation (2013/7).
2823+The Parted Magic Project made a donation (2013/9 and 11).
2824+Pavel Barta made a donation (2013/10).
2825+Nikolay Pertsev made a donation (2014/5).
2826+James B made a donation (2014/7, 2015/7, and 2021/12).
2827+Stefano Di Biase made a donation (2014/8).
2828+Daniel Epellei made a donation (2015/1).
2829+OmegaPhil made a donation (2016/1, 2018/4).
2830+Tomasz Szewczyk made a donation (2016/4).
2831+James Burry made a donation (2016/12).
2832+Carsten Rose made a donation (2018/9).
2833+Porteus Kiosk made a donation (2018/10).
2834+Enya Quetzalli Gomez Rodriguez made a donation (2022/5).
2835+
2836+Thank you very much.
2837+Donations, including future ones, are always very important and
2838+helpful for me to keep on developing aufs.
2839+
2840+
2841+7.
2842+----------------------------------------
2843+If you are an experienced user, no explanation is needed. Aufs is
2844+just a linux filesystem.
2845+
2846+
2847+Enjoy!
2848+
2849+# Local variables: ;
2850+# mode: text;
2851+# End: ;
2852diff -urN /usr/share/empty/fs/aufs/aufs.h linux/fs/aufs/aufs.h
2853--- /usr/share/empty/fs/aufs/aufs.h 1970-01-01 01:00:00.000000000 +0100
2854+++ linux/fs/aufs/aufs.h 2022-11-05 23:02:18.959222617 +0100
2855@@ -0,0 +1,62 @@
2856+/* SPDX-License-Identifier: GPL-2.0 */
2857+/*
2858+ * Copyright (C) 2005-2022 Junjiro R. Okajima
2859+ *
2860+ * This program is free software; you can redistribute it and/or modify
2861+ * it under the terms of the GNU General Public License as published by
2862+ * the Free Software Foundation; either version 2 of the License, or
2863+ * (at your option) any later version.
2864+ *
2865+ * This program is distributed in the hope that it will be useful,
2866+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2867+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2868+ * GNU General Public License for more details.
2869+ *
2870+ * You should have received a copy of the GNU General Public License
2871+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
2872+ */
2873+
2874+/*
2875+ * all header files
2876+ */
2877+
2878+#ifndef __AUFS_H__
2879+#define __AUFS_H__
2880+
2881+#ifdef __KERNEL__
2882+/* AuStub() emits a static inline function 'name' whose body is 'body' */
2883+#define AuStub(type, name, body, ...) \
2884+ static inline type name(__VA_ARGS__) { body; }
2885+/* shorthands: a stub with an empty body, and a stub that returns 0 */
2886+#define AuStubVoid(name, ...) \
2887+ AuStub(void, name, , __VA_ARGS__)
2888+#define AuStubInt0(name, ...) \
2889+ AuStub(int, name, return 0, __VA_ARGS__)
2890+
2891+#include "debug.h"
2892+
2893+#include "branch.h"
2894+#include "cpup.h"
2895+#include "dcsub.h"
2896+#include "dbgaufs.h"
2897+#include "dentry.h"
2898+#include "dir.h"
2899+#include "dirren.h"
2900+#include "dynop.h"
2901+#include "file.h"
2902+#include "fstype.h"
2903+#include "hbl.h"
2904+#include "inode.h"
2905+#include "lcnt.h"
2906+#include "loop.h"
2907+#include "module.h"
2908+#include "opts.h"
2909+#include "rwsem.h"
2910+#include "super.h"
2911+#include "sysaufs.h"
2912+#include "vfsub.h"
2913+#include "whout.h"
2914+#include "wkq.h"
2915+
2916+#endif /* __KERNEL__ */
2917+#endif /* __AUFS_H__ */
2918diff -urN /usr/share/empty/fs/aufs/branch.c linux/fs/aufs/branch.c
2919--- /usr/share/empty/fs/aufs/branch.c 1970-01-01 01:00:00.000000000 +0100
2920+++ linux/fs/aufs/branch.c 2022-11-05 23:02:18.959222617 +0100
2921@@ -0,0 +1,1427 @@
2922+// SPDX-License-Identifier: GPL-2.0
2923+/*
2924+ * Copyright (C) 2005-2022 Junjiro R. Okajima
2925+ *
2926+ * This program is free software; you can redistribute it and/or modify
2927+ * it under the terms of the GNU General Public License as published by
2928+ * the Free Software Foundation; either version 2 of the License, or
2929+ * (at your option) any later version.
2930+ *
2931+ * This program is distributed in the hope that it will be useful,
2932+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
2933+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2934+ * GNU General Public License for more details.
2935+ *
2936+ * You should have received a copy of the GNU General Public License
2937+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
2938+ */
2939+
2940+/*
2941+ * branch management
2942+ */
2943+
2944+#include <linux/compat.h>
2945+#include <linux/statfs.h>
2946+#include "aufs.h"
2947+
2948+/*
2949+ * free a single branch: tear down what hangs off @br, then free @br itself
2950+ */
2951+static void au_br_do_free(struct au_branch *br)
2952+{
2953+ int i;
2954+ struct au_wbr *wbr;
2955+ struct au_dykey **key;
2956+
2957+ au_hnotify_fin_br(br);
2958+ /* always, regardless of the mount option */
2959+ au_dr_hino_free(&br->br_dirren);
2960+ au_xino_put(br);
2961+
2962+ AuLCntZero(au_lcnt_read(&br->br_nfiles, /*do_rev*/0));
2963+ au_lcnt_fin(&br->br_nfiles, /*do_sync*/0);
2964+ AuLCntZero(au_lcnt_read(&br->br_count, /*do_rev*/0));
2965+ au_lcnt_fin(&br->br_count, /*do_sync*/0);
2966+
2967+ wbr = br->br_wbr;
2968+ if (wbr) {
2969+ for (i = 0; i < AuBrWh_Last; i++)
2970+ dput(wbr->wbr_wh[i]); /* drop every dentry held in wbr_wh[] */
2971+ AuDebugOn(atomic_read(&wbr->wbr_wh_running));
2972+ AuRwDestroy(&wbr->wbr_wh_rwsem);
2973+ }
2974+
2975+ if (br->br_fhsm) {
2976+ au_br_fhsm_fin(br->br_fhsm);
2977+ au_kfree_try_rcu(br->br_fhsm);
2978+ }
2979+
2980+ key = br->br_dykey;
2981+ for (i = 0; i < AuBrDynOp; i++, key++)
2982+ if (*key)
2983+ au_dy_put(*key);
2984+ else
2985+ break; /* stop at the first empty slot */
2986+
2987+ /* recursive lock, s_umount of branch's */
2988+ /* synchronize_rcu(); */ /* why? */
2989+ lockdep_off();
2990+ path_put(&br->br_path);
2991+ lockdep_on();
2992+ au_kfree_rcu(wbr); /* NOTE(review): also reached with wbr == NULL; presumably tolerated -- confirm */
2993+ au_lcnt_wait_for_fin(&br->br_nfiles);
2994+ au_lcnt_wait_for_fin(&br->br_count);
2995+ /* I don't know why, but percpu_refcount requires this */
2996+ /* synchronize_rcu(); */
2997+ au_kfree_rcu(br);
2998+}
2999+
3000+/*
3001+ * frees all branches in sbinfo->si_branch[0..si_bbot]; the array and si_bbot are not touched here
3002+ */
3003+void au_br_free(struct au_sbinfo *sbinfo)
3004+{
3005+ aufs_bindex_t bmax;
3006+ struct au_branch **br;
3007+
3008+ AuRwMustWriteLock(&sbinfo->si_rwsem);
3009+
3010+ bmax = sbinfo->si_bbot + 1; /* number of branches to free */
3011+ br = sbinfo->si_branch;
3012+ while (bmax--)
3013+ au_br_do_free(*br++);
3014+}
3015+
3016+/*
3017+ * find the index of a branch which is specified by @br_id; returns -1 when no branch matches.
3018+ */
3019+int au_br_index(struct super_block *sb, aufs_bindex_t br_id)
3020+{
3021+ aufs_bindex_t bindex, bbot;
3022+
3023+ bbot = au_sbbot(sb);
3024+ for (bindex = 0; bindex <= bbot; bindex++) /* linear scan, top to bottom */
3025+ if (au_sbr_id(sb, bindex) == br_id)
3026+ return bindex;
3027+ return -1;
3028+}
3029+
3030+/* ---------------------------------------------------------------------- */
3031+
3032+/*
3033+ * add a branch
3034+ */
3035+/* non-zero when @h_adding overlaps @h_root: same dentry, loopback overlap, or au_test_subdir() in either direction */
3036+static int test_overlap(struct super_block *sb, struct dentry *h_adding,
3037+ struct dentry *h_root)
3038+{
3039+ if (unlikely(h_adding == h_root
3040+ || au_test_loopback_overlap(sb, h_adding)))
3041+ return 1;
3042+ if (h_adding->d_sb != h_root->d_sb)
3043+ return 0; /* different superblocks: the subdir test is skipped */
3044+ return au_test_subdir(h_adding, h_root)
3045+ || au_test_subdir(h_root, h_adding);
3046+}
3047+
3048+/*
3049+ * returns a newly allocated branch, or an ERR_PTR() on failure (never NULL).
3050+ * @new_nbranch is a number of branches after adding a branch.
3051+ */
3052+static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch,
3053+ int perm)
3054+{
3055+ struct au_branch *add_branch;
3056+ struct dentry *root;
3057+ struct inode *inode;
3058+ int err;
3059+
3060+ err = -ENOMEM;
3061+ add_branch = kzalloc(sizeof(*add_branch), GFP_NOFS);
3062+ if (unlikely(!add_branch))
3063+ goto out;
3064+ add_branch->br_xino = au_xino_alloc(/*nfile*/1);
3065+ if (unlikely(!add_branch->br_xino))
3066+ goto out_br;
3067+ err = au_hnotify_init_br(add_branch, perm);
3068+ if (unlikely(err))
3069+ goto out_xino;
3070+ err = -ENOMEM; /* err is 0 here; without this a failed br_wbr allocation would return ERR_PTR(0) == NULL */
3071+ if (au_br_writable(perm)) {
3072+ /* may be freed separately at changing the branch permission */
3073+ add_branch->br_wbr = kzalloc(sizeof(*add_branch->br_wbr),
3074+ GFP_NOFS);
3075+ if (unlikely(!add_branch->br_wbr))
3076+ goto out_hnotify;
3077+ }
3078+
3079+ if (au_br_fhsm(perm)) {
3080+ err = au_fhsm_br_alloc(add_branch);
3081+ if (unlikely(err))
3082+ goto out_wbr;
3083+ }
3084+
3085+ root = sb->s_root;
3086+ err = au_sbr_realloc(au_sbi(sb), new_nbranch, /*may_shrink*/0); /* always overwrites err */
3087+ if (!err)
3088+ err = au_di_realloc(au_di(root), new_nbranch, /*may_shrink*/0);
3089+ if (!err) {
3090+ inode = d_inode(root);
3091+ err = au_hinode_realloc(au_ii(inode), new_nbranch,
3092+ /*may_shrink*/0);
3093+ }
3094+ if (!err)
3095+ return add_branch; /* success */
3096+
3097+out_wbr: /* NOTE(review): br_fhsm set up by au_fhsm_br_alloc() is not released on this path -- confirm whether it leaks */
3098+ au_kfree_rcu(add_branch->br_wbr);
3099+out_hnotify:
3100+ au_hnotify_fin_br(add_branch);
3101+out_xino:
3102+ au_xino_put(add_branch);
3103+out_br:
3104+ au_kfree_rcu(add_branch);
3105+out:
3106+ return ERR_PTR(err);
3107+}
3108+
3109+/*
3110+ * test if the branch permission is legal or not: returns 0 when legal, -EINVAL otherwise.
3111+ */
3112+static int test_br(struct inode *inode, int brperm, char *path)
3113+{
3114+ int err;
3115+
3116+ err = (au_br_writable(brperm) && IS_RDONLY(inode)); /* non-zero only for a writable perm on a read-only inode */
3117+ if (!err)
3118+ goto out;
3119+
3120+ err = -EINVAL;
3121+ pr_err("write permission for readonly mount or inode, %s\n", path);
3122+
3123+out:
3124+ return err;
3125+}
3126+
3127+/*
3128+ * validate the branch described by @add before it is added to @sb. returns:
3129+ * 0: success, the caller will add it
3130+ * plus: success, it is already unified, the caller should ignore it
3131+ * minus: error
3132+ */
3133+static int test_add(struct super_block *sb, struct au_opt_add *add, int remount)
3134+{
3135+ int err;
3136+ aufs_bindex_t bbot, bindex;
3137+ struct dentry *root, *h_dentry;
3138+ struct inode *inode, *h_inode;
3139+
3140+ root = sb->s_root;
3141+ bbot = au_sbbot(sb);
3142+ if (unlikely(bbot >= 0
3143+ && au_find_dbindex(root, add->path.dentry) >= 0)) {
3144+ err = 1; /* already a branch: accepted on remount only */
3145+ if (!remount) {
3146+ err = -EINVAL;
3147+ pr_err("%s duplicated\n", add->pathname);
3148+ }
3149+ goto out;
3150+ }
3151+
3152+ err = -ENOSPC; /* -E2BIG; */
3153+ if (unlikely(AUFS_BRANCH_MAX <= add->bindex
3154+ || AUFS_BRANCH_MAX - 1 <= bbot)) {
3155+ pr_err("number of branches exceeded %s\n", add->pathname);
3156+ goto out;
3157+ }
3158+
3159+ err = -EDOM;
3160+ if (unlikely(add->bindex < 0 || bbot + 1 < add->bindex)) { /* valid range is 0..bbot+1 */
3161+ pr_err("bad index %d\n", add->bindex);
3162+ goto out;
3163+ }
3164+
3165+ inode = d_inode(add->path.dentry);
3166+ err = -ENOENT;
3167+ if (unlikely(!inode->i_nlink)) {
3168+ pr_err("no existence %s\n", add->pathname);
3169+ goto out;
3170+ }
3171+
3172+ err = -EINVAL; /* shared by the three checks below */
3173+ if (unlikely(inode->i_sb == sb)) {
3174+ pr_err("%s must be outside\n", add->pathname);
3175+ goto out;
3176+ }
3177+
3178+ if (unlikely(au_test_fs_unsuppoted(inode->i_sb))) {
3179+ pr_err("unsupported filesystem, %s (%s)\n",
3180+ add->pathname, au_sbtype(inode->i_sb));
3181+ goto out;
3182+ }
3183+
3184+ if (unlikely(inode->i_sb->s_stack_depth)) {
3185+ pr_err("already stacked, %s (%s)\n",
3186+ add->pathname, au_sbtype(inode->i_sb));
3187+ goto out;
3188+ }
3189+
3190+ err = test_br(d_inode(add->path.dentry), add->perm, add->pathname);
3191+ if (unlikely(err))
3192+ goto out;
3193+
3194+ if (bbot < 0)
3195+ return 0; /* success */
3196+
3197+ err = -EINVAL;
3198+ for (bindex = 0; bindex <= bbot; bindex++) /* compare against every existing branch root */
3199+ if (unlikely(test_overlap(sb, add->path.dentry,
3200+ au_h_dptr(root, bindex)))) {
3201+ pr_err("%s is overlapped\n", add->pathname);
3202+ goto out;
3203+ }
3204+
3205+ err = 0;
3206+ if (au_opt_test(au_mntflags(sb), WARN_PERM)) { /* warn only; err stays 0 */
3207+ h_dentry = au_h_dptr(root, 0);
3208+ h_inode = d_inode(h_dentry);
3209+ if ((h_inode->i_mode & S_IALLUGO) != (inode->i_mode & S_IALLUGO)
3210+ || !uid_eq(h_inode->i_uid, inode->i_uid)
3211+ || !gid_eq(h_inode->i_gid, inode->i_gid))
3212+ pr_warn("uid/gid/perm %s %u/%u/0%o, %u/%u/0%o\n",
3213+ add->pathname,
3214+ i_uid_read(inode), i_gid_read(inode),
3215+ (inode->i_mode & S_IALLUGO),
3216+ i_uid_read(h_inode), i_gid_read(h_inode),
3217+ (h_inode->i_mode & S_IALLUGO));
3218+ }
3219+
3220+out:
3221+ return err;
3222+}
3223+
3224+/*
3225+ * initialize or clean the whiteouts for an adding branch, running au_wh_init() with br_perm temporarily set to @new_perm
3226+ */
3227+static int au_br_init_wh(struct super_block *sb, struct au_branch *br,
3228+ int new_perm)
3229+{
3230+ int err, old_perm;
3231+ aufs_bindex_t bindex;
3232+ struct inode *h_inode;
3233+ struct au_wbr *wbr;
3234+ struct au_hinode *hdir;
3235+ struct dentry *h_dentry;
3236+
3237+ err = vfsub_mnt_want_write(au_br_mnt(br));
3238+ if (unlikely(err))
3239+ goto out;
3240+
3241+ wbr = br->br_wbr;
3242+ old_perm = br->br_perm;
3243+ br->br_perm = new_perm; /* temporary; restored below */
3244+ hdir = NULL;
3245+ h_inode = NULL;
3246+ bindex = au_br_index(sb, br->br_id); /* negative when @br is not in @sb's branch array */
3247+ if (0 <= bindex) {
3248+ hdir = au_hi(d_inode(sb->s_root), bindex);
3249+ au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
3250+ } else {
3251+ h_dentry = au_br_dentry(br);
3252+ h_inode = d_inode(h_dentry);
3253+ inode_lock_nested(h_inode, AuLsc_I_PARENT);
3254+ }
3255+ if (!wbr)
3256+ err = au_wh_init(br, sb);
3257+ else {
3258+ wbr_wh_write_lock(wbr);
3259+ err = au_wh_init(br, sb);
3260+ wbr_wh_write_unlock(wbr);
3261+ }
3262+ if (hdir) /* release whichever lock was taken above */
3263+ au_hn_inode_unlock(hdir);
3264+ else
3265+ inode_unlock(h_inode);
3266+ vfsub_mnt_drop_write(au_br_mnt(br));
3267+ br->br_perm = old_perm;
3268+
3269+ if (!err && wbr && !au_br_writable(new_perm)) { /* drop the wbr when the new perm is not writable */
3270+ au_kfree_rcu(wbr);
3271+ br->br_wbr = NULL;
3272+ }
3273+
3274+out:
3275+ return err;
3276+}
3277+/* set up br->br_wbr, check the branch fs's name length limit, then initialize the whiteouts via au_br_init_wh() */
3278+static int au_wbr_init(struct au_branch *br, struct super_block *sb,
3279+ int perm)
3280+{
3281+ int err;
3282+ struct kstatfs kst;
3283+ struct au_wbr *wbr;
3284+
3285+ wbr = br->br_wbr; /* dereferenced unconditionally below */
3286+ au_rw_init(&wbr->wbr_wh_rwsem);
3287+ atomic_set(&wbr->wbr_wh_running, 0);
3288+
3289+ /*
3290+ * a limit for rmdir/rename a dir
3291+ * cf. AUFS_MAX_NAMELEN in include/uapi/linux/aufs_type.h
3292+ */
3293+ err = vfs_statfs(&br->br_path, &kst);
3294+ if (unlikely(err))
3295+ goto out;
3296+ err = -EINVAL;
3297+ if (kst.f_namelen >= NAME_MAX)
3298+ err = au_br_init_wh(sb, br, perm);
3299+ else /* too short a name length: log and fail with -EINVAL */
3300+ pr_err("%pd(%s), unsupported namelen %ld\n",
3301+ au_br_dentry(br),
3302+ au_sbtype(au_br_dentry(br)->d_sb), kst.f_namelen);
3303+
3304+out:
3305+ return err;
3306+}
3307+
3308+/* initialize a new branch @br from @add; only on success is a reference to the path taken (path_get) */
3309+static int au_br_init(struct au_branch *br, struct super_block *sb,
3310+ struct au_opt_add *add)
3311+{
3312+ int err;
3313+ struct au_branch *brbase;
3314+ struct file *xf;
3315+ struct inode *h_inode;
3316+
3317+ err = 0;
3318+ br->br_perm = add->perm;
3319+ br->br_path = add->path; /* set first, path_get() later */
3320+ spin_lock_init(&br->br_dykey_lock);
3321+ au_lcnt_init(&br->br_nfiles, /*release*/NULL);
3322+ au_lcnt_init(&br->br_count, /*release*/NULL);
3323+ br->br_id = au_new_br_id(sb);
3324+ AuDebugOn(br->br_id < 0);
3325+
3326+ /* always, regardless of the given option */
3327+ err = au_dr_br_init(sb, br, &add->path);
3328+ if (unlikely(err))
3329+ goto out_err;
3330+
3331+ if (au_br_writable(add->perm)) {
3332+ err = au_wbr_init(br, sb, add->perm);
3333+ if (unlikely(err))
3334+ goto out_err;
3335+ }
3336+
3337+ if (au_opt_test(au_mntflags(sb), XINO)) {
3338+ brbase = au_sbr(sb, 0); /* branch 0's xino file supplies the path passed below */
3339+ xf = au_xino_file(brbase->br_xino, /*idx*/-1);
3340+ AuDebugOn(!xf);
3341+ h_inode = d_inode(add->path.dentry);
3342+ err = au_xino_init_br(sb, br, h_inode->i_ino, &xf->f_path);
3343+ if (unlikely(err)) {
3344+ AuDebugOn(au_xino_file(br->br_xino, /*idx*/-1));
3345+ goto out_err;
3346+ }
3347+ }
3348+
3349+ sysaufs_br_init(br);
3350+ path_get(&br->br_path);
3351+ goto out; /* success */
3352+
3353+out_err:
3354+ memset(&br->br_path, 0, sizeof(br->br_path)); /* no reference was taken: clear the borrowed path */
3355+out:
3356+ return err;
3357+}
3358+/* insert @br at slot @bindex of sbinfo->si_branch[], first moving @amount entries one slot further */
3359+static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex,
3360+ struct au_branch *br, aufs_bindex_t bbot,
3361+ aufs_bindex_t amount)
3362+{
3363+ struct au_branch **brp;
3364+
3365+ AuRwMustWriteLock(&sbinfo->si_rwsem);
3366+
3367+ brp = sbinfo->si_branch + bindex;
3368+ memmove(brp + 1, brp, sizeof(*brp) * amount);
3369+ *brp = br;
3370+ sbinfo->si_bbot++;
3371+ if (unlikely(bbot < 0)) /* there was no branch before this one */
3372+ sbinfo->si_bbot = 0;
3373+}
3374+/* open an initialized slot at @bindex in dinfo's hdentry array, first moving @amount entries one slot further */
3375+static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex,
3376+ aufs_bindex_t bbot, aufs_bindex_t amount)
3377+{
3378+ struct au_hdentry *hdp;
3379+
3380+ AuRwMustWriteLock(&dinfo->di_rwsem);
3381+
3382+ hdp = au_hdentry(dinfo, bindex);
3383+ memmove(hdp + 1, hdp, sizeof(*hdp) * amount);
3384+ au_h_dentry_init(hdp);
3385+ dinfo->di_bbot++;
3386+ if (unlikely(bbot < 0)) /* there was no branch before this one */
3387+ dinfo->di_btop = 0;
3388+}
3389+/* open an initialized slot at @bindex in iinfo's hinode array, first moving @amount entries one slot further */
3390+static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex,
3391+ aufs_bindex_t bbot, aufs_bindex_t amount)
3392+{
3393+ struct au_hinode *hip;
3394+
3395+ AuRwMustWriteLock(&iinfo->ii_rwsem);
3396+
3397+ hip = au_hinode(iinfo, bindex);
3398+ memmove(hip + 1, hip, sizeof(*hip) * amount);
3399+ au_hinode_init(hip);
3400+ iinfo->ii_bbot++;
3401+ if (unlikely(bbot < 0)) /* there was no branch before this one */
3402+ iinfo->ii_btop = 0;
3403+}
3404+/* make room at @bindex in the sb, root dentry and root inode arrays, then install @br's dentry and inode there */
3405+static void au_br_do_add(struct super_block *sb, struct au_branch *br,
3406+ aufs_bindex_t bindex)
3407+{
3408+ struct dentry *root, *h_dentry;
3409+ struct inode *root_inode, *h_inode;
3410+ aufs_bindex_t bbot, amount;
3411+
3412+ root = sb->s_root;
3413+ root_inode = d_inode(root);
3414+ bbot = au_sbbot(sb);
3415+ amount = bbot + 1 - bindex; /* number of entries at or after @bindex */
3416+ h_dentry = au_br_dentry(br);
3417+ au_sbilist_lock();
3418+ au_br_do_add_brp(au_sbi(sb), bindex, br, bbot, amount);
3419+ au_br_do_add_hdp(au_di(root), bindex, bbot, amount);
3420+ au_br_do_add_hip(au_ii(root_inode), bindex, bbot, amount);
3421+ au_set_h_dptr(root, bindex, dget(h_dentry));
3422+ h_inode = d_inode(h_dentry);
3423+ au_set_h_iptr(root_inode, bindex, au_igrab(h_inode), /*flags*/0);
3424+ au_sbilist_unlock();
3425+}
3426+/* add the branch described by @add to @sb; returns 0 without adding when test_add() reports it as already unified */
3427+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount)
3428+{
3429+ int err;
3430+ aufs_bindex_t bbot, add_bindex;
3431+ struct dentry *root, *h_dentry;
3432+ struct inode *root_inode;
3433+ struct au_branch *add_branch;
3434+
3435+ root = sb->s_root;
3436+ root_inode = d_inode(root);
3437+ IMustLock(root_inode);
3438+ IiMustWriteLock(root_inode);
3439+ err = test_add(sb, add, remount);
3440+ if (unlikely(err < 0))
3441+ goto out;
3442+ if (err) {
3443+ err = 0;
3444+ goto out; /* success */
3445+ }
3446+
3447+ bbot = au_sbbot(sb);
3448+ add_branch = au_br_alloc(sb, bbot + 2, add->perm); /* bbot + 2: the number of branches after adding */
3449+ err = PTR_ERR(add_branch);
3450+ if (IS_ERR(add_branch))
3451+ goto out;
3452+
3453+ err = au_br_init(add_branch, sb, add);
3454+ if (unlikely(err)) {
3455+ au_br_do_free(add_branch);
3456+ goto out;
3457+ }
3458+
3459+ add_bindex = add->bindex;
3460+ sysaufs_brs_del(sb, add_bindex); /* remove successors */
3461+ au_br_do_add(sb, add_branch, add_bindex);
3462+ sysaufs_brs_add(sb, add_bindex); /* append successors */
3463+ dbgaufs_brs_add(sb, add_bindex, /*topdown*/0); /* rename successors */
3464+
3465+ h_dentry = add->path.dentry;
3466+ if (!add_bindex) { /* the new branch became the top one */
3467+ au_cpup_attr_all(root_inode, /*force*/1);
3468+ sb->s_maxbytes = h_dentry->d_sb->s_maxbytes;
3469+ } else
3470+ au_add_nlink(root_inode, d_inode(h_dentry));
3471+
3472+out:
3473+ return err;
3474+}
3475+
3476+/* ---------------------------------------------------------------------- */
3477+/* callback for au_array_alloc(): store a counted reference to each in-use, non-special file of @sb in @a; returns the count */
3478+static unsigned long long au_farray_cb(struct super_block *sb, void *a,
3479+ unsigned long long max __maybe_unused,
3480+ void *arg)
3481+{
3482+ unsigned long long n;
3483+ struct file **p, *f;
3484+ struct hlist_bl_head *files;
3485+ struct hlist_bl_node *pos;
3486+ struct au_finfo *finfo;
3487+
3488+ n = 0;
3489+ p = a;
3490+ files = &au_sbi(sb)->si_files;
3491+ hlist_bl_lock(files);
3492+ hlist_bl_for_each_entry(finfo, pos, files, fi_hlist) {
3493+ f = finfo->fi_file;
3494+ if (file_count(f)
3495+ && !special_file(file_inode(f)->i_mode)) {
3496+ get_file(f); /* released by fput() in au_farray_free() */
3497+ *p++ = f;
3498+ n++;
3499+ AuDebugOn(n > max);
3500+ }
3501+ }
3502+ hlist_bl_unlock(files);
3503+
3504+ return n;
3505+}
3506+/* build the file array via au_array_alloc() and au_farray_cb(); *max is seeded with the si_nfiles count */
3507+static struct file **au_farray_alloc(struct super_block *sb,
3508+ unsigned long long *max)
3509+{
3510+ struct au_sbinfo *sbi;
3511+
3512+ sbi = au_sbi(sb);
3513+ *max = au_lcnt_read(&sbi->si_nfiles, /*do_rev*/1);
3514+ return au_array_alloc(max, au_farray_cb, sb, /*arg*/NULL);
3515+}
3516+/* fput() every non-NULL entry of @a[0..max-1], then free the array itself */
3517+static void au_farray_free(struct file **a, unsigned long long max)
3518+{
3519+ unsigned long long ull;
3520+
3521+ for (ull = 0; ull < max; ull++)
3522+ if (a[ull])
3523+ fput(a[ull]);
3524+ kvfree(a);
3525+}
3526+
3527+/* ---------------------------------------------------------------------- */
3528+
3529+/*
3530+ * delete a branch
3531+ */
3532+
3533+/* pr_info() only when @do_info is set; kept as a macro (not an inline function) to show the line number */
3534+#define AuVerbose(do_info, fmt, ...) do { \
3535+ if (do_info) \
3536+ pr_info(fmt, ##__VA_ARGS__); \
3537+} while (0)
3538+/* busy when @inode is a non-directory, or when @btop and @bbot are the same index */
3539+static int au_test_ibusy(struct inode *inode, aufs_bindex_t btop,
3540+ aufs_bindex_t bbot)
3541+{
3542+ return (inode && !S_ISDIR(inode->i_mode)) || btop == bbot;
3543+}
3544+/* same test as au_test_ibusy(), applied to the inode of @dentry */
3545+static int au_test_dbusy(struct dentry *dentry, aufs_bindex_t btop,
3546+ aufs_bindex_t bbot)
3547+{
3548+ return au_test_ibusy(d_inode(dentry), btop, bbot);
3549+}
3550+
3551+/*
3552+ * test if the branch is deletable or not: -EBUSY when a collected dentry still uses branch @bindex.
3553+ */
3554+static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex,
3555+ unsigned int sigen, const unsigned int verbose)
3556+{
3557+ int err, i, j, ndentry;
3558+ aufs_bindex_t btop, bbot;
3559+ struct au_dcsub_pages dpages;
3560+ struct au_dpage *dpage;
3561+ struct dentry *d;
3562+
3563+ err = au_dpages_init(&dpages, GFP_NOFS);
3564+ if (unlikely(err))
3565+ goto out;
3566+ err = au_dcsub_pages(&dpages, root, NULL, NULL);
3567+ if (unlikely(err))
3568+ goto out_dpages;
3569+
3570+ for (i = 0; !err && i < dpages.ndpage; i++) { /* both loops stop at the first error */
3571+ dpage = dpages.dpages + i;
3572+ ndentry = dpage->ndentry;
3573+ for (j = 0; !err && j < ndentry; j++) {
3574+ d = dpage->dentries[j];
3575+ AuDebugOn(au_dcount(d) <= 0);
3576+ if (!au_digen_test(d, sigen)) { /* this side only read-locks; the other revalidates first */
3577+ di_read_lock_child(d, AuLock_IR);
3578+ if (unlikely(au_dbrange_test(d))) {
3579+ di_read_unlock(d, AuLock_IR);
3580+ continue;
3581+ }
3582+ } else {
3583+ di_write_lock_child(d);
3584+ if (unlikely(au_dbrange_test(d))) {
3585+ di_write_unlock(d);
3586+ continue;
3587+ }
3588+ err = au_reval_dpath(d, sigen);
3589+ if (!err)
3590+ di_downgrade_lock(d, AuLock_IR); /* continue as a reader, like the other side */
3591+ else {
3592+ di_write_unlock(d);
3593+ break;
3594+ }
3595+ }
3596+
3597+ /* AuDbgDentry(d); */
3598+ btop = au_dbtop(d);
3599+ bbot = au_dbbot(d);
3600+ if (btop <= bindex
3601+ && bindex <= bbot
3602+ && au_h_dptr(d, bindex)
3603+ && au_test_dbusy(d, btop, bbot)) {
3604+ err = -EBUSY;
3605+ AuVerbose(verbose, "busy %pd\n", d);
3606+ AuDbgDentry(d);
3607+ }
3608+ di_read_unlock(d, AuLock_IR);
3609+ }
3610+ }
3611+
3612+out_dpages:
3613+ au_dpages_free(&dpages);
3614+out:
3615+ return err;
3616+}
3617+/* inode counterpart of test_dentry_busy(): -EBUSY when a non-root inode of @sb still uses branch @bindex */
3618+static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex,
3619+ unsigned int sigen, const unsigned int verbose)
3620+{
3621+ int err;
3622+ unsigned long long max, ull;
3623+ struct inode *i, **array;
3624+ aufs_bindex_t btop, bbot;
3625+
3626+ array = au_iarray_alloc(sb, &max);
3627+ err = PTR_ERR(array);
3628+ if (IS_ERR(array))
3629+ goto out;
3630+
3631+ err = 0;
3632+ AuDbg("b%d\n", bindex);
3633+ for (ull = 0; !err && ull < max; ull++) {
3634+ i = array[ull];
3635+ if (unlikely(!i))
3636+ break; /* a NULL entry ends the scan */
3637+ if (i->i_ino == AUFS_ROOT_INO)
3638+ continue; /* the root inode is not tested */
3639+
3640+ /* AuDbgInode(i); */
3641+ if (au_iigen(i, NULL) == sigen)
3642+ ii_read_lock_child(i);
3643+ else {
3644+ ii_write_lock_child(i);
3645+ err = au_refresh_hinode_self(i);
3646+ au_iigen_dec(i);
3647+ if (!err)
3648+ ii_downgrade_lock(i);
3649+ else {
3650+ ii_write_unlock(i);
3651+ break;
3652+ }
3653+ }
3654+
3655+ btop = au_ibtop(i);
3656+ bbot = au_ibbot(i);
3657+ if (btop <= bindex
3658+ && bindex <= bbot
3659+ && au_h_iptr(i, bindex)
3660+ && au_test_ibusy(i, btop, bbot)) {
3661+ err = -EBUSY;
3662+ AuVerbose(verbose, "busy i%lu\n", i->i_ino);
3663+ AuDbgInode(i);
3664+ }
3665+ ii_read_unlock(i);
3666+ }
3667+ au_iarray_free(array, max);
3668+
3669+out:
3670+ return err;
3671+}
3672+/* run the dentry test, then the inode test, with @root's di write lock dropped; the lock is retaken before returning */
3673+static int test_children_busy(struct dentry *root, aufs_bindex_t bindex,
3674+ const unsigned int verbose)
3675+{
3676+ int err;
3677+ unsigned int sigen;
3678+
3679+ sigen = au_sigen(root->d_sb);
3680+ DiMustNoWaiters(root);
3681+ IiMustNoWaiters(d_inode(root));
3682+ di_write_unlock(root);
3683+ err = test_dentry_busy(root, bindex, sigen, verbose);
3684+ if (!err) /* the inode test is skipped once the dentry test fails */
3685+ err = test_inode_busy(root->d_sb, bindex, sigen, verbose);
3686+ di_write_lock_child(root); /* aufs_write_lock() calls ..._child() */
3687+
3688+ return err;
3689+}
3690+/* an opened root dir is stored in @to_free with a reference and returns 0; another dir opened on branch @br_id gives -EBUSY */
3691+static int test_dir_busy(struct file *file, aufs_bindex_t br_id,
3692+ struct file **to_free, int *idx)
3693+{
3694+ int err;
3695+ unsigned char matched, root;
3696+ aufs_bindex_t bindex, bbot;
3697+ struct au_fidir *fidir;
3698+ struct au_hfile *hfile;
3699+
3700+ err = 0;
3701+ root = IS_ROOT(file->f_path.dentry);
3702+ if (root) {
3703+ get_file(file);
3704+ to_free[*idx] = file;
3705+ (*idx)++;
3706+ goto out;
3707+ }
3708+
3709+ matched = 0;
3710+ fidir = au_fi(file)->fi_hdir;
3711+ AuDebugOn(!fidir);
3712+ bbot = au_fbbot_dir(file);
3713+ for (bindex = au_fbtop(file); bindex <= bbot; bindex++) {
3714+ hfile = fidir->fd_hfile + bindex;
3715+ if (!hfile->hf_file)
3716+ continue; /* nothing opened on this branch */
3717+
3718+ if (hfile->hf_br->br_id == br_id) {
3719+ matched = 1;
3720+ break;
3721+ }
3722+ }
3723+ if (matched)
3724+ err = -EBUSY;
3725+
3726+out:
3727+ return err;
3728+}
3729+/* scan the files of @sb: -EBUSY when a non-dir or non-root dir uses branch @br_id; opened root dirs go to @to_free */
3730+static int test_file_busy(struct super_block *sb, aufs_bindex_t br_id,
3731+ struct file **to_free, int opened)
3732+{
3733+ int err, idx;
3734+ unsigned long long ull, max;
3735+ aufs_bindex_t btop;
3736+ struct file *file, **array;
3737+ struct dentry *root;
3738+ struct au_hfile *hfile;
3739+
3740+ array = au_farray_alloc(sb, &max);
3741+ err = PTR_ERR(array);
3742+ if (IS_ERR(array))
3743+ goto out;
3744+
3745+ err = 0;
3746+ idx = 0;
3747+ root = sb->s_root;
3748+ di_write_unlock(root); /* retaken after the loop */
3749+ for (ull = 0; ull < max; ull++) {
3750+ file = array[ull];
3751+ if (unlikely(!file))
3752+ break;
3753+
3754+ /* AuDbg("%pD\n", file); */
3755+ fi_read_lock(file);
3756+ btop = au_fbtop(file); /* NOTE(review): the value is never read in this function */
3757+ if (!d_is_dir(file->f_path.dentry)) {
3758+ hfile = &au_fi(file)->fi_htop;
3759+ if (hfile->hf_br->br_id == br_id)
3760+ err = -EBUSY;
3761+ } else
3762+ err = test_dir_busy(file, br_id, to_free, &idx);
3763+ fi_read_unlock(file);
3764+ if (unlikely(err))
3765+ break;
3766+ }
3767+ di_write_lock_child(root);
3768+ au_farray_free(array, max);
3769+ AuDebugOn(idx > opened);
3770+
3771+out:
3772+ return err;
3773+}
3774+/* for each dir in @to_free, clear its lower file on branch @br_id and move the top index down when that was the top */
3775+static void br_del_file(struct file **to_free, unsigned long long opened,
3776+ aufs_bindex_t br_id)
3777+{
3778+ unsigned long long ull;
3779+ aufs_bindex_t bindex, btop, bbot, bfound;
3780+ struct file *file;
3781+ struct au_fidir *fidir;
3782+ struct au_hfile *hfile;
3783+
3784+ for (ull = 0; ull < opened; ull++) {
3785+ file = to_free[ull];
3786+ if (unlikely(!file))
3787+ break; /* a NULL entry ends the list */
3788+
3789+ /* AuDbg("%pD\n", file); */
3790+ AuDebugOn(!d_is_dir(file->f_path.dentry));
3791+ bfound = -1;
3792+ fidir = au_fi(file)->fi_hdir;
3793+ AuDebugOn(!fidir);
3794+ fi_write_lock(file);
3795+ btop = au_fbtop(file);
3796+ bbot = au_fbbot_dir(file);
3797+ for (bindex = btop; bindex <= bbot; bindex++) {
3798+ hfile = fidir->fd_hfile + bindex;
3799+ if (!hfile->hf_file)
3800+ continue;
3801+
3802+ if (hfile->hf_br->br_id == br_id) {
3803+ bfound = bindex;
3804+ break;
3805+ }
3806+ }
3807+ AuDebugOn(bfound < 0);
3808+ au_set_h_fptr(file, bfound, NULL);
3809+ if (bfound == btop) { /* the top was removed: find the next index with a lower dir */
3810+ for (btop++; btop <= bbot; btop++)
3811+ if (au_hf_dir(file, btop)) {
3812+ au_set_fbtop(file, btop);
3813+ break;
3814+ }
3815+ }
3816+ fi_write_unlock(file);
3817+ }
3818+}
3819+/* remove slot @bindex from sbinfo->si_branch[] and try to shrink the array; a failed shrink is ignored */
3820+static void au_br_do_del_brp(struct au_sbinfo *sbinfo,
3821+ const aufs_bindex_t bindex,
3822+ const aufs_bindex_t bbot)
3823+{
3824+ struct au_branch **brp, **p;
3825+
3826+ AuRwMustWriteLock(&sbinfo->si_rwsem);
3827+
3828+ brp = sbinfo->si_branch + bindex;
3829+ if (bindex < bbot)
3830+ memmove(brp, brp + 1, sizeof(*brp) * (bbot - bindex)); /* close the gap */
3831+ sbinfo->si_branch[0 + bbot] = NULL;
3832+ sbinfo->si_bbot--;
3833+
3834+ p = au_krealloc(sbinfo->si_branch, sizeof(*p) * bbot, AuGFP_SBILIST,
3835+ /*may_shrink*/1);
3836+ if (p)
3837+ sbinfo->si_branch = p;
3838+ /* harmless error */
3839+}
3840+/* remove slot @bindex from dinfo's hdentry array and try to shrink the array; a failed shrink is ignored */
3841+static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex,
3842+ const aufs_bindex_t bbot)
3843+{
3844+ struct au_hdentry *hdp, *p;
3845+
3846+ AuRwMustWriteLock(&dinfo->di_rwsem);
3847+
3848+ hdp = au_hdentry(dinfo, bindex);
3849+ if (bindex < bbot)
3850+ memmove(hdp, hdp + 1, sizeof(*hdp) * (bbot - bindex)); /* close the gap */
3851+ /* au_h_dentry_init(au_hdentry(dinfo, bbot); */
3852+ dinfo->di_bbot--;
3853+
3854+ p = au_krealloc(dinfo->di_hdentry, sizeof(*p) * bbot, AuGFP_SBILIST,
3855+ /*may_shrink*/1);
3856+ if (p)
3857+ dinfo->di_hdentry = p;
3858+ /* harmless error */
3859+}
3860+/* remove slot @bindex from iinfo's hinode array and try to shrink the array; a failed shrink is ignored */
3861+static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex,
3862+ const aufs_bindex_t bbot)
3863+{
3864+ struct au_hinode *hip, *p;
3865+
3866+ AuRwMustWriteLock(&iinfo->ii_rwsem);
3867+
3868+ hip = au_hinode(iinfo, bindex);
3869+ if (bindex < bbot)
3870+ memmove(hip, hip + 1, sizeof(*hip) * (bbot - bindex)); /* close the gap */
3871+ /* au_hinode_init(au_hinode(iinfo, bbot)); */
3872+ iinfo->ii_bbot--;
3873+
3874+ p = au_krealloc(iinfo->ii_hinode, sizeof(*p) * bbot, AuGFP_SBILIST,
3875+ /*may_shrink*/1);
3876+ if (p)
3877+ iinfo->ii_hinode = p;
3878+ /* harmless error */
3879+}
3880+/* unlink branch @br at @bindex from the sb, root dentry and root inode arrays, drop the lower root refs, then free @br */
3881+static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex,
3882+ struct au_branch *br)
3883+{
3884+ aufs_bindex_t bbot;
3885+ struct au_sbinfo *sbinfo;
3886+ struct dentry *root, *h_root;
3887+ struct inode *inode, *h_inode;
3888+ struct au_hinode *hinode;
3889+
3890+ SiMustWriteLock(sb);
3891+
3892+ root = sb->s_root;
3893+ inode = d_inode(root);
3894+ sbinfo = au_sbi(sb);
3895+ bbot = sbinfo->si_bbot;
3896+
3897+ h_root = au_h_dptr(root, bindex);
3898+ hinode = au_hi(inode, bindex);
3899+ h_inode = au_igrab(hinode->hi_inode); /* own reference, dropped by iput() below */
3900+ au_hiput(hinode);
3901+
3902+ au_sbilist_lock();
3903+ au_br_do_del_brp(sbinfo, bindex, bbot);
3904+ au_br_do_del_hdp(au_di(root), bindex, bbot);
3905+ au_br_do_del_hip(au_ii(inode), bindex, bbot);
3906+ au_sbilist_unlock();
3907+
3908+ /* ignore an error */
3909+ au_dr_br_fin(sb, br); /* always, regardless of the mount option */
3910+
3911+ dput(h_root);
3912+ iput(h_inode);
3913+ au_br_do_free(br);
3914+}
3915+/* callback for au_array_alloc() which stores nothing and simply reports @max entries */
3916+static unsigned long long empty_cb(struct super_block *sb, void *array,
3917+ unsigned long long max, void *arg)
3918+{
3919+ return max;
3920+}
3921+
/*
 * Delete the branch whose root is del->h_path.dentry from the aufs
 * superblock @sb.
 *
 * Returns 0 on success, and also when the branch is not found while @remount
 * is set. Otherwise returns -ENOENT for an unknown branch, -EBUSY when
 * au_sbbot() is 0 ("no more branches left") or br_count is non-zero, or the
 * error from au_array_alloc(), test_file_busy() or test_children_busy().
 * When test_children_busy() fails after the whiteout dentries were released,
 * they are re-created by au_br_init_wh() before returning.
 */
3922+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount)
3923+{
3924+ int err, rerr, i;
3925+ unsigned long long opened;
3926+ unsigned int mnt_flags;
3927+ aufs_bindex_t bindex, bbot, br_id;
3928+ unsigned char do_wh, verbose;
3929+ struct au_branch *br;
3930+ struct au_wbr *wbr;
3931+ struct dentry *root;
3932+ struct file **to_free;
3933+
3934+ err = 0;
3935+ opened = 0;
3936+ to_free = NULL;
3937+ root = sb->s_root;
3938+ bindex = au_find_dbindex(root, del->h_path.dentry);
3939+ if (bindex < 0) {
3940+ if (remount)
3941+ goto out; /* success */
3942+ err = -ENOENT;
3943+ pr_err("%s no such branch\n", del->pathname);
3944+ goto out;
3945+ }
3946+ AuDbg("bindex b%d\n", bindex);
3947+
/* every failure below reports -EBUSY unless a helper returns its own error */
3948+ err = -EBUSY;
3949+ mnt_flags = au_mntflags(sb);
3950+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
3951+ bbot = au_sbbot(sb);
3952+ if (unlikely(!bbot)) {
3953+ AuVerbose(verbose, "no more branches left\n");
3954+ goto out;
3955+ }
3956+
3957+ br = au_sbr(sb, bindex);
3958+ AuDebugOn(!path_equal(&br->br_path, &del->h_path));
3959+ if (unlikely(au_lcnt_read(&br->br_count, /*do_rev*/1))) {
3960+ AuVerbose(verbose, "br %pd2 is busy now\n", del->h_path.dentry);
3961+ goto out;
3962+ }
3963+
/* when br_nfiles is non-zero, test those opened files via test_file_busy() */
3964+ br_id = br->br_id;
3965+ opened = au_lcnt_read(&br->br_nfiles, /*do_rev*/1);
3966+ if (unlikely(opened)) {
3967+ to_free = au_array_alloc(&opened, empty_cb, sb, NULL);
3968+ err = PTR_ERR(to_free);
3969+ if (IS_ERR(to_free))
3970+ goto out;
3971+
3972+ err = test_file_busy(sb, br_id, to_free, opened);
3973+ if (unlikely(err)) {
3974+ AuVerbose(verbose, "%llu file(s) opened\n", opened);
3975+ goto out;
3976+ }
3977+ }
3978+
/* release the whiteout dentries now; out_wh re-creates them on failure */
3979+ wbr = br->br_wbr;
3980+ do_wh = wbr && (wbr->wbr_whbase || wbr->wbr_plink || wbr->wbr_orph);
3981+ if (do_wh) {
3982+ /* instead of WbrWhMustWriteLock(wbr) */
3983+ SiMustWriteLock(sb);
3984+ for (i = 0; i < AuBrWh_Last; i++) {
3985+ dput(wbr->wbr_wh[i]);
3986+ wbr->wbr_wh[i] = NULL;
3987+ }
3988+ }
3989+
3990+ err = test_children_busy(root, bindex, verbose);
3991+ if (unlikely(err)) {
3992+ if (do_wh)
3993+ goto out_wh;
3994+ goto out;
3995+ }
3996+
3997+ err = 0;
3998+ if (to_free) {
3999+ /*
4000+ * now we confirmed the branch is deletable.
4001+ * let's free the remaining opened dirs on the branch.
4002+ */
4003+ di_write_unlock(root);
4004+ br_del_file(to_free, opened, br_id);
4005+ di_write_lock_child(root);
4006+ }
4007+
4008+ sysaufs_brs_del(sb, bindex); /* remove successors */
4009+ dbgaufs_xino_del(br); /* remove one */
4010+ au_br_do_del(sb, bindex, br);
4011+ sysaufs_brs_add(sb, bindex); /* append successors */
4012+ dbgaufs_brs_add(sb, bindex, /*topdown*/1); /* rename successors */
4013+
/* the top branch was removed: refresh the root attributes from the new top */
4014+ if (!bindex) {
4015+ au_cpup_attr_all(d_inode(root), /*force*/1);
4016+ sb->s_maxbytes = au_sbr_sb(sb, 0)->s_maxbytes;
4017+ } else
4018+ au_sub_nlink(d_inode(root), d_inode(del->h_path.dentry));
4019+ if (au_opt_test(mnt_flags, PLINK))
4020+ au_plink_half_refresh(sb, br_id);
4021+
4022+ goto out; /* success */
4023+
4024+out_wh:
4025+ /* revert */
4026+ rerr = au_br_init_wh(sb, br, br->br_perm);
4027+ if (rerr)
4028+ pr_warn("failed re-creating base whiteout, %s. (%d)\n",
4029+ del->pathname, rerr);
4030+out:
4031+ if (to_free)
4032+ au_farray_free(to_free, opened);
4033+ return err;
4034+}
4035+
4036+/* ---------------------------------------------------------------------- */
4037+
4038+static int au_ibusy(struct super_block *sb, struct aufs_ibusy __user *arg)
4039+{
4040+ int err;
4041+ aufs_bindex_t btop, bbot;
4042+ struct aufs_ibusy ibusy;
4043+ struct inode *inode, *h_inode;
4044+
4045+ err = -EPERM;
4046+ if (unlikely(!capable(CAP_SYS_ADMIN)))
4047+ goto out;
4048+
4049+ err = copy_from_user(&ibusy, arg, sizeof(ibusy));
4050+ if (!err)
4051+ /* VERIFY_WRITE */
4052+ err = !access_ok(&arg->h_ino, sizeof(arg->h_ino));
4053+ if (unlikely(err)) {
4054+ err = -EFAULT;
4055+ AuTraceErr(err);
4056+ goto out;
4057+ }
4058+
4059+ err = -EINVAL;
4060+ si_read_lock(sb, AuLock_FLUSH);
4061+ if (unlikely(ibusy.bindex < 0 || ibusy.bindex > au_sbbot(sb)))
4062+ goto out_unlock;
4063+
4064+ err = 0;
4065+ ibusy.h_ino = 0; /* invalid */
4066+ inode = ilookup(sb, ibusy.ino);
4067+ if (!inode
4068+ || inode->i_ino == AUFS_ROOT_INO
4069+ || au_is_bad_inode(inode))
4070+ goto out_unlock;
4071+
4072+ ii_read_lock_child(inode);
4073+ btop = au_ibtop(inode);
4074+ bbot = au_ibbot(inode);
4075+ if (btop <= ibusy.bindex && ibusy.bindex <= bbot) {
4076+ h_inode = au_h_iptr(inode, ibusy.bindex);
4077+ if (h_inode && au_test_ibusy(inode, btop, bbot))
4078+ ibusy.h_ino = h_inode->i_ino;
4079+ }
4080+ ii_read_unlock(inode);
4081+ iput(inode);
4082+
4083+out_unlock:
4084+ si_read_unlock(sb);
4085+ if (!err) {
4086+ err = __put_user(ibusy.h_ino, &arg->h_ino);
4087+ if (unlikely(err)) {
4088+ err = -EFAULT;
4089+ AuTraceErr(err);
4090+ }
4091+ }
4092+out:
4093+ return err;
4094+}
4095+
4096+long au_ibusy_ioctl(struct file *file, unsigned long arg)
4097+{
4098+ return au_ibusy(file->f_path.dentry->d_sb, (void __user *)arg);
4099+}
4100+
#ifdef CONFIG_COMPAT
/* compat ioctl entry: @arg is converted by compat_ptr() before use */
long au_ibusy_compat_ioctl(struct file *file, unsigned long arg)
{
	struct super_block *sb = file->f_path.dentry->d_sb;

	return au_ibusy(sb, compat_ptr(arg));
}
#endif
4107+
4108+/* ---------------------------------------------------------------------- */
4109+
4110+/*
4111+ * change a branch permission
4112+ */
4113+
/*
 * Warn when a branch turns readonly while IMA is configured.
 * Compiles to nothing without CONFIG_IMA.
 */
static void au_warn_ima(void)
{
#if defined(CONFIG_IMA)
	/* IMA has no support for mark_files_ro() */
	AuWarn1("RW -> RO makes IMA to produce wrong message\n");
#endif
}
4121+
/* true when permission @a is whiteout-able but @b is not */
static int do_need_sigen_inc(int a, int b)
{
	if (!au_br_whable(a))
		return 0;

	return !au_br_whable(b);
}
4126+
/*
 * true when exactly one of the @old and @new branch permissions is
 * whiteout-able, tested in both directions by do_need_sigen_inc().
 */
static int need_sigen_inc(int old, int new)
{
	if (do_need_sigen_inc(old, new))
		return 1;

	return !!do_need_sigen_inc(new, old);
}
4132+
/*
 * Withdraw the write mode from the lower files opened for writing on the
 * branch at @bindex. au_br_mod() calls this for a rw --> ro change.
 *
 * The first loop walks the array from au_farray_alloc(). It fails with
 * -EBUSY as soon as a file is mmapped (au_test_mmapped()). It keeps, with an
 * extra get_file(), only regular files opened for write whose top lower file
 * is writable and belongs to this branch; every other slot is cleared.
 * The second loop clears FMODE_WRITE and FMODE_WRITER on each kept lower
 * file. If the file was a writer, it also drops the write access on the
 * lower inode and the lower mount.
 *
 * Returns 0 on success, -EBUSY for an mmapped file, or the error from
 * au_farray_alloc().
 */
4133+static int au_br_mod_files_ro(struct super_block *sb, aufs_bindex_t bindex)
4134+{
4135+ int err, do_warn;
4136+ unsigned int mnt_flags;
4137+ unsigned long long ull, max;
4138+ aufs_bindex_t br_id;
4139+ unsigned char verbose, writer;
4140+ struct file *file, *hf, **array;
4141+ struct au_hfile *hfile;
4142+ struct inode *h_inode;
4143+
4144+ mnt_flags = au_mntflags(sb);
4145+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
4146+
4147+ array = au_farray_alloc(sb, &max);
4148+ err = PTR_ERR(array);
4149+ if (IS_ERR(array))
4150+ goto out;
4151+
4152+ do_warn = 0;
4153+ br_id = au_sbr_id(sb, bindex);
4154+ for (ull = 0; ull < max; ull++) {
4155+ file = array[ull];
4156+ if (unlikely(!file))
4157+ break;
4158+
4159+ /* AuDbg("%pD\n", file); */
4160+ fi_read_lock(file);
4161+ if (unlikely(au_test_mmapped(file))) {
4162+ err = -EBUSY;
4163+ AuVerbose(verbose, "mmapped %pD\n", file);
4164+ AuDbgFile(file);
4165+ FiMustNoWaiters(file);
4166+ fi_read_unlock(file);
4167+ goto out_array;
4168+ }
4169+
4170+ hfile = &au_fi(file)->fi_htop;
4171+ hf = hfile->hf_file;
4172+ if (!d_is_reg(file->f_path.dentry)
4173+ || !(file->f_mode & FMODE_WRITE)
4174+ || hfile->hf_br->br_id != br_id
4175+ || !(hf->f_mode & FMODE_WRITE))
4176+ array[ull] = NULL;
4177+ else {
4178+ do_warn = 1;
4179+ get_file(file);
4180+ }
4181+
4182+ FiMustNoWaiters(file);
4183+ fi_read_unlock(file);
/* one fput() per visited file; a kept file retains the get_file() above */
4184+ fput(file);
4185+ }
4186+
4187+ err = 0;
4188+ if (do_warn)
4189+ au_warn_ima();
4190+
4191+ for (ull = 0; ull < max; ull++) {
4192+ file = array[ull];
4193+ if (!file)
4194+ continue;
4195+
4196+ /* todo: already flushed? */
4197+ /*
4198+ * fs/super.c:mark_files_ro() is gone, but aufs keeps its
4199+ * approach which resets f_mode and calls mnt_drop_write() and
4200+ * file_release_write() for each file, because the branch
4201+ * attribute in aufs world is totally different from the native
4202+ * fs rw/ro mode.
4203+ */
4204+ /* fi_read_lock(file); */
4205+ hfile = &au_fi(file)->fi_htop;
4206+ hf = hfile->hf_file;
4207+ /* fi_read_unlock(file); */
/* f_mode of the lower file is tested and cleared under its f_lock */
4208+ spin_lock(&hf->f_lock);
4209+ writer = !!(hf->f_mode & FMODE_WRITER);
4210+ hf->f_mode &= ~(FMODE_WRITE | FMODE_WRITER);
4211+ spin_unlock(&hf->f_lock);
4212+ if (writer) {
4213+ h_inode = file_inode(hf);
4214+ if (hf->f_mode & FMODE_READ)
4215+ i_readcount_inc(h_inode);
4216+ put_write_access(h_inode);
4217+ __mnt_drop_write(hf->f_path.mnt);
4218+ }
4219+ }
4220+
4221+out_array:
4222+ au_farray_free(array, max);
4223+out:
4224+ AuTraceErr(err);
4225+ return err;
4226+}
4227+
/*
 * Change the permission of the branch whose root is mod->h_root to
 * mod->perm.
 *
 * @remount: when set, an unknown branch is treated as success.
 * @do_refresh: OR-ed with need_sigen_inc(old perm, new perm) on success.
 *
 * Returns 0 on success, including the no-op cases (unknown branch while
 * remounting, or the permission is already mod->perm). Otherwise returns
 * -ENOENT for an unknown branch, or the error from test_br(),
 * au_fhsm_br_alloc(), au_br_init_wh(), au_br_mod_files_ro() or
 * au_wbr_init(); -ENOMEM when allocating br_wbr fails.
 */
4228+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
4229+ int *do_refresh)
4230+{
4231+ int err, rerr;
4232+ aufs_bindex_t bindex;
4233+ struct dentry *root;
4234+ struct au_branch *br;
4235+ struct au_br_fhsm *bf;
4236+
4237+ root = sb->s_root;
4238+ bindex = au_find_dbindex(root, mod->h_root);
4239+ if (bindex < 0) {
4240+ if (remount)
4241+ return 0; /* success */
4242+ err = -ENOENT;
4243+ pr_err("%s no such branch\n", mod->path);
4244+ goto out;
4245+ }
4246+ AuDbg("bindex b%d\n", bindex);
4247+
4248+ err = test_br(d_inode(mod->h_root), mod->perm, mod->path);
4249+ if (unlikely(err))
4250+ goto out;
4251+
4252+ br = au_sbr(sb, bindex);
4253+ AuDebugOn(mod->h_root != au_br_dentry(br));
4254+ if (br->br_perm == mod->perm)
4255+ return 0; /* success */
4256+
4257+ /* pre-allocate for non-fhsm --> fhsm */
/* the new object is parked in @bf and installed only after all else succeeds */
4258+ bf = NULL;
4259+ if (!au_br_fhsm(br->br_perm) && au_br_fhsm(mod->perm)) {
4260+ err = au_fhsm_br_alloc(br);
4261+ if (unlikely(err))
4262+ goto out;
4263+ bf = br->br_fhsm;
4264+ br->br_fhsm = NULL;
4265+ }
4266+
4267+ if (au_br_writable(br->br_perm)) {
4268+ /* remove whiteout base */
4269+ err = au_br_init_wh(sb, br, mod->perm);
4270+ if (unlikely(err))
4271+ goto out_bf;
4272+
4273+ if (!au_br_writable(mod->perm)) {
4274+ /* rw --> ro, file might be mmapped */
4275+ DiMustNoWaiters(root);
4276+ IiMustNoWaiters(d_inode(root));
4277+ di_write_unlock(root);
4278+ err = au_br_mod_files_ro(sb, bindex);
4279+ /* aufs_write_lock() calls ..._child() */
4280+ di_write_lock_child(root);
4281+
/* on failure, try to re-create br_wbr for the old permission */
4282+ if (unlikely(err)) {
4283+ rerr = -ENOMEM;
4284+ br->br_wbr = kzalloc(sizeof(*br->br_wbr),
4285+ GFP_NOFS);
4286+ if (br->br_wbr)
4287+ rerr = au_wbr_init(br, sb, br->br_perm);
4288+ if (unlikely(rerr)) {
4289+ AuIOErr("nested error %d (%d)\n",
4290+ rerr, err);
4291+ br->br_perm = mod->perm;
4292+ }
4293+ }
4294+ }
4295+ } else if (au_br_writable(mod->perm)) {
4296+ /* ro --> rw */
4297+ err = -ENOMEM;
4298+ br->br_wbr = kzalloc(sizeof(*br->br_wbr), GFP_NOFS);
4299+ if (br->br_wbr) {
4300+ err = au_wbr_init(br, sb, mod->perm);
4301+ if (unlikely(err)) {
4302+ au_kfree_rcu(br->br_wbr);
4303+ br->br_wbr = NULL;
4304+ }
4305+ }
4306+ }
4307+ if (unlikely(err))
4308+ goto out_bf;
4309+
4310+ if (au_br_fhsm(br->br_perm)) {
4311+ if (!au_br_fhsm(mod->perm)) {
4312+ /* fhsm --> non-fhsm */
4313+ au_br_fhsm_fin(br->br_fhsm);
4314+ au_kfree_rcu(br->br_fhsm);
4315+ br->br_fhsm = NULL;
4316+ }
4317+ } else if (au_br_fhsm(mod->perm))
4318+ /* non-fhsm --> fhsm */
4319+ br->br_fhsm = bf;
4320+
4321+ *do_refresh |= need_sigen_inc(br->br_perm, mod->perm);
4322+ br->br_perm = mod->perm;
4323+ goto out; /* success */
4324+
4325+out_bf:
/* discard the pre-allocated fhsm object which was never installed */
4326+ au_kfree_try_rcu(bf);
4327+out:
4328+ AuTraceErr(err);
4329+ return err;
4330+}
4331+
4332+/* ---------------------------------------------------------------------- */
4333+
4334+int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs)
4335+{
4336+ int err;
4337+ struct kstatfs kstfs;
4338+
4339+ err = vfs_statfs(&br->br_path, &kstfs);
4340+ if (!err) {
4341+ stfs->f_blocks = kstfs.f_blocks;
4342+ stfs->f_bavail = kstfs.f_bavail;
4343+ stfs->f_files = kstfs.f_files;
4344+ stfs->f_ffree = kstfs.f_ffree;
4345+ }
4346+
4347+ return err;
4348+}
4349diff -urN /usr/share/empty/fs/aufs/branch.h linux/fs/aufs/branch.h
4350--- /usr/share/empty/fs/aufs/branch.h 1970-01-01 01:00:00.000000000 +0100
4351+++ linux/fs/aufs/branch.h 2022-11-05 23:02:18.959222617 +0100
4352@@ -0,0 +1,375 @@
4353+/* SPDX-License-Identifier: GPL-2.0 */
4354+/*
4355+ * Copyright (C) 2005-2022 Junjiro R. Okajima
4356+ *
4357+ * This program is free software; you can redistribute it and/or modify
4358+ * it under the terms of the GNU General Public License as published by
4359+ * the Free Software Foundation; either version 2 of the License, or
4360+ * (at your option) any later version.
4361+ *
4362+ * This program is distributed in the hope that it will be useful,
4363+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4364+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4365+ * GNU General Public License for more details.
4366+ *
4367+ * You should have received a copy of the GNU General Public License
4368+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4369+ */
4370+
4371+/*
4372+ * branch filesystems and xino for them
4373+ */
4374+
4375+#ifndef __AUFS_BRANCH_H__
4376+#define __AUFS_BRANCH_H__
4377+
4378+#ifdef __KERNEL__
4379+
4380+#include <linux/mount.h>
4381+#include "dirren.h"
4382+#include "dynop.h"
4383+#include "lcnt.h"
4384+#include "rwsem.h"
4385+#include "super.h"
4386+
4387+/* ---------------------------------------------------------------------- */
4388+
4389+/* a xino file */
/*
 * One object may carry several files: xi_file[] holds xi_nfile entries, and
 * au_xino_file() bounds-checks its index against xi_nfile.
 */
4390+struct au_xino {
4391+ struct file **xi_file;
4392+ unsigned int xi_nfile;
4393+
4394+ struct {
4395+ spinlock_t spin;
4396+ ino_t *array;
4397+ int total;
4398+ /* reserved for future use */
4399+ /* unsigned long *bitmap; */
4400+ wait_queue_head_t wqh;
4401+ } xi_nondir;
4402+
4403+ struct mutex xi_mtx; /* protects xi_file array */
4404+ struct hlist_bl_head xi_writing;
4405+
4406+ atomic_t xi_truncating;
4407+
/* reference count: taken by au_xino_get(), read by au_xino_count() */
4408+ struct kref xi_kref;
4409+};
4410+
4411+/* File-based Hierarchical Storage Management */
/* the structure has no members unless CONFIG_AUFS_FHSM is set */
4412+struct au_br_fhsm {
4413+#ifdef CONFIG_AUFS_FHSM
/* bf_lock is set up by au_br_fhsm_init() and destroyed by au_br_fhsm_fin() */
4414+ struct mutex bf_lock;
4415+ unsigned long bf_jiffy;
4416+ struct aufs_stfs bf_stfs;
4417+ int bf_readable;
4418+#endif
4419+};
4420+
4421+/* members for writable branch only */
/* AuBrWh_* index wbr_wh[]; AuBrWh_Last is the number of entries */
4422+enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last};
4423+struct au_wbr {
/* locked through the wbr_wh_*() macros in this header */
4424+ struct au_rwsem wbr_wh_rwsem;
4425+ struct dentry *wbr_wh[AuBrWh_Last];
4426+ atomic_t wbr_wh_running;
4427+#define wbr_whbase wbr_wh[AuBrWh_BASE] /* whiteout base */
4428+#define wbr_plink wbr_wh[AuBrWh_PLINK] /* pseudo-link dir */
4429+#define wbr_orph wbr_wh[AuBrWh_ORPH] /* dir for orphans */
4430+
4431+ /* mfs mode */
4432+ unsigned long long wbr_bytes;
4433+};
4434+
4435+/* ext2 has 3 types of operations at least, ext3 has 4 */
4436+#define AuBrDynOp (AuDyLast * 4)
4437+
4438+#ifdef CONFIG_AUFS_HFSNOTIFY
4439+/* support for asynchronous destruction */
/* referenced from struct au_branch as br_hfsn */
4440+struct au_br_hfsnotify {
4441+ struct fsnotify_group *hfsn_group;
4442+};
4443+#endif
4444+
4445+/* sysfs entries */
/* one attribute and the buffer for its name */
4446+struct au_brsysfs {
4447+ char name[16];
4448+ struct attribute attr;
4449+};
4450+
/* indices into au_branch::br_sysfs[]; AuBrSysfs_Last is the array size */
4451+enum {
4452+ AuBrSysfs_BR,
4453+ AuBrSysfs_BRID,
4454+ AuBrSysfs_Last
4455+};
4456+
4457+/* protected by superblock rwsem */
4458+struct au_branch {
/* may be NULL; see au_xino_get() and au_xino_count() */
4459+ struct au_xino *br_xino;
4460+
4461+ aufs_bindex_t br_id;
4462+
/* branch permission/attribute bits, tested by au_br_writable() and others */
4463+ int br_perm;
/* mount and root dentry of the branch; see au_br_mnt() and au_br_dentry() */
4464+ struct path br_path;
4465+ spinlock_t br_dykey_lock;
4466+ struct au_dykey *br_dykey[AuBrDynOp];
4467+ au_lcnt_t br_nfiles; /* opened files */
4468+ au_lcnt_t br_count; /* in-use for other */
4469+
/* tested against NULL in au_br_del(); allocated by au_br_mod() on ro --> rw */
4470+ struct au_wbr *br_wbr;
4471+ struct au_br_fhsm *br_fhsm;
4472+
4473+#ifdef CONFIG_AUFS_HFSNOTIFY
4474+ struct au_br_hfsnotify *br_hfsn;
4475+#endif
4476+
4477+#ifdef CONFIG_SYSFS
4478+ /* entries under sysfs per mount-point */
4479+ struct au_brsysfs br_sysfs[AuBrSysfs_Last];
4480+#endif
4481+
4482+#ifdef CONFIG_DEBUG_FS
4483+ struct dentry *br_dbgaufs; /* xino */
4484+#endif
4485+
4486+ struct au_dr_br br_dirren;
4487+};
4488+
4489+/* ---------------------------------------------------------------------- */
4490+
4491+static inline struct vfsmount *au_br_mnt(struct au_branch *br)
4492+{
4493+ return br->br_path.mnt;
4494+}
4495+
4496+static inline struct dentry *au_br_dentry(struct au_branch *br)
4497+{
4498+ return br->br_path.dentry;
4499+}
4500+
4501+static inline struct user_namespace *au_br_userns(struct au_branch *br)
4502+{
4503+ return mnt_user_ns(br->br_path.mnt);
4504+}
4505+
4506+static inline struct super_block *au_br_sb(struct au_branch *br)
4507+{
4508+ return au_br_mnt(br)->mnt_sb;
4509+}
4510+
4511+static inline int au_br_rdonly(struct au_branch *br)
4512+{
4513+ return (sb_rdonly(au_br_sb(br))
4514+ || !au_br_writable(br->br_perm))
4515+ ? -EROFS : 0;
4516+}
4517+
4518+static inline int au_br_hnotifyable(int brperm __maybe_unused)
4519+{
4520+#ifdef CONFIG_AUFS_HNOTIFY
4521+ return !(brperm & AuBrPerm_RR);
4522+#else
4523+ return 0;
4524+#endif
4525+}
4526+
4527+static inline int au_br_test_oflag(int oflag, struct au_branch *br)
4528+{
4529+ int err, exec_flag;
4530+
4531+ err = 0;
4532+ exec_flag = oflag & __FMODE_EXEC;
4533+ if (unlikely(exec_flag && path_noexec(&br->br_path)))
4534+ err = -EACCES;
4535+
4536+ return err;
4537+}
4538+
4539+static inline void au_xino_get(struct au_branch *br)
4540+{
4541+ struct au_xino *xi;
4542+
4543+ xi = br->br_xino;
4544+ if (xi)
4545+ kref_get(&xi->xi_kref);
4546+}
4547+
4548+static inline int au_xino_count(struct au_branch *br)
4549+{
4550+ int v;
4551+ struct au_xino *xi;
4552+
4553+ v = 0;
4554+ xi = br->br_xino;
4555+ if (xi)
4556+ v = kref_read(&xi->xi_kref);
4557+
4558+ return v;
4559+}
4560+
4561+/* ---------------------------------------------------------------------- */
4562+
4563+/* branch.c */
4564+struct au_sbinfo;
4565+void au_br_free(struct au_sbinfo *sinfo);
4566+int au_br_index(struct super_block *sb, aufs_bindex_t br_id);
4567+struct au_opt_add;
4568+int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount);
4569+struct au_opt_del;
4570+int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount);
4571+long au_ibusy_ioctl(struct file *file, unsigned long arg);
4572+#ifdef CONFIG_COMPAT
4573+long au_ibusy_compat_ioctl(struct file *file, unsigned long arg);
4574+#endif
4575+struct au_opt_mod;
4576+int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
4577+ int *do_refresh);
4578+struct aufs_stfs;
4579+int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs);
4580+
4581+/* xino.c */
4582+static const loff_t au_loff_max = LLONG_MAX;
4583+
4584+aufs_bindex_t au_xi_root(struct super_block *sb, struct dentry *dentry);
4585+struct file *au_xino_create(struct super_block *sb, char *fpath, int silent,
4586+ int wbrtop);
4587+struct file *au_xino_create2(struct super_block *sb, struct path *base,
4588+ struct file *copy_src);
/* argument bundle for au_xi_new() */
4589+struct au_xi_new {
4590+ struct au_xino *xi; /* switch between xino and xigen */
4591+ int idx;
4592+ struct path *base;
4593+ struct file *copy_src;
4594+};
4595+struct file *au_xi_new(struct super_block *sb, struct au_xi_new *xinew);
4596+
4597+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
4598+ ino_t *ino);
4599+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
4600+ ino_t ino);
4601+ssize_t xino_fread(struct file *file, void *buf, size_t size, loff_t *pos);
4602+ssize_t xino_fwrite(struct file *file, void *buf, size_t size, loff_t *pos);
4603+
4604+int au_xib_trunc(struct super_block *sb);
4605+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex, int idx_begin);
4606+
4607+struct au_xino *au_xino_alloc(unsigned int nfile);
4608+int au_xino_put(struct au_branch *br);
4609+struct file *au_xino_file1(struct au_xino *xi);
4610+
4611+struct au_opt_xino;
4612+void au_xino_clr(struct super_block *sb);
4613+int au_xino_set(struct super_block *sb, struct au_opt_xino *xiopt, int remount);
4614+struct file *au_xino_def(struct super_block *sb);
4615+int au_xino_init_br(struct super_block *sb, struct au_branch *br, ino_t hino,
4616+ struct path *base);
4617+
4618+ino_t au_xino_new_ino(struct super_block *sb);
4619+void au_xino_delete_inode(struct inode *inode, const int unlinked);
4620+
4621+void au_xinondir_leave(struct super_block *sb, aufs_bindex_t bindex,
4622+ ino_t h_ino, int idx);
4623+int au_xinondir_enter(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
4624+ int *idx);
4625+
4626+int au_xino_path(struct seq_file *seq, struct file *file);
4627+
4628+/* ---------------------------------------------------------------------- */
4629+
4630+/* @idx is signed to accept -1 meaning the first file */
4631+static inline struct file *au_xino_file(struct au_xino *xi, int idx)
4632+{
4633+ struct file *file;
4634+
4635+ file = NULL;
4636+ if (!xi)
4637+ goto out;
4638+
4639+ if (idx >= 0) {
4640+ if (idx < xi->xi_nfile)
4641+ file = xi->xi_file[idx];
4642+ } else
4643+ file = au_xino_file1(xi);
4644+
4645+out:
4646+ return file;
4647+}
4648+
4649+/* ---------------------------------------------------------------------- */
4650+
4651+/* Superblock to branch */
4652+static inline
4653+aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex)
4654+{
4655+ return au_sbr(sb, bindex)->br_id;
4656+}
4657+
4658+static inline
4659+struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex)
4660+{
4661+ return au_br_mnt(au_sbr(sb, bindex));
4662+}
4663+
4664+static inline
4665+struct user_namespace *au_sbr_userns(struct super_block *sb, aufs_bindex_t bindex)
4666+{
4667+ return au_br_userns(au_sbr(sb, bindex));
4668+}
4669+
4670+static inline
4671+struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex)
4672+{
4673+ return au_br_sb(au_sbr(sb, bindex));
4674+}
4675+
4676+static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex)
4677+{
4678+ return au_sbr(sb, bindex)->br_perm;
4679+}
4680+
4681+static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex)
4682+{
4683+ return au_br_whable(au_sbr_perm(sb, bindex));
4684+}
4685+
4686+/* ---------------------------------------------------------------------- */
4687+
/*
 * Lock helpers for au_wbr::wbr_wh_rwsem. Each macro forwards to the
 * au_rw_*() or AuRw*() primitive of the same kind, applied to that rwsem.
 */
4688+#define wbr_wh_read_lock(wbr) au_rw_read_lock(&(wbr)->wbr_wh_rwsem)
4689+#define wbr_wh_write_lock(wbr) au_rw_write_lock(&(wbr)->wbr_wh_rwsem)
4690+#define wbr_wh_read_trylock(wbr) au_rw_read_trylock(&(wbr)->wbr_wh_rwsem)
4691+#define wbr_wh_write_trylock(wbr) au_rw_write_trylock(&(wbr)->wbr_wh_rwsem)
4692+/*
4693+#define wbr_wh_read_trylock_nested(wbr) \
4694+ au_rw_read_trylock_nested(&(wbr)->wbr_wh_rwsem)
4695+#define wbr_wh_write_trylock_nested(wbr) \
4696+ au_rw_write_trylock_nested(&(wbr)->wbr_wh_rwsem)
4697+*/
4698+
4699+#define wbr_wh_read_unlock(wbr) au_rw_read_unlock(&(wbr)->wbr_wh_rwsem)
4700+#define wbr_wh_write_unlock(wbr) au_rw_write_unlock(&(wbr)->wbr_wh_rwsem)
4701+#define wbr_wh_downgrade_lock(wbr) au_rw_dgrade_lock(&(wbr)->wbr_wh_rwsem)
4702+
/* the lock-state checks below map to the AuRwMust*() macros */
4703+#define WbrWhMustNoWaiters(wbr) AuRwMustNoWaiters(&(wbr)->wbr_wh_rwsem)
4704+#define WbrWhMustAnyLock(wbr) AuRwMustAnyLock(&(wbr)->wbr_wh_rwsem)
4705+#define WbrWhMustWriteLock(wbr) AuRwMustWriteLock(&(wbr)->wbr_wh_rwsem)
4706+
4707+/* ---------------------------------------------------------------------- */
4708+
4709+#ifdef CONFIG_AUFS_FHSM
4710+static inline void au_br_fhsm_init(struct au_br_fhsm *brfhsm)
4711+{
4712+ mutex_init(&brfhsm->bf_lock);
4713+ brfhsm->bf_jiffy = 0;
4714+ brfhsm->bf_readable = 0;
4715+}
4716+
4717+static inline void au_br_fhsm_fin(struct au_br_fhsm *brfhsm)
4718+{
4719+ mutex_destroy(&brfhsm->bf_lock);
4720+}
4721+#else
4722+AuStubVoid(au_br_fhsm_init, struct au_br_fhsm *brfhsm)
4723+AuStubVoid(au_br_fhsm_fin, struct au_br_fhsm *brfhsm)
4724+#endif
4725+
4726+#endif /* __KERNEL__ */
4727+#endif /* __AUFS_BRANCH_H__ */
4728diff -urN /usr/share/empty/fs/aufs/conf.mk linux/fs/aufs/conf.mk
4729--- /usr/share/empty/fs/aufs/conf.mk 1970-01-01 01:00:00.000000000 +0100
4730+++ linux/fs/aufs/conf.mk 2022-11-05 23:02:18.959222617 +0100
4731@@ -0,0 +1,40 @@
4732+# SPDX-License-Identifier: GPL-2.0
4733+
# Generate ${obj}/conf.str: one "NAME=value\n" C string literal per defined
# aufs config symbol. sysfs.o is declared to depend on it.
4734+AuConfStr = CONFIG_AUFS_FS=${CONFIG_AUFS_FS}
4735+
# AuConf(sym): append "sym=value" to AuConfStr when sym is defined
4736+define AuConf
4737+ifdef ${1}
4738+AuConfStr += ${1}=${${1}}
4739+endif
4740+endef
4741+
# suffixes of the CONFIG_AUFS_* symbols to be recorded
4742+AuConfAll = BRANCH_MAX_127 BRANCH_MAX_511 BRANCH_MAX_1023 BRANCH_MAX_32767 \
4743+ SBILIST \
4744+ HNOTIFY HFSNOTIFY \
4745+ EXPORT INO_T_64 \
4746+ XATTR \
4747+ FHSM \
4748+ RDU \
4749+ DIRREN \
4750+ SHWH \
4751+ BR_RAMFS \
4752+ BR_FUSE POLL \
4753+ BR_HFSPLUS \
4754+ BDEV_LOOP \
4755+ DEBUG MAGIC_SYSRQ
4756+$(foreach i, ${AuConfAll}, \
4757+ $(eval $(call AuConf,CONFIG_AUFS_${i})))
4758+
# the .tmp file is rebuilt on every run (FORCE); conf.str itself is replaced
# only when the content differs
4759+AuConfName = ${obj}/conf.str
4760+${AuConfName}.tmp: FORCE
4761+ @echo ${AuConfStr} | tr ' ' '\n' | sed -e 's/^/"/' -e 's/$$/\\n"/' > $@
4762+${AuConfName}: ${AuConfName}.tmp
4763+ @diff -q $< $@ > /dev/null 2>&1 || { \
4764+ echo ' GEN ' $@; \
4765+ cp -p $< $@; \
4766+ }
4767+FORCE:
4768+clean-files += ${AuConfName} ${AuConfName}.tmp
4769+${obj}/sysfs.o: ${AuConfName}
4770+
# optional private fragment; the leading '-' ignores a missing file
4771+-include ${srctree}/${src}/conf_priv.mk
4772diff -urN /usr/share/empty/fs/aufs/cpup.c linux/fs/aufs/cpup.c
4773--- /usr/share/empty/fs/aufs/cpup.c 1970-01-01 01:00:00.000000000 +0100
4774+++ linux/fs/aufs/cpup.c 2023-02-20 21:05:51.959693785 +0100
4775@@ -0,0 +1,1459 @@
4776+// SPDX-License-Identifier: GPL-2.0
4777+/*
4778+ * Copyright (C) 2005-2022 Junjiro R. Okajima
4779+ *
4780+ * This program is free software; you can redistribute it and/or modify
4781+ * it under the terms of the GNU General Public License as published by
4782+ * the Free Software Foundation; either version 2 of the License, or
4783+ * (at your option) any later version.
4784+ *
4785+ * This program is distributed in the hope that it will be useful,
4786+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
4787+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4788+ * GNU General Public License for more details.
4789+ *
4790+ * You should have received a copy of the GNU General Public License
4791+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
4792+ */
4793+
4794+/*
4795+ * copy-up functions, see wbr_policy.c for copy-down
4796+ */
4797+
4798+#include <linux/fs_stack.h>
4799+#include <linux/mm.h>
4800+#include <linux/task_work.h>
4801+#include "aufs.h"
4802+
4803+void au_cpup_attr_flags(struct inode *dst, unsigned int iflags)
4804+{
4805+ const unsigned int mask = S_DEAD | S_SWAPFILE | S_PRIVATE
4806+ | S_NOATIME | S_NOCMTIME | S_AUTOMOUNT;
4807+
4808+ BUILD_BUG_ON(sizeof(iflags) != sizeof(dst->i_flags));
4809+
4810+ dst->i_flags |= iflags & ~mask;
4811+ if (au_test_fs_notime(dst->i_sb))
4812+ dst->i_flags |= S_NOATIME | S_NOCMTIME;
4813+}
4814+
4815+void au_cpup_attr_timesizes(struct inode *inode)
4816+{
4817+ struct inode *h_inode;
4818+
4819+ h_inode = au_h_iptr(inode, au_ibtop(inode));
4820+ fsstack_copy_attr_times(inode, h_inode);
4821+ fsstack_copy_inode_size(inode, h_inode);
4822+}
4823+
4824+void au_cpup_attr_nlink(struct inode *inode, int force)
4825+{
4826+ struct inode *h_inode;
4827+ struct super_block *sb;
4828+ aufs_bindex_t bindex, bbot;
4829+
4830+ sb = inode->i_sb;
4831+ bindex = au_ibtop(inode);
4832+ h_inode = au_h_iptr(inode, bindex);
4833+ if (!force
4834+ && !S_ISDIR(h_inode->i_mode)
4835+ && au_opt_test(au_mntflags(sb), PLINK)
4836+ && au_plink_test(inode))
4837+ return;
4838+
4839+ /*
4840+ * 0 can happen in revalidating.
4841+ * h_inode->i_mutex may not be held here, but it is harmless since once
4842+ * i_nlink reaches 0, it will never become positive except O_TMPFILE
4843+ * case.
4844+ * todo: O_TMPFILE+linkat(AT_SYMLINK_FOLLOW) bypassing aufs may cause
4845+ * the incorrect link count.
4846+ */
4847+ set_nlink(inode, h_inode->i_nlink);
4848+
4849+ /*
4850+ * fewer nlink makes find(1) noisy, but larger nlink doesn't.
4851+ * it may includes whplink directory.
4852+ */
4853+ if (S_ISDIR(h_inode->i_mode)) {
4854+ bbot = au_ibbot(inode);
4855+ for (bindex++; bindex <= bbot; bindex++) {
4856+ h_inode = au_h_iptr(inode, bindex);
4857+ if (h_inode)
4858+ au_add_nlink(inode, h_inode);
4859+ }
4860+ }
4861+}
4862+
4863+void au_cpup_attr_changeable(struct inode *inode)
4864+{
4865+ struct inode *h_inode;
4866+
4867+ h_inode = au_h_iptr(inode, au_ibtop(inode));
4868+ inode->i_mode = h_inode->i_mode;
4869+ inode->i_uid = h_inode->i_uid;
4870+ inode->i_gid = h_inode->i_gid;
4871+ au_cpup_attr_timesizes(inode);
4872+ au_cpup_attr_flags(inode, h_inode->i_flags);
4873+}
4874+
4875+void au_cpup_igen(struct inode *inode, struct inode *h_inode)
4876+{
4877+ struct au_iinfo *iinfo = au_ii(inode);
4878+
4879+ IiMustWriteLock(inode);
4880+
4881+ iinfo->ii_higen = h_inode->i_generation;
4882+ iinfo->ii_hsb1 = h_inode->i_sb;
4883+}
4884+
4885+void au_cpup_attr_all(struct inode *inode, int force)
4886+{
4887+ struct inode *h_inode;
4888+
4889+ h_inode = au_h_iptr(inode, au_ibtop(inode));
4890+ au_cpup_attr_changeable(inode);
4891+ if (inode->i_nlink > 0)
4892+ au_cpup_attr_nlink(inode, force);
4893+ inode->i_rdev = h_inode->i_rdev;
4894+ inode->i_blkbits = h_inode->i_blkbits;
4895+ au_cpup_igen(inode, h_inode);
4896+}
4897+
4898+/* ---------------------------------------------------------------------- */
4899+
4900+/* Note: dt_dentry and dt_h_dentry are not dget/dput-ed */
4901+
4902+/* keep the timestamps of the parent dir when cpup */
4903+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
4904+ struct path *h_path)
4905+{
4906+ struct inode *h_inode;
4907+
4908+ dt->dt_dentry = dentry;
4909+ dt->dt_h_path = *h_path;
4910+ h_inode = d_inode(h_path->dentry);
4911+ dt->dt_atime = h_inode->i_atime;
4912+ dt->dt_mtime = h_inode->i_mtime;
4913+ /* smp_mb(); */
4914+}
4915+
4916+void au_dtime_revert(struct au_dtime *dt)
4917+{
4918+ struct iattr attr;
4919+ int err;
4920+
4921+ attr.ia_atime = dt->dt_atime;
4922+ attr.ia_mtime = dt->dt_mtime;
4923+ attr.ia_valid = ATTR_FORCE | ATTR_MTIME | ATTR_MTIME_SET
4924+ | ATTR_ATIME | ATTR_ATIME_SET;
4925+
4926+ /* no delegation since this is a directory */
4927+ err = vfsub_notify_change(&dt->dt_h_path, &attr, /*delegated*/NULL);
4928+ if (unlikely(err))
4929+ pr_warn("restoring timestamps failed(%d). ignored\n", err);
4930+}
4931+
4932+/* ---------------------------------------------------------------------- */
4933+
4934+/* internal use only */
/* source attributes for cpup_iattr(), which uses them only when @valid is set */
4935+struct au_cpup_reg_attr {
4936+ int valid;
4937+ struct kstat st;
4938+ unsigned int iflags; /* inode->i_flags */
4939+};
4940+
/*
 * Copy the inode attributes of @h_src to the lower dentry of @dst on the
 * branch @bindex: uid, gid, atime, mtime, the inode flags, and the mode when
 * it differs and the target is not a symlink. The xattrs are then copied by
 * au_cpup_xattr().
 * The values come from @h_src_attr when it is given and valid, otherwise
 * from the inode of @h_src.
 *
 * Returns 0 or the error from vfsub_notify_change() or au_cpup_xattr().
 */
4941+static noinline_for_stack
4942+int cpup_iattr(struct dentry *dst, aufs_bindex_t bindex, struct path *h_src,
4943+ struct au_cpup_reg_attr *h_src_attr)
4944+{
4945+ int err, sbits, icex;
4946+ unsigned int mnt_flags;
4947+ unsigned char verbose;
4948+ struct iattr ia;
4949+ struct path h_path;
4950+ struct inode *h_isrc, *h_idst;
4951+ struct kstat *h_st;
4952+ struct au_branch *br;
4953+
4954+ br = au_sbr(dst->d_sb, bindex);
4955+ h_path.mnt = au_br_mnt(br);
4956+ h_path.dentry = au_h_dptr(dst, bindex);
4957+ h_idst = d_inode(h_path.dentry);
4958+ h_isrc = d_inode(h_src->dentry);
4959+ ia.ia_valid = ATTR_FORCE | ATTR_UID | ATTR_GID
4960+ | ATTR_ATIME | ATTR_MTIME
4961+ | ATTR_ATIME_SET | ATTR_MTIME_SET;
4962+ if (h_src_attr && h_src_attr->valid) {
4963+ h_st = &h_src_attr->st;
4964+ ia.ia_uid = h_st->uid;
4965+ ia.ia_gid = h_st->gid;
4966+ ia.ia_atime = h_st->atime;
4967+ ia.ia_mtime = h_st->mtime;
4968+ if (h_idst->i_mode != h_st->mode
4969+ && !S_ISLNK(h_idst->i_mode)) {
4970+ ia.ia_valid |= ATTR_MODE;
4971+ ia.ia_mode = h_st->mode;
4972+ }
/* sbits: the source has a set-uid or set-gid bit */
4973+ sbits = !!(h_st->mode & (S_ISUID | S_ISGID));
4974+ au_cpup_attr_flags(h_idst, h_src_attr->iflags);
4975+ } else {
4976+ ia.ia_uid = h_isrc->i_uid;
4977+ ia.ia_gid = h_isrc->i_gid;
4978+ ia.ia_atime = h_isrc->i_atime;
4979+ ia.ia_mtime = h_isrc->i_mtime;
4980+ if (h_idst->i_mode != h_isrc->i_mode
4981+ && !S_ISLNK(h_idst->i_mode)) {
4982+ ia.ia_valid |= ATTR_MODE;
4983+ ia.ia_mode = h_isrc->i_mode;
4984+ }
4985+ sbits = !!(h_isrc->i_mode & (S_ISUID | S_ISGID));
4986+ au_cpup_attr_flags(h_idst, h_isrc->i_flags);
4987+ }
4988+ /* no delegation since it is just created */
4989+ err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL);
4990+
4991+ /* is this nfs only? */
/* on nfs, the mode is set once more when the source has a set-id bit */
4992+ if (!err && sbits && au_test_nfs(h_path.dentry->d_sb)) {
4993+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
4994+ ia.ia_mode = h_isrc->i_mode;
4995+ err = vfsub_notify_change(&h_path, &ia, /*delegated*/NULL);
4996+ }
4997+
4998+ icex = br->br_perm & AuBrAttr_ICEX;
4999+ if (!err) {
5000+ mnt_flags = au_mntflags(dst->d_sb);
5001+ verbose = !!au_opt_test(mnt_flags, VERBOSE);
5002+ err = au_cpup_xattr(&h_path, h_src, icex, verbose);
5003+ }
5004+
5005+ return err;
5006+}
5007+
5008+/* ---------------------------------------------------------------------- */
5009+
5010+static int au_do_copy_file(struct file *dst, struct file *src, loff_t len,
5011+ char *buf, unsigned long blksize)
5012+{
5013+ int err;
5014+ size_t sz, rbytes, wbytes;
5015+ unsigned char all_zero;
5016+ char *p, *zp;
5017+ struct inode *h_inode;
5018+ /* reduce stack usage: @ia is overlaid on @buf instead of the stack */
5019+ struct iattr *ia;
5020+
5021+ zp = page_address(ZERO_PAGE(0));
5022+ if (unlikely(!zp))
5023+ return -ENOMEM; /* possible? */
5024+
5025+ err = 0;
5026+ all_zero = 0;
5027+ while (len) {
5028+ AuDbg("len %lld\n", len);
5029+ sz = blksize;
5030+ if (len < blksize)
5031+ sz = len;
5032+
5033+ rbytes = 0;
5034+ /* todo: signal_pending? NOTE(review): a 0-byte read also retries */
5035+ while (!rbytes || err == -EAGAIN || err == -EINTR) {
5036+ rbytes = vfsub_read_k(src, buf, sz, &src->f_pos);
5037+ err = rbytes;
5038+ }
5039+ if (unlikely(err < 0))
5040+ break;
5041+
5042+ all_zero = 0;
5043+ if (len >= rbytes && rbytes == blksize)
5044+ all_zero = !memcmp(buf, zp, rbytes); /* full zero block */
5045+ if (!all_zero) {
5046+ wbytes = rbytes;
5047+ p = buf;
5048+ while (wbytes) {
5049+ size_t b;
5050+
5051+ b = vfsub_write_k(dst, p, wbytes, &dst->f_pos);
5052+ err = b;
5053+ /* todo: signal_pending? */
5054+ if (unlikely(err == -EAGAIN || err == -EINTR))
5055+ continue;
5056+ if (unlikely(err < 0))
5057+ break;
5058+ wbytes -= b;
5059+ p += b;
5060+ }
5061+ if (unlikely(err < 0))
5062+ break;
5063+ } else {
5064+ loff_t res;
5065+
5066+ AuLabel(hole);
5067+ res = vfsub_llseek(dst, rbytes, SEEK_CUR);
5068+ err = res;
5069+ if (unlikely(res < 0))
5070+ break;
5071+ }
5072+ len -= rbytes;
5073+ err = 0;
5074+ }
5075+
5076+ /* the last block may be a hole */
5077+ if (!err && all_zero) {
5078+ AuLabel(last hole);
5079+
5080+ err = 1; /* lets non-NFS fall through to the size update below */
5081+ if (au_test_nfs(dst->f_path.dentry->d_sb)) {
5082+ /* nfs requires this step to make last hole */
5083+ /* is this only nfs? */
5084+ do {
5085+ /* todo: signal_pending? */
5086+ err = vfsub_write_k(dst, "\0", 1, &dst->f_pos);
5087+ } while (err == -EAGAIN || err == -EINTR);
5088+ if (err == 1)
5089+ dst->f_pos--; /* exclude the byte just written from ia_size */
5090+ }
5091+
5092+ if (err == 1) {
5093+ ia = (void *)buf;
5094+ ia->ia_size = dst->f_pos;
5095+ ia->ia_valid = ATTR_SIZE | ATTR_FILE;
5096+ ia->ia_file = dst;
5097+ h_inode = file_inode(dst);
5098+ inode_lock_nested(h_inode, AuLsc_I_CHILD2);
5099+ /* no delegation since it is just created */
5100+ err = vfsub_notify_change(&dst->f_path, ia,
5101+ /*delegated*/NULL);
5102+ inode_unlock(h_inode);
5103+ }
5104+ }
5105+
5106+ return err;
5107+}
5108+
5109+int au_copy_file(struct file *dst, struct file *src, loff_t len)
5110+{
5111+ int err;
5112+ unsigned long blksize;
5113+ unsigned char do_kfree;
5114+ char *buf;
5115+ struct super_block *h_sb;
5116+
5117+ err = -ENOMEM; /* returned when the buffer allocation fails */
5118+ h_sb = file_inode(dst)->i_sb;
5119+ blksize = h_sb->s_blocksize;
5120+ if (!blksize || PAGE_SIZE < blksize)
5121+ blksize = PAGE_SIZE;
5122+ AuDbg("blksize %lu\n", blksize);
5123+ do_kfree = (blksize != PAGE_SIZE && blksize >= sizeof(struct iattr));
5124+ if (do_kfree) /* au_do_copy_file() reuses buf as a struct iattr */
5125+ buf = kmalloc(blksize, GFP_NOFS);
5126+ else
5127+ buf = (void *)__get_free_page(GFP_NOFS);
5128+ if (unlikely(!buf))
5129+ goto out;
5130+
5131+ if (len > (1 << 22))
5132+ AuDbg("copying a large file %lld\n", (long long)len);
5133+
5134+ src->f_pos = 0; /* both files are copied from their beginning */
5135+ dst->f_pos = 0;
5136+ err = au_do_copy_file(dst, src, len, buf, blksize);
5137+ if (do_kfree) {
5138+ AuDebugOn(!au_kfree_do_sz_test(blksize));
5139+ au_kfree_do_rcu(buf);
5140+ } else
5141+ free_page((unsigned long)buf);
5142+
5143+out:
5144+ return err;
5145+}
5146+
5147+static int au_do_copy(struct file *dst, struct file *src, loff_t len)
5148+{
5149+ int err;
5150+ struct super_block *h_src_sb;
5151+ struct inode *h_src_inode;
5152+
5153+ h_src_inode = file_inode(src);
5154+ h_src_sb = h_src_inode->i_sb;
5155+
5156+ /* XFS acquires inode_lock */
5157+ if (!au_test_xfs(h_src_sb))
5158+ err = au_copy_file(dst, src, len);
5159+ else {
5160+ inode_unlock_shared(h_src_inode);
5161+ err = au_copy_file(dst, src, len);
5162+ inode_lock_shared_nested(h_src_inode, AuLsc_I_CHILD);
5163+ }
5164+
5165+ return err;
5166+}
5167+
5168+static int au_clone_or_copy(struct file *dst, struct file *src, loff_t len)
5169+{
5170+ int err;
5171+ loff_t lo;
5172+ struct super_block *h_src_sb;
5173+ struct inode *h_src_inode;
5174+
5175+ h_src_inode = file_inode(src);
5176+ h_src_sb = h_src_inode->i_sb;
5177+ if (h_src_sb != file_inode(dst)->i_sb
5178+ || !dst->f_op->remap_file_range) {
5179+ err = au_do_copy(dst, src, len);
5180+ goto out;
5181+ }
5182+
5183+ if (!au_test_nfs(h_src_sb)) {
5184+ inode_unlock_shared(h_src_inode);
5185+ lo = vfsub_clone_file_range(src, dst, len);
5186+ inode_lock_shared_nested(h_src_inode, AuLsc_I_CHILD);
5187+ } else
5188+ lo = vfsub_clone_file_range(src, dst, len);
5189+ if (lo == len) {
5190+ err = 0;
5191+ goto out; /* success */
5192+ } else if (lo >= 0)
5193+ /* todo: possible? */
5194+ /* partially succeeded; fall through to the plain copy below */
5195+ AuDbg("lo %lld, len %lld. Retrying.\n", lo, len);
5196+ else if (lo != -EOPNOTSUPP) {
5197+ /* older XFS has a condition in cloning */
5198+ err = lo;
5199+ goto out;
5200+ }
5201+
5202+ /* the backend fs on NFS may not support cloning */
5203+ err = au_do_copy(dst, src, len);
5204+
5205+out:
5206+ AuTraceErr(err);
5207+ return err;
5208+}
5209+
5210+/*
5211+ * to support a sparse file which is opened with O_APPEND,
5212+ * we need to close the file.
5213+ */
5214+static int au_cp_regular(struct au_cp_generic *cpg)
5215+{
5216+ int err, i;
5217+ enum { SRC, DST };
5218+ struct {
5219+ aufs_bindex_t bindex;
5220+ unsigned int flags;
5221+ struct dentry *dentry;
5222+ int force_wr;
5223+ struct file *file;
5224+ } *f, file[] = {
5225+ {
5226+ .bindex = cpg->bsrc,
5227+ .flags = O_RDONLY | O_NOATIME | O_LARGEFILE,
5228+ },
5229+ {
5230+ .bindex = cpg->bdst,
5231+ .flags = O_WRONLY | O_NOATIME | O_LARGEFILE,
5232+ .force_wr = !!au_ftest_cpup(cpg->flags, RWDST),
5233+ }
5234+ };
5235+ struct au_branch *br;
5236+ struct super_block *sb, *h_src_sb;
5237+ struct inode *h_src_inode;
5238+ struct task_struct *tsk = current;
5239+
5240+ /* bsrc branch can be ro/rw. */
5241+ sb = cpg->dentry->d_sb;
5242+ f = file;
5243+ for (i = 0; i < 2; i++, f++) {
5244+ f->dentry = au_h_dptr(cpg->dentry, f->bindex);
5245+ f->file = au_h_open(cpg->dentry, f->bindex, f->flags,
5246+ /*file*/NULL, f->force_wr);
5247+ if (IS_ERR(f->file)) {
5248+ err = PTR_ERR(f->file);
5249+ if (i == SRC)
5250+ goto out;
5251+ else
5252+ goto out_src;
5253+ }
5254+ }
5255+
5256+ /* try stopping to update while we copyup */
5257+ h_src_inode = d_inode(file[SRC].dentry);
5258+ h_src_sb = h_src_inode->i_sb;
5259+ if (!au_test_nfs(h_src_sb))
5260+ IMustLock(h_src_inode);
5261+ err = au_clone_or_copy(file[DST].file, file[SRC].file, cpg->len);
5262+
5263+ /* i wonder if we had O_NO_DELAY_FPUT flag */
5264+ if (tsk->flags & PF_KTHREAD)
5265+ __fput_sync(file[DST].file);
5266+ else {
5267+ /* it happened actually */
5268+ fput(file[DST].file);
5269+ /*
5270+ * too bad.
5271+ * we have to call both since we don't know which place the file
5272+ * was added to.
5273+ */
5274+ task_work_run();
5275+ flush_delayed_fput();
5276+ }
5277+ br = au_sbr(sb, file[DST].bindex);
5278+ au_lcnt_dec(&br->br_nfiles);
5279+
5280+out_src:
5281+ fput(file[SRC].file);
5282+ br = au_sbr(sb, file[SRC].bindex);
5283+ au_lcnt_dec(&br->br_nfiles);
5284+out:
5285+ return err;
5286+}
5287+
5288+static int au_do_cpup_regular(struct au_cp_generic *cpg,
5289+ struct au_cpup_reg_attr *h_src_attr)
5290+{
5291+ int err, rerr;
5292+ loff_t l;
5293+ struct path h_path;
5294+ struct inode *h_src_inode, *h_dst_inode;
5295+
5296+ err = 0;
5297+ h_src_inode = au_h_iptr(d_inode(cpg->dentry), cpg->bsrc);
5298+ l = i_size_read(h_src_inode);
5299+ if (cpg->len == -1 || l < cpg->len)
5300+ cpg->len = l; /* -1 or oversize: clamp to the source size */
5301+ if (cpg->len) {
5302+ /* try stopping to update while we are referencing */
5303+ inode_lock_shared_nested(h_src_inode, AuLsc_I_CHILD);
5304+ au_pin_hdir_unlock(cpg->pin);
5305+
5306+ h_path.dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
5307+ h_path.mnt = au_sbr_mnt(cpg->dentry->d_sb, cpg->bsrc);
5308+ h_src_attr->iflags = h_src_inode->i_flags;
5309+ if (!au_test_nfs(h_src_inode->i_sb))
5310+ err = vfsub_getattr(&h_path, &h_src_attr->st);
5311+ else {
5312+ inode_unlock_shared(h_src_inode);
5313+ err = vfsub_getattr(&h_path, &h_src_attr->st);
5314+ inode_lock_shared_nested(h_src_inode, AuLsc_I_CHILD);
5315+ }
5316+ if (unlikely(err)) {
5317+ inode_unlock_shared(h_src_inode);
5318+ goto out; /* NOTE(review): skips au_pin_hdir_relock() - confirm */
5319+ }
5320+ h_src_attr->valid = 1;
5321+ if (!au_test_nfs(h_src_inode->i_sb)) {
5322+ err = au_cp_regular(cpg);
5323+ inode_unlock_shared(h_src_inode);
5324+ } else {
5325+ inode_unlock_shared(h_src_inode);
5326+ err = au_cp_regular(cpg);
5327+ }
5328+ rerr = au_pin_hdir_relock(cpg->pin);
5329+ if (!err && rerr)
5330+ err = rerr; /* a copy error takes precedence over rerr */
5331+ }
5332+ if (!err && (h_src_inode->i_state & I_LINKABLE)) {
5333+ h_path.dentry = au_h_dptr(cpg->dentry, cpg->bdst);
5334+ h_dst_inode = d_inode(h_path.dentry);
5335+ spin_lock(&h_dst_inode->i_lock);
5336+ h_dst_inode->i_state |= I_LINKABLE;
5337+ spin_unlock(&h_dst_inode->i_lock);
5338+ }
5339+
5340+out:
5341+ return err;
5342+}
5343+
5344+static int au_do_cpup_symlink(struct path *h_path, struct dentry *h_src,
5345+ struct inode *h_dir)
5346+{
5347+ int err;
5348+ DEFINE_DELAYED_CALL(done);
5349+ const char *sym;
5350+
5351+ sym = vfs_get_link(h_src, &done);
5352+ err = PTR_ERR(sym);
5353+ if (IS_ERR(sym))
5354+ goto out;
5355+
5356+ err = vfsub_symlink(h_dir, h_path, sym);
5357+
5358+out:
5359+ do_delayed_call(&done);
5360+ return err;
5361+}
5362+
5363+/*
5364+ * regardless of the 'acl' option, reset all ACLs.
5365+ * All ACLs will be copied up later from the original entry on the lower branch.
5366+ */
5367+static int au_reset_acl(struct path *h_path, umode_t mode)
5368+{
5369+ int err;
5370+ struct dentry *h_dentry;
5371+ /* struct inode *h_inode; */
5372+ struct user_namespace *h_userns;
5373+
5374+ h_userns = mnt_user_ns(h_path->mnt);
5375+ h_dentry = h_path->dentry;
5376+ /* h_inode = d_inode(h_dentry); */
5377+ /* forget_all_cached_acls(h_inode)); */
5378+ err = vfsub_remove_acl(h_userns, h_dentry, XATTR_NAME_POSIX_ACL_ACCESS);
5379+ AuTraceErr(err);
5380+ if (err == -EOPNOTSUPP)
5381+ err = 0;
5382+ if (!err)
5383+ err = vfsub_acl_chmod(h_userns, h_dentry, mode);
5384+
5385+ AuTraceErr(err);
5386+ return err;
5387+}
5388+
5389+static int au_do_cpup_dir(struct au_cp_generic *cpg, struct dentry *dst_parent,
5390+ struct inode *h_dir, struct path *h_path)
5391+{
5392+ int err;
5393+ struct inode *dir, *inode;
5394+ struct user_namespace *h_userns;
5395+
5396+ h_userns = mnt_user_ns(h_path->mnt);
5397+ err = vfsub_remove_acl(h_userns, h_path->dentry,
5398+ XATTR_NAME_POSIX_ACL_DEFAULT);
5399+ AuTraceErr(err);
5400+ if (err == -EOPNOTSUPP)
5401+ err = 0;
5402+ if (unlikely(err))
5403+ goto out;
5404+
5405+ /*
5406+ * strange behaviour from the users view,
5407+ * particularly setattr case
5408+ */
5409+ dir = d_inode(dst_parent);
5410+ if (au_ibtop(dir) == cpg->bdst)
5411+ au_cpup_attr_nlink(dir, /*force*/1);
5412+ inode = d_inode(cpg->dentry);
5413+ au_cpup_attr_nlink(inode, /*force*/1);
5414+
5415+out:
5416+ return err;
5417+}
5418+
5419+static noinline_for_stack
5420+int cpup_entry(struct au_cp_generic *cpg, struct dentry *dst_parent,
5421+ struct au_cpup_reg_attr *h_src_attr)
5422+{
5423+ int err;
5424+ umode_t mode;
5425+ unsigned int mnt_flags;
5426+ unsigned char isdir, isreg, force;
5427+ const unsigned char do_dt = !!au_ftest_cpup(cpg->flags, DTIME);
5428+ struct au_dtime dt;
5429+ struct path h_path;
5430+ struct dentry *h_src, *h_dst, *h_parent;
5431+ struct inode *h_inode, *h_dir;
5432+ struct super_block *sb;
5433+
5434+ /* bsrc branch can be ro/rw. */
5435+ h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
5436+ h_inode = d_inode(h_src);
5437+ AuDebugOn(h_inode != au_h_iptr(d_inode(cpg->dentry), cpg->bsrc));
5438+
5439+ /* try stopping to be referenced while we are creating */
5440+ h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
5441+ if (au_ftest_cpup(cpg->flags, RENAME))
5442+ AuDebugOn(strncmp(h_dst->d_name.name, AUFS_WH_PFX,
5443+ AUFS_WH_PFX_LEN));
5444+ h_parent = h_dst->d_parent; /* dir inode is locked */
5445+ h_dir = d_inode(h_parent);
5446+ IMustLock(h_dir);
5447+ AuDebugOn(h_parent != h_dst->d_parent);
5448+
5449+ sb = cpg->dentry->d_sb;
5450+ h_path.mnt = au_sbr_mnt(sb, cpg->bdst);
5451+ if (do_dt) {
5452+ h_path.dentry = h_parent;
5453+ au_dtime_store(&dt, dst_parent, &h_path);
5454+ }
5455+ h_path.dentry = h_dst;
5456+
5457+ isreg = 0;
5458+ isdir = 0;
5459+ mode = h_inode->i_mode;
5460+ switch (mode & S_IFMT) {
5461+ case S_IFREG:
5462+ isreg = 1;
5463+ err = vfsub_create(h_dir, &h_path, 0600, /*want_excl*/true);
5464+ if (!err)
5465+ err = au_do_cpup_regular(cpg, h_src_attr);
5466+ break;
5467+ case S_IFDIR:
5468+ isdir = 1;
5469+ err = vfsub_mkdir(h_dir, &h_path, mode);
5470+ if (!err)
5471+ err = au_do_cpup_dir(cpg, dst_parent, h_dir, &h_path);
5472+ break;
5473+ case S_IFLNK:
5474+ err = au_do_cpup_symlink(&h_path, h_src, h_dir);
5475+ break;
5476+ case S_IFCHR:
5477+ case S_IFBLK:
5478+ AuDebugOn(!capable(CAP_MKNOD));
5479+ fallthrough;
5480+ case S_IFIFO:
5481+ case S_IFSOCK:
5482+ err = vfsub_mknod(h_dir, &h_path, mode, h_inode->i_rdev);
5483+ break;
5484+ default:
5485+ AuIOErr("Unknown inode type 0%o\n", mode);
5486+ err = -EIO;
5487+ }
5488+ if (!err)
5489+ err = au_reset_acl(&h_path, mode);
5490+
5491+ mnt_flags = au_mntflags(sb);
5492+ if (!au_opt_test(mnt_flags, UDBA_NONE)
5493+ && !isdir
5494+ && au_opt_test(mnt_flags, XINO)
5495+ && (h_inode->i_nlink == 1
5496+ || (h_inode->i_state & I_LINKABLE))
5497+ /* todo: unnecessary? */
5498+ /* && d_inode(cpg->dentry)->i_nlink == 1 */
5499+ && cpg->bdst < cpg->bsrc
5500+ && !au_ftest_cpup(cpg->flags, KEEPLINO))
5501+ au_xino_write(sb, cpg->bsrc, h_inode->i_ino, /*ino*/0);
5502+ /* ignore this error */
5503+
5504+ if (!err) {
5505+ force = 0;
5506+ if (isreg) {
5507+ force = !!cpg->len;
5508+ if (cpg->len == -1)
5509+ force = !!i_size_read(h_inode);
5510+ }
5511+ au_fhsm_wrote(sb, cpg->bdst, force);
5512+ }
5513+
5514+ if (do_dt)
5515+ au_dtime_revert(&dt);
5516+ return err;
5517+}
5518+
5519+static int au_do_ren_after_cpup(struct au_cp_generic *cpg, struct path *h_path)
5520+{
5521+ int err;
5522+ struct dentry *dentry, *h_dentry, *h_parent, *parent;
5523+ struct path h_ppath;
5524+ struct inode *h_dir;
5525+ aufs_bindex_t bdst;
5526+
5527+ dentry = cpg->dentry;
5528+ bdst = cpg->bdst;
5529+ h_ppath.mnt = au_sbr_mnt(dentry->d_sb, bdst);
5530+ h_dentry = au_h_dptr(dentry, bdst);
5531+ if (!au_ftest_cpup(cpg->flags, OVERWRITE)) {
5532+ dget(h_dentry);
5533+ au_set_h_dptr(dentry, bdst, NULL);
5534+ err = au_lkup_neg(dentry, bdst, /*wh*/0);
5535+ if (!err)
5536+ h_path->dentry = dget(au_h_dptr(dentry, bdst));
5537+ au_set_h_dptr(dentry, bdst, h_dentry);
5538+ } else {
5539+ err = 0;
5540+ parent = dget_parent(dentry);
5541+ h_ppath.dentry = au_h_dptr(parent, bdst);
5542+ dput(parent);
5543+ h_path->dentry = vfsub_lkup_one(&dentry->d_name, &h_ppath);
5544+ if (IS_ERR(h_path->dentry))
5545+ err = PTR_ERR(h_path->dentry);
5546+ }
5547+ if (unlikely(err))
5548+ goto out;
5549+
5550+ h_parent = h_dentry->d_parent; /* dir inode is locked */
5551+ h_dir = d_inode(h_parent);
5552+ IMustLock(h_dir);
5553+ AuDbg("%pd %pd\n", h_dentry, h_path->dentry);
5554+ /* no delegation since it is just created */
5555+ err = vfsub_rename(h_dir, h_dentry, h_dir, h_path, /*delegated*/NULL,
5556+ /*flags*/0);
5557+ dput(h_path->dentry);
5558+
5559+out:
5560+ return err;
5561+}
5562+
5563+/*
5564+ * copyup the @dentry from @bsrc to @bdst.
5565+ * the caller must set both of the lower dentries.
5566+ * @len is for truncating; when it is -1, copyup the entire file.
5567+ * in link/rename cases, @dst_parent may be different from the real one.
5568+ * cpg->bsrc can be larger than cpg->bdst.
5569+ * aufs doesn't touch the credential so
5570+ * security_inode_copy_up{,_xattr}() are unnecessary.
5571+ */
5572+static int au_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
5573+{
5574+ int err, rerr;
5575+ aufs_bindex_t old_ibtop;
5576+ unsigned char isdir, plink;
5577+ struct dentry *h_src, *h_dst, *h_parent;
5578+ struct inode *dst_inode, *h_dir, *inode, *delegated, *src_inode;
5579+ struct super_block *sb;
5580+ struct au_branch *br;
5581+ struct path h_src_path;
5582+ /* to reduce stack size */
5583+ struct {
5584+ struct au_dtime dt;
5585+ struct path h_path;
5586+ struct au_cpup_reg_attr h_src_attr;
5587+ } *a;
5588+
5589+ err = -ENOMEM;
5590+ a = kmalloc(sizeof(*a), GFP_NOFS);
5591+ if (unlikely(!a))
5592+ goto out;
5593+ a->h_src_attr.valid = 0;
5594+
5595+ sb = cpg->dentry->d_sb;
5596+ br = au_sbr(sb, cpg->bdst);
5597+ a->h_path.mnt = au_br_mnt(br);
5598+ h_dst = au_h_dptr(cpg->dentry, cpg->bdst);
5599+ h_parent = h_dst->d_parent; /* dir inode is locked */
5600+ h_dir = d_inode(h_parent);
5601+ IMustLock(h_dir);
5602+
5603+ h_src = au_h_dptr(cpg->dentry, cpg->bsrc);
5604+ inode = d_inode(cpg->dentry);
5605+
5606+ if (!dst_parent)
5607+ dst_parent = dget_parent(cpg->dentry);
5608+ else
5609+ dget(dst_parent);
5610+
5611+ plink = !!au_opt_test(au_mntflags(sb), PLINK);
5612+ dst_inode = au_h_iptr(inode, cpg->bdst);
5613+ if (dst_inode) {
5614+ if (unlikely(!plink)) {
5615+ err = -EIO;
5616+ AuIOErr("hi%lu(i%lu) exists on b%d "
5617+ "but plink is disabled\n",
5618+ dst_inode->i_ino, inode->i_ino, cpg->bdst);
5619+ goto out_parent;
5620+ }
5621+
5622+ if (dst_inode->i_nlink) {
5623+ const int do_dt = au_ftest_cpup(cpg->flags, DTIME);
5624+
5625+ h_src = au_plink_lkup(inode, cpg->bdst);
5626+ err = PTR_ERR(h_src);
5627+ if (IS_ERR(h_src))
5628+ goto out_parent;
5629+ if (unlikely(d_is_negative(h_src))) {
5630+ err = -EIO;
5631+ AuIOErr("i%lu exists on b%d "
5632+ "but not pseudo-linked\n",
5633+ inode->i_ino, cpg->bdst);
5634+ dput(h_src);
5635+ goto out_parent;
5636+ }
5637+
5638+ if (do_dt) {
5639+ a->h_path.dentry = h_parent;
5640+ au_dtime_store(&a->dt, dst_parent, &a->h_path);
5641+ }
5642+
5643+ a->h_path.dentry = h_dst;
5644+ delegated = NULL;
5645+ err = vfsub_link(h_src, h_dir, &a->h_path, &delegated);
5646+ if (!err && au_ftest_cpup(cpg->flags, RENAME))
5647+ err = au_do_ren_after_cpup(cpg, &a->h_path);
5648+ if (do_dt)
5649+ au_dtime_revert(&a->dt);
5650+ if (unlikely(err == -EWOULDBLOCK)) {
5651+ pr_warn("cannot retry for NFSv4 delegation"
5652+ " for an internal link\n");
5653+ iput(delegated);
5654+ }
5655+ dput(h_src);
5656+ goto out_parent;
5657+ } else
5658+ /* todo: cpup_wh_file? */
5659+ /* udba work */
5660+ au_update_ibrange(inode, /*do_put_zero*/1);
5661+ }
5662+
5663+ isdir = S_ISDIR(inode->i_mode);
5664+ old_ibtop = au_ibtop(inode);
5665+ err = cpup_entry(cpg, dst_parent, &a->h_src_attr);
5666+ if (unlikely(err))
5667+ goto out_rev;
5668+ dst_inode = d_inode(h_dst);
5669+ inode_lock_nested(dst_inode, AuLsc_I_CHILD2);
5670+ /* todo: necessary? */
5671+ /* au_pin_hdir_unlock(cpg->pin); */
5672+
5673+ h_src_path.dentry = h_src;
5674+ h_src_path.mnt = au_sbr_mnt(sb, cpg->bsrc);
5675+ err = cpup_iattr(cpg->dentry, cpg->bdst, &h_src_path, &a->h_src_attr);
5676+ if (unlikely(err)) {
5677+ /* todo: necessary? */
5678+ /* au_pin_hdir_relock(cpg->pin); */ /* ignore an error */
5679+ inode_unlock(dst_inode);
5680+ goto out_rev;
5681+ }
5682+
5683+ if (cpg->bdst < old_ibtop) {
5684+ if (S_ISREG(inode->i_mode)) {
5685+ err = au_dy_iaop(inode, cpg->bdst, dst_inode);
5686+ if (unlikely(err)) {
5687+ /* ignore an error */
5688+ /* au_pin_hdir_relock(cpg->pin); */
5689+ inode_unlock(dst_inode);
5690+ goto out_rev;
5691+ }
5692+ }
5693+ au_set_ibtop(inode, cpg->bdst);
5694+ } else
5695+ au_set_ibbot(inode, cpg->bdst);
5696+ au_set_h_iptr(inode, cpg->bdst, au_igrab(dst_inode),
5697+ au_hi_flags(inode, isdir));
5698+
5699+ /* todo: necessary? */
5700+ /* err = au_pin_hdir_relock(cpg->pin); */
5701+ inode_unlock(dst_inode);
5702+ if (unlikely(err))
5703+ goto out_rev;
5704+
5705+ src_inode = d_inode(h_src);
5706+ if (!isdir
5707+ && (src_inode->i_nlink > 1
5708+ || src_inode->i_state & I_LINKABLE)
5709+ && plink)
5710+ au_plink_append(inode, cpg->bdst, h_dst);
5711+
5712+ if (au_ftest_cpup(cpg->flags, RENAME)) {
5713+ a->h_path.dentry = h_dst;
5714+ err = au_do_ren_after_cpup(cpg, &a->h_path);
5715+ }
5716+ if (!err)
5717+ goto out_parent; /* success */
5718+
5719+ /* revert */
5720+out_rev:
5721+ a->h_path.dentry = h_parent;
5722+ au_dtime_store(&a->dt, dst_parent, &a->h_path);
5723+ a->h_path.dentry = h_dst;
5724+ rerr = 0;
5725+ if (d_is_positive(h_dst)) {
5726+ if (!isdir) {
5727+ /* no delegation since it is just created */
5728+ rerr = vfsub_unlink(h_dir, &a->h_path,
5729+ /*delegated*/NULL, /*force*/0);
5730+ } else
5731+ rerr = vfsub_rmdir(h_dir, &a->h_path);
5732+ }
5733+ au_dtime_revert(&a->dt);
5734+ if (rerr) {
5735+ AuIOErr("failed removing broken entry(%d, %d)\n", err, rerr);
5736+ err = -EIO;
5737+ }
5738+out_parent:
5739+ dput(dst_parent);
5740+ au_kfree_rcu(a);
5741+out:
5742+ return err;
5743+}
5744+
5745+#if 0 /* reserved */
5746+struct au_cpup_single_args {
5747+ int *errp;
5748+ struct au_cp_generic *cpg;
5749+ struct dentry *dst_parent;
5750+};
5751+
5752+static void au_call_cpup_single(void *args)
5753+{
5754+ struct au_cpup_single_args *a = args;
5755+
5756+ au_pin_hdir_acquire_nest(a->cpg->pin);
5757+ *a->errp = au_cpup_single(a->cpg, a->dst_parent);
5758+ au_pin_hdir_release(a->cpg->pin);
5759+}
5760+#endif
5761+
5762+/*
5763+ * prevent SIGXFSZ in copy-up.
5764+ * testing CAP_MKNOD is for generic fs,
5765+ * but CAP_FSETID is for xfs only, currently.
5766+ */
5767+static int au_cpup_sio_test(struct au_pin *pin, umode_t mode)
5768+{
5769+ int do_sio;
5770+ struct super_block *sb;
5771+ struct inode *h_dir;
5772+
5773+ do_sio = 0;
5774+ sb = au_pinned_parent(pin)->d_sb;
5775+ if (!au_wkq_test()
5776+ && (!au_sbi(sb)->si_plink_maint_pid
5777+ || au_plink_maint(sb, AuLock_NOPLM))) {
5778+ switch (mode & S_IFMT) {
5779+ case S_IFREG:
5780+ /* no condition about RLIMIT_FSIZE and the file size */
5781+ do_sio = 1;
5782+ break;
5783+ case S_IFCHR:
5784+ case S_IFBLK:
5785+ do_sio = !capable(CAP_MKNOD);
5786+ break;
5787+ }
5788+ if (!do_sio)
5789+ do_sio = ((mode & (S_ISUID | S_ISGID))
5790+ && !capable(CAP_FSETID));
5791+ /* this workaround may be removed in the future */
5792+ if (!do_sio) {
5793+ h_dir = au_pinned_h_dir(pin);
5794+ do_sio = h_dir->i_mode & S_ISVTX;
5795+ }
5796+ }
5797+
5798+ return do_sio;
5799+}
5800+
5801+#if 0 /* reserved */
5802+int au_sio_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
5803+{
5804+ int err, wkq_err;
5805+ struct dentry *h_dentry;
5806+
5807+ h_dentry = au_h_dptr(cpg->dentry, cpg->bsrc);
5808+ if (!au_cpup_sio_test(pin, d_inode(h_dentry)->i_mode))
5809+ err = au_cpup_single(cpg, dst_parent);
5810+ else {
5811+ struct au_cpup_single_args args = {
5812+ .errp = &err,
5813+ .cpg = cpg,
5814+ .dst_parent = dst_parent
5815+ };
5816+ wkq_err = au_wkq_wait(au_call_cpup_single, &args);
5817+ if (unlikely(wkq_err))
5818+ err = wkq_err;
5819+ }
5820+
5821+ return err;
5822+}
5823+#endif
5824+
5825+/*
5826+ * copyup the @dentry from the first active lower branch to @bdst,
5827+ * using au_cpup_single().
5828+ */
5829+static int au_cpup_simple(struct au_cp_generic *cpg)
5830+{
5831+ int err;
5832+ unsigned int flags_orig;
5833+ struct dentry *dentry;
5834+
5835+ AuDebugOn(cpg->bsrc < 0);
5836+
5837+ dentry = cpg->dentry;
5838+ DiMustWriteLock(dentry);
5839+
5840+ err = au_lkup_neg(dentry, cpg->bdst, /*wh*/1);
5841+ if (!err) {
5842+ flags_orig = cpg->flags;
5843+ au_fset_cpup(cpg->flags, RENAME);
5844+ err = au_cpup_single(cpg, NULL);
5845+ cpg->flags = flags_orig;
5846+ if (!err)
5847+ return 0; /* success */
5848+
5849+ /* revert */
5850+ au_set_h_dptr(dentry, cpg->bdst, NULL);
5851+ au_set_dbtop(dentry, cpg->bsrc);
5852+ }
5853+
5854+ return err;
5855+}
5856+
5857+struct au_cpup_simple_args {
5858+ int *errp;
5859+ struct au_cp_generic *cpg;
5860+};
5861+
5862+static void au_call_cpup_simple(void *args)
5863+{
5864+ struct au_cpup_simple_args *a = args;
5865+
5866+ au_pin_hdir_acquire_nest(a->cpg->pin);
5867+ *a->errp = au_cpup_simple(a->cpg);
5868+ au_pin_hdir_release(a->cpg->pin);
5869+}
5870+
5871+static int au_do_sio_cpup_simple(struct au_cp_generic *cpg)
5872+{
5873+ int err, wkq_err;
5874+ struct dentry *dentry, *parent;
5875+ struct file *h_file;
5876+ struct inode *h_dir;
5877+ struct user_namespace *h_userns;
5878+
5879+ dentry = cpg->dentry;
5880+ h_file = NULL;
5881+ if (au_ftest_cpup(cpg->flags, HOPEN)) {
5882+ AuDebugOn(cpg->bsrc < 0);
5883+ h_file = au_h_open_pre(dentry, cpg->bsrc, /*force_wr*/0);
5884+ err = PTR_ERR(h_file);
5885+ if (IS_ERR(h_file))
5886+ goto out;
5887+ }
5888+
5889+ parent = dget_parent(dentry);
5890+ h_dir = au_h_iptr(d_inode(parent), cpg->bdst);
5891+ h_userns = au_sbr_userns(dentry->d_sb, cpg->bdst);
5892+ if (!au_test_h_perm_sio(h_userns, h_dir, MAY_EXEC | MAY_WRITE)
5893+ && !au_cpup_sio_test(cpg->pin, d_inode(dentry)->i_mode))
5894+ err = au_cpup_simple(cpg);
5895+ else {
5896+ struct au_cpup_simple_args args = {
5897+ .errp = &err,
5898+ .cpg = cpg
5899+ };
5900+ wkq_err = au_wkq_wait(au_call_cpup_simple, &args);
5901+ if (unlikely(wkq_err))
5902+ err = wkq_err;
5903+ }
5904+
5905+ dput(parent);
5906+ if (h_file)
5907+ au_h_open_post(dentry, cpg->bsrc, h_file);
5908+
5909+out:
5910+ return err;
5911+}
5912+
5913+int au_sio_cpup_simple(struct au_cp_generic *cpg)
5914+{
5915+ aufs_bindex_t bsrc, bbot;
5916+ struct dentry *dentry, *h_dentry;
5917+
5918+ if (cpg->bsrc < 0) {
5919+ dentry = cpg->dentry;
5920+ bbot = au_dbbot(dentry);
5921+ for (bsrc = cpg->bdst + 1; bsrc <= bbot; bsrc++) {
5922+ h_dentry = au_h_dptr(dentry, bsrc);
5923+ if (h_dentry) {
5924+ AuDebugOn(d_is_negative(h_dentry));
5925+ break;
5926+ }
5927+ }
5928+ AuDebugOn(bsrc > bbot);
5929+ cpg->bsrc = bsrc;
5930+ }
5931+ AuDebugOn(cpg->bsrc <= cpg->bdst);
5932+ return au_do_sio_cpup_simple(cpg);
5933+}
5934+
5935+int au_sio_cpdown_simple(struct au_cp_generic *cpg)
5936+{
5937+ AuDebugOn(cpg->bdst <= cpg->bsrc);
5938+ return au_do_sio_cpup_simple(cpg);
5939+}
5940+
5941+/* ---------------------------------------------------------------------- */
5942+
5943+/*
5944+ * copyup the deleted file for writing.
5945+ */
5946+static int au_do_cpup_wh(struct au_cp_generic *cpg, struct dentry *wh_dentry,
5947+ struct file *file)
5948+{
5949+ int err;
5950+ unsigned int flags_orig;
5951+ aufs_bindex_t bsrc_orig;
5952+ struct au_dinfo *dinfo;
5953+ struct {
5954+ struct au_hdentry *hd;
5955+ struct dentry *h_dentry;
5956+ } hdst, hsrc;
5957+
5958+ dinfo = au_di(cpg->dentry);
5959+ AuRwMustWriteLock(&dinfo->di_rwsem);
5960+
5961+ bsrc_orig = cpg->bsrc;
5962+ cpg->bsrc = dinfo->di_btop;
5963+ hdst.hd = au_hdentry(dinfo, cpg->bdst);
5964+ hdst.h_dentry = hdst.hd->hd_dentry;
5965+ hdst.hd->hd_dentry = wh_dentry; /* temporary; restored below */
5966+ dinfo->di_btop = cpg->bdst;
5967+
5968+ hsrc.h_dentry = NULL;
5969+ if (file) {
5970+ hsrc.hd = au_hdentry(dinfo, cpg->bsrc);
5971+ hsrc.h_dentry = hsrc.hd->hd_dentry;
5972+ hsrc.hd->hd_dentry = au_hf_top(file)->f_path.dentry;
5973+ }
5974+ flags_orig = cpg->flags;
5975+ cpg->flags = !AuCpup_DTIME; /* logical NOT; presumably 0 - TODO confirm */
5976+ err = au_cpup_single(cpg, /*dst_parent*/NULL);
5977+ cpg->flags = flags_orig;
5978+ if (file) {
5979+ if (!err)
5980+ err = au_reopen_nondir(file);
5981+ hsrc.hd->hd_dentry = hsrc.h_dentry;
5982+ }
5983+ hdst.hd->hd_dentry = hdst.h_dentry;
5984+ dinfo->di_btop = cpg->bsrc;
5985+ cpg->bsrc = bsrc_orig;
5986+
5987+ return err;
5988+}
5989+
5990+static int au_cpup_wh(struct au_cp_generic *cpg, struct file *file)
5991+{
5992+ int err;
5993+ aufs_bindex_t bdst;
5994+ struct au_dtime dt;
5995+ struct dentry *dentry, *parent, *h_parent, *wh_dentry;
5996+ struct au_branch *br;
5997+ struct path h_path;
5998+
5999+ dentry = cpg->dentry;
6000+ bdst = cpg->bdst;
6001+ br = au_sbr(dentry->d_sb, bdst);
6002+ parent = dget_parent(dentry);
6003+ h_parent = au_h_dptr(parent, bdst);
6004+ wh_dentry = au_whtmp_lkup(h_parent, br, &dentry->d_name);
6005+ err = PTR_ERR(wh_dentry);
6006+ if (IS_ERR(wh_dentry))
6007+ goto out;
6008+
6009+ h_path.dentry = h_parent;
6010+ h_path.mnt = au_br_mnt(br);
6011+ au_dtime_store(&dt, parent, &h_path);
6012+ err = au_do_cpup_wh(cpg, wh_dentry, file);
6013+ if (unlikely(err))
6014+ goto out_wh;
6015+
6016+ dget(wh_dentry);
6017+ h_path.dentry = wh_dentry;
6018+ if (!d_is_dir(wh_dentry)) {
6019+ /* no delegation since it is just created */
6020+ err = vfsub_unlink(d_inode(h_parent), &h_path,
6021+ /*delegated*/NULL, /*force*/0);
6022+ } else
6023+ err = vfsub_rmdir(d_inode(h_parent), &h_path);
6024+ if (unlikely(err)) {
6025+ AuIOErr("failed remove copied-up tmp file %pd(%d)\n",
6026+ wh_dentry, err);
6027+ err = -EIO;
6028+ }
6029+ au_dtime_revert(&dt);
6030+ au_set_hi_wh(d_inode(dentry), bdst, wh_dentry);
6031+
6032+out_wh:
6033+ dput(wh_dentry);
6034+out:
6035+ dput(parent);
6036+ return err;
6037+}
6038+
6039+struct au_cpup_wh_args {
6040+ int *errp;
6041+ struct au_cp_generic *cpg;
6042+ struct file *file;
6043+};
6044+
6045+static void au_call_cpup_wh(void *args)
6046+{
6047+ struct au_cpup_wh_args *a = args;
6048+
6049+ au_pin_hdir_acquire_nest(a->cpg->pin);
6050+ *a->errp = au_cpup_wh(a->cpg, a->file);
6051+ au_pin_hdir_release(a->cpg->pin);
6052+}
6053+
6054+int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file)
6055+{
6056+ int err, wkq_err;
6057+ aufs_bindex_t bdst;
6058+ struct dentry *dentry, *parent, *h_orph, *h_parent;
6059+ struct inode *dir, *h_dir, *h_tmpdir;
6060+ struct au_wbr *wbr;
6061+ struct au_pin wh_pin, *pin_orig;
6062+ struct user_namespace *h_userns;
6063+
6064+ dentry = cpg->dentry;
6065+ bdst = cpg->bdst;
6066+ parent = dget_parent(dentry);
6067+ dir = d_inode(parent);
6068+ h_orph = NULL;
6069+ h_parent = NULL;
6070+ h_dir = au_igrab(au_h_iptr(dir, bdst));
6071+ h_tmpdir = h_dir;
6072+ pin_orig = NULL;
6073+ if (!h_dir->i_nlink) {
6074+ wbr = au_sbr(dentry->d_sb, bdst)->br_wbr;
6075+ h_orph = wbr->wbr_orph;
6076+
6077+ h_parent = dget(au_h_dptr(parent, bdst));
6078+ au_set_h_dptr(parent, bdst, dget(h_orph));
6079+ h_tmpdir = d_inode(h_orph);
6080+ au_set_h_iptr(dir, bdst, au_igrab(h_tmpdir), /*flags*/0);
6081+
6082+ inode_lock_nested(h_tmpdir, AuLsc_I_PARENT3);
6083+ /* todo: au_h_open_pre()? */
6084+
6085+ pin_orig = cpg->pin;
6086+ au_pin_init(&wh_pin, dentry, bdst, AuLsc_DI_PARENT,
6087+ AuLsc_I_PARENT3, cpg->pin->udba, AuPin_DI_LOCKED);
6088+ cpg->pin = &wh_pin;
6089+ }
6090+
6091+ h_userns = au_sbr_userns(dentry->d_sb, bdst);
6092+ if (!au_test_h_perm_sio(h_userns, h_tmpdir, MAY_EXEC | MAY_WRITE)
6093+ && !au_cpup_sio_test(cpg->pin, d_inode(dentry)->i_mode))
6094+ err = au_cpup_wh(cpg, file);
6095+ else {
6096+ struct au_cpup_wh_args args = {
6097+ .errp = &err,
6098+ .cpg = cpg,
6099+ .file = file
6100+ };
6101+ wkq_err = au_wkq_wait(au_call_cpup_wh, &args);
6102+ if (unlikely(wkq_err))
6103+ err = wkq_err;
6104+ }
6105+
6106+ if (h_orph) {
6107+ inode_unlock(h_tmpdir);
6108+ /* todo: au_h_open_post()? */
6109+ au_set_h_iptr(dir, bdst, au_igrab(h_dir), /*flags*/0);
6110+ au_set_h_dptr(parent, bdst, h_parent);
6111+ AuDebugOn(!pin_orig);
6112+ cpg->pin = pin_orig;
6113+ }
6114+ iput(h_dir);
6115+ dput(parent);
6116+
6117+ return err;
6118+}
6119+
6120+/* ---------------------------------------------------------------------- */
6121+/* Copies, topmost first, each ancestor of @dentry lacking a lower dentry on @bdst by calling @cp. */
6122+/*
6123+ * generic routine for both of copy-up and copy-down.
6124+ */
6125+/* cf. revalidate function in file.c */
6126+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
6127+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
6128+ struct au_pin *pin,
6129+ struct dentry *h_parent, void *arg),
6130+ void *arg)
6131+{
6132+ int err; /* 0, or the first error from au_do_pin() or @cp */
6133+ struct au_pin pin;
6134+ struct dentry *d, *parent, *h_parent, *real_parent, *h_dentry;
6135+
6136+ err = 0;
6137+ parent = dget_parent(dentry);
6138+ if (IS_ROOT(parent)) /* parent is a root dentry: nothing is copied */
6139+ goto out;
6140+
6141+ au_pin_init(&pin, dentry, bdst, AuLsc_DI_PARENT2, AuLsc_I_PARENT2,
6142+ au_opt_udba(dentry->d_sb), AuPin_MNT_WRITE);
6143+
6144+ /* do not use au_dpage */
6145+ real_parent = parent;
6146+ while (1) {
6147+ dput(parent); /* drop the reference held from the previous lookup */
6148+ parent = dget_parent(dentry);
6149+ h_parent = au_h_dptr(parent, bdst);
6150+ if (h_parent) /* the direct parent now exists on bdst */
6151+ goto out; /* success */
6152+
6153+ /* find top dir which is necessary to cpup */
6154+ do {
6155+ d = parent;
6156+ dput(parent);
6157+ parent = dget_parent(d);
6158+ di_read_lock_parent3(parent, !AuLock_IR);
6159+ h_parent = au_h_dptr(parent, bdst);
6160+ di_read_unlock(parent, !AuLock_IR);
6161+ } while (!h_parent); /* ends with d one level below the nearest ancestor present on bdst */
6162+
6163+ if (d != real_parent) /* NOTE(review): real_parent is presumably locked by the caller already; confirm */
6164+ di_write_lock_child3(d);
6165+
6166+ /* somebody else might create while we were sleeping */
6167+ h_dentry = au_h_dptr(d, bdst);
6168+ if (!h_dentry || d_is_negative(h_dentry)) {
6169+ if (h_dentry) /* a lower dentry exists here but is negative */
6170+ au_update_dbtop(d);
6171+
6172+ au_pin_set_dentry(&pin, d);
6173+ err = au_do_pin(&pin);
6174+ if (!err) {
6175+ err = cp(d, bdst, &pin, h_parent, arg);
6176+ au_unpin(&pin);
6177+ }
6178+ }
6179+
6180+ if (d != real_parent)
6181+ di_write_unlock(d);
6182+ if (unlikely(err)) /* stop at the first failure; otherwise rescan from @dentry */
6183+ break;
6184+ }
6185+
6186+out:
6187+ dput(parent);
6188+ return err;
6189+}
6190+/* au_cp_dirs() callback: copies @dentry up to @bdst through au_sio_cpup_simple(), using the pin supplied by the caller. */
6191+static int au_cpup_dir(struct dentry *dentry, aufs_bindex_t bdst,
6192+ struct au_pin *pin,
6193+ struct dentry *h_parent __maybe_unused,
6194+ void *arg __maybe_unused)
6195+{
6196+ struct au_cp_generic cpg = {
6197+ .dentry = dentry,
6198+ .bdst = bdst,
6199+ .bsrc = -1, /* presumably lets the callee choose the source branch; confirm */
6200+ .len = 0,
6201+ .pin = pin,
6202+ .flags = AuCpup_DTIME
6203+ };
6204+ return au_sio_cpup_simple(&cpg);
6205+}
6206+/* Runs au_cp_dirs() with au_cpup_dir() as the callback and no callback argument; returns its result. */
6207+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
6208+{
6209+ return au_cp_dirs(dentry, bdst, au_cpup_dir, NULL);
6210+}
6211+/* Calls au_cpup_dirs() only when the parent inode has no lower inode on @bdst; returns 0 or its error. */
6212+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst)
6213+{
6214+ int err;
6215+ struct dentry *parent;
6216+ struct inode *dir;
6217+
6218+ parent = dget_parent(dentry);
6219+ dir = d_inode(parent);
6220+ err = 0;
6221+ if (au_h_iptr(dir, bdst)) /* already present on bdst: nothing to do */
6222+ goto out;
6223+
6224+ di_read_unlock(parent, AuLock_IR); /* presumably the caller holds this read lock; confirm */
6225+ di_write_lock_parent(parent);
6226+ /* someone else might change our inode while we were sleeping */
6227+ if (!au_h_iptr(dir, bdst))
6228+ err = au_cpup_dirs(dentry, bdst);
6229+ di_downgrade_lock(parent, AuLock_IR); /* go back to a read lock before returning */
6230+
6231+out:
6232+ dput(parent);
6233+ return err;
6234+}
6235diff -urN /usr/share/empty/fs/aufs/cpup.h linux/fs/aufs/cpup.h
6236--- /usr/share/empty/fs/aufs/cpup.h 1970-01-01 01:00:00.000000000 +0100
6237+++ linux/fs/aufs/cpup.h 2022-11-05 23:02:18.962555950 +0100
6238@@ -0,0 +1,100 @@
6239+/* SPDX-License-Identifier: GPL-2.0 */
6240+/*
6241+ * Copyright (C) 2005-2022 Junjiro R. Okajima
6242+ *
6243+ * This program is free software; you can redistribute it and/or modify
6244+ * it under the terms of the GNU General Public License as published by
6245+ * the Free Software Foundation; either version 2 of the License, or
6246+ * (at your option) any later version.
6247+ *
6248+ * This program is distributed in the hope that it will be useful,
6249+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6250+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6251+ * GNU General Public License for more details.
6252+ *
6253+ * You should have received a copy of the GNU General Public License
6254+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
6255+ */
6256+
6257+/*
6258+ * copy-up/down functions
6259+ */
6260+
6261+#ifndef __AUFS_CPUP_H__
6262+#define __AUFS_CPUP_H__
6263+
6264+#ifdef __KERNEL__
6265+
6266+#include <linux/path.h>
6267+
6268+struct inode;
6269+struct file;
6270+struct au_pin;
6271+
6272+void au_cpup_attr_flags(struct inode *dst, unsigned int iflags);
6273+void au_cpup_attr_timesizes(struct inode *inode);
6274+void au_cpup_attr_nlink(struct inode *inode, int force);
6275+void au_cpup_attr_changeable(struct inode *inode);
6276+void au_cpup_igen(struct inode *inode, struct inode *h_inode);
6277+void au_cpup_attr_all(struct inode *inode, int force);
6278+
6279+/* ---------------------------------------------------------------------- */
6280+/* Argument bundle passed to the au_sio_cpup_*() and au_sio_cpdown_simple() entry points declared below. */
6281+struct au_cp_generic {
6282+ struct dentry *dentry;
6283+ aufs_bindex_t bdst, bsrc; /* presumably destination and source branch indices; confirm */
6284+ loff_t len; /* NOTE(review): meaning of 0 or negative values is not visible here */
6285+ struct au_pin *pin;
6286+ unsigned int flags; /* AuCpup_* bits, see below */
6287+};
6288+
6289+/* cpup flags: bit values combined with the au_*_cpup() helpers below */
6290+#define AuCpup_DTIME 1 /* do dtime_store/revert */
6291+#define AuCpup_KEEPLINO (1 << 1) /* do not clear the lower xino,
6292+ for link(2) */
6293+#define AuCpup_RENAME (1 << 2) /* rename after cpup */
6294+#define AuCpup_HOPEN (1 << 3) /* call h_open_pre/post() in
6295+ cpup */
6296+#define AuCpup_OVERWRITE (1 << 4) /* allow overwriting the
6297+ existing entry */
6298+#define AuCpup_RWDST (1 << 5) /* force write target even if
6299+ the branch is marked as RO */
6300+
6301+#ifndef CONFIG_AUFS_BR_HFSPLUS /* without this option the HOPEN flag is redefined as 0 */
6302+#undef AuCpup_HOPEN
6303+#define AuCpup_HOPEN 0 /* au_ftest_cpup(flags, HOPEN) then always yields 0 */
6304+#endif
6305+/* test, set or clear one flag given its short name, e.g. au_ftest_cpup(flags, DTIME) */
6306+#define au_ftest_cpup(flags, name) ((flags) & AuCpup_##name)
6307+#define au_fset_cpup(flags, name) \
6308+ do { (flags) |= AuCpup_##name; } while (0)
6309+#define au_fclr_cpup(flags, name) \
6310+ do { (flags) &= ~AuCpup_##name; } while (0)
6311+
6312+int au_copy_file(struct file *dst, struct file *src, loff_t len);
6313+int au_sio_cpup_simple(struct au_cp_generic *cpg);
6314+int au_sio_cpdown_simple(struct au_cp_generic *cpg);
6315+int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file);
6316+
6317+int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
6318+ int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
6319+ struct au_pin *pin,
6320+ struct dentry *h_parent, void *arg),
6321+ void *arg);
6322+int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
6323+int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
6324+
6325+/* ---------------------------------------------------------------------- */
6326+
6327+/* keep timestamps when copyup */
6328+struct au_dtime {
6329+ struct dentry *dt_dentry;
6330+ struct path dt_h_path;
6331+ struct timespec64 dt_atime, dt_mtime; /* presumably saved by au_dtime_store() for au_dtime_revert(); confirm */
6332+};
6333+void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
6334+ struct path *h_path);
6335+void au_dtime_revert(struct au_dtime *dt);
6336+
6337+#endif /* __KERNEL__ */
6338+#endif /* __AUFS_CPUP_H__ */
6339diff -urN /usr/share/empty/fs/aufs/dbgaufs.c linux/fs/aufs/dbgaufs.c
6340--- /usr/share/empty/fs/aufs/dbgaufs.c 1970-01-01 01:00:00.000000000 +0100
6341+++ linux/fs/aufs/dbgaufs.c 2022-11-05 23:02:18.962555950 +0100
6342@@ -0,0 +1,526 @@
6343+// SPDX-License-Identifier: GPL-2.0
6344+/*
6345+ * Copyright (C) 2005-2022 Junjiro R. Okajima
6346+ *
6347+ * This program is free software; you can redistribute it and/or modify
6348+ * it under the terms of the GNU General Public License as published by
6349+ * the Free Software Foundation; either version 2 of the License, or
6350+ * (at your option) any later version.
6351+ *
6352+ * This program is distributed in the hope that it will be useful,
6353+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6354+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6355+ * GNU General Public License for more details.
6356+ *
6357+ * You should have received a copy of the GNU General Public License
6358+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
6359+ */
6360+
6361+/*
6362+ * debugfs interface
6363+ */
6364+
6365+#include <linux/debugfs.h>
6366+#include "aufs.h"
6367+
6368+#ifndef CONFIG_SYSFS
6369+#error DEBUG_FS depends upon SYSFS
6370+#endif
6371+
6372+static struct dentry *dbgaufs;
6373+static const mode_t dbgaufs_mode = 0444;
6374+
6375+/* 20 is max digits length of ulong 64 */
6376+struct dbgaufs_arg {
6377+ int n; /* length of the text in a[], as returned by snprintf() */
6378+ char a[20 * 4]; /* text handed out by dbgaufs_xi_read() */
6379+};
6380+
6381+/*
6382+ * common function for all XINO files: release frees what dbgaufs_xi_open() stored in private_data
6383+ */
6384+static int dbgaufs_xi_release(struct inode *inode __maybe_unused,
6385+ struct file *file)
6386+{
6387+ void *p;
6388+
6389+ p = file->private_data;
6390+ if (p) {
6391+ /* this is struct dbgaufs_arg */
6392+ AuDebugOn(!au_kfree_sz_test(p));
6393+ au_kfree_do_rcu(p);
6394+ }
6395+ return 0; /* release never reports an error */
6396+}
6397+/* Allocates a struct dbgaufs_arg into file->private_data and formats the stat of @xf into it; returns 0 or -ENOMEM. */
6398+static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt,
6399+ int cnt)
6400+{
6401+ int err;
6402+ struct kstat st;
6403+ struct dbgaufs_arg *p;
6404+
6405+ err = -ENOMEM;
6406+ p = kmalloc(sizeof(*p), GFP_NOFS);
6407+ if (unlikely(!p))
6408+ goto out;
6409+
6410+ err = 0;
6411+ p->n = 0;
6412+ file->private_data = p;
6413+ if (!xf) /* no file to stat: a later read returns no data */
6414+ goto out;
6415+
6416+ err = vfsub_getattr(&xf->f_path, &st);
6417+ if (!err) {
6418+ if (do_fcnt) /* prefix the line with @cnt */
6419+ p->n = snprintf
6420+ (p->a, sizeof(p->a), "%d, %llux%u %lld\n",
6421+ cnt, st.blocks, st.blksize,
6422+ (long long)st.size);
6423+ else
6424+ p->n = snprintf(p->a, sizeof(p->a), "%llux%u %lld\n",
6425+ st.blocks, st.blksize,
6426+ (long long)st.size);
6427+ AuDebugOn(p->n >= sizeof(p->a));
6428+ } else {
6429+ p->n = snprintf(p->a, sizeof(p->a), "err %d\n", err);
6430+ err = 0; /* the failure is reported through the file contents instead */
6431+ }
6432+
6433+out:
6434+ return err;
6435+}
6436+/* read: copies the text prepared by dbgaufs_xi_open() to user space with simple_read_from_buffer(). */
6437+static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf,
6438+ size_t count, loff_t *ppos)
6439+{
6440+ struct dbgaufs_arg *p;
6441+
6442+ p = file->private_data;
6443+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
6444+}
6445+
6446+/* ---------------------------------------------------------------------- */
6447+/* Layout of the page allocated by dbgaufs_plink_open(): a length followed by the text. */
6448+struct dbgaufs_plink_arg {
6449+ int n; /* number of text bytes written to a[] */
6450+ char a[]; /* fills the rest of the page */
6451+};
6452+/* release: frees the page that dbgaufs_plink_open() stored in file->private_data. */
6453+static int dbgaufs_plink_release(struct inode *inode __maybe_unused,
6454+ struct file *file)
6455+{
6456+ free_page((unsigned long)file->private_data);
6457+ return 0;
6458+}
6459+/* open: formats the plink hash statistics into a zeroed page kept in file->private_data; returns 0, -ENOMEM or -EFBIG. */
6460+static int dbgaufs_plink_open(struct inode *inode, struct file *file)
6461+{
6462+ int err, i, limit;
6463+ unsigned long n, sum;
6464+ struct dbgaufs_plink_arg *p;
6465+ struct au_sbinfo *sbinfo;
6466+ struct super_block *sb;
6467+ struct hlist_bl_head *hbl;
6468+
6469+ err = -ENOMEM;
6470+ p = (void *)get_zeroed_page(GFP_NOFS);
6471+ if (unlikely(!p))
6472+ goto out;
6473+
6474+ err = -EFBIG; /* reported when the text does not fit in the page */
6475+ sbinfo = inode->i_private;
6476+ sb = sbinfo->si_sb;
6477+ si_noflush_read_lock(sb);
6478+ if (au_opt_test(au_mntflags(sb), PLINK)) {
6479+ limit = PAGE_SIZE - sizeof(p->n); /* room left for a[] in the page */
6480+
6481+ /* the number of buckets */
6482+ n = snprintf(p->a + p->n, limit, "%d\n", AuPlink_NHASH);
6483+ p->n += n;
6484+ limit -= n;
6485+
6486+ sum = 0;
6487+ for (i = 0, hbl = sbinfo->si_plink; i < AuPlink_NHASH;
6488+ i++, hbl++) {
6489+ n = au_hbl_count(hbl);
6490+ sum += n;
6491+
6492+ n = snprintf(p->a + p->n, limit, "%lu ", n);
6493+ p->n += n;
6494+ limit -= n;
6495+ if (unlikely(limit <= 0))
6496+ goto out_free;
6497+ }
6498+ p->a[p->n - 1] = '\n'; /* turn the trailing blank into a newline */
6499+
6500+ /* the sum of plinks */
6501+ n = snprintf(p->a + p->n, limit, "%lu\n", sum);
6502+ p->n += n;
6503+ limit -= n;
6504+ if (unlikely(limit <= 0))
6505+ goto out_free;
6506+ } else { /* plink is off: report a single empty bucket */
6507+#define str "1\n0\n0\n"
6508+ p->n = sizeof(str) - 1;
6509+ strcpy(p->a, str);
6510+#undef str
6511+ }
6512+ si_read_unlock(sb);
6513+
6514+ err = 0;
6515+ file->private_data = p;
6516+ goto out; /* success */
6517+out_free:
6518+ si_read_unlock(sb); /* both jumps to out_free happen with the si read lock still held */
6519+ free_page((unsigned long)p);
6520+out:
6521+ return err;
6522+}
6523+/* read: copies the text prepared by dbgaufs_plink_open() to user space with simple_read_from_buffer(). */
6524+static ssize_t dbgaufs_plink_read(struct file *file, char __user *buf,
6525+ size_t count, loff_t *ppos)
6526+{
6527+ struct dbgaufs_plink_arg *p;
6528+
6529+ p = file->private_data;
6530+ return simple_read_from_buffer(buf, count, ppos, p->a, p->n);
6531+}
6532+/* file operations of the plink file created in dbgaufs_si_init() */
6533+static const struct file_operations dbgaufs_plink_fop = {
6534+ .owner = THIS_MODULE,
6535+ .open = dbgaufs_plink_open,
6536+ .release = dbgaufs_plink_release,
6537+ .read = dbgaufs_plink_read
6538+};
6539+
6540+/* ---------------------------------------------------------------------- */
6541+/* open: reports the stat of sbinfo->si_xib through dbgaufs_xi_open(), under the si read lock. */
6542+static int dbgaufs_xib_open(struct inode *inode, struct file *file)
6543+{
6544+ int err;
6545+ struct au_sbinfo *sbinfo;
6546+ struct super_block *sb;
6547+
6548+ sbinfo = inode->i_private; /* the sbinfo given to debugfs_create_file() in dbgaufs_si_init() */
6549+ sb = sbinfo->si_sb;
6550+ si_noflush_read_lock(sb);
6551+ err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0, /*cnt*/0);
6552+ si_read_unlock(sb);
6553+ return err;
6554+}
6555+/* file operations of the xib file created in dbgaufs_si_init() */
6556+static const struct file_operations dbgaufs_xib_fop = {
6557+ .owner = THIS_MODULE,
6558+ .open = dbgaufs_xib_open,
6559+ .release = dbgaufs_xi_release,
6560+ .read = dbgaufs_xi_read
6561+};
6562+
6563+/* ---------------------------------------------------------------------- */
6564+
6565+#define DbgaufsXi_PREFIX "xi"
6566+/* open: parses the file name xi<bindex>[-<idx>] and reports the stat of that xino file via dbgaufs_xi_open(). */
6567+static int dbgaufs_xino_open(struct inode *inode, struct file *file)
6568+{
6569+ int err, idx;
6570+ long l;
6571+ aufs_bindex_t bindex;
6572+ char *p, a[sizeof(DbgaufsXi_PREFIX) + 8];
6573+ struct au_sbinfo *sbinfo;
6574+ struct super_block *sb;
6575+ struct au_xino *xi;
6576+ struct file *xf;
6577+ struct qstr *name;
6578+ struct au_branch *br;
6579+
6580+ err = -ENOENT;
6581+ name = &file->f_path.dentry->d_name;
6582+ if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX) /* sizeof counts the NUL: prefix plus one char at least */
6583+ || memcmp(name->name, DbgaufsXi_PREFIX,
6584+ sizeof(DbgaufsXi_PREFIX) - 1)))
6585+ goto out;
6586+
6587+ AuDebugOn(name->len >= sizeof(a)); /* NOTE(review): looks like the only bound on the memcpy() below; confirm it cannot compile out */
6588+ memcpy(a, name->name, name->len);
6589+ a[name->len] = '\0';
6590+ p = strchr(a, '-');
6591+ if (p) /* cut at the dash so both numbers can be parsed separately */
6592+ *p = '\0';
6593+ err = kstrtol(a + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l);
6594+ if (unlikely(err))
6595+ goto out;
6596+ bindex = l;
6597+ idx = 0; /* default when the name carries no -<idx> part */
6598+ if (p) {
6599+ err = kstrtol(p + 1, 10, &l);
6600+ if (unlikely(err))
6601+ goto out;
6602+ idx = l;
6603+ }
6604+
6605+ err = -ENOENT;
6606+ sbinfo = inode->i_private;
6607+ sb = sbinfo->si_sb;
6608+ si_noflush_read_lock(sb);
6609+ if (unlikely(bindex < 0 || bindex > au_sbbot(sb)))
6610+ goto out_si;
6611+ br = au_sbr(sb, bindex);
6612+ xi = br->br_xino;
6613+ if (unlikely(idx >= xi->xi_nfile))
6614+ goto out_si;
6615+ xf = au_xino_file(xi, idx);
6616+ if (xf) /* err stays -ENOENT when au_xino_file() returns NULL */
6617+ err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1,
6618+ au_xino_count(br));
6619+
6620+out_si:
6621+ si_read_unlock(sb);
6622+out:
6623+ AuTraceErr(err);
6624+ return err;
6625+}
6626+/* file operations of the per-branch xi* files created in dbgaufs_br_do_add() */
6627+static const struct file_operations dbgaufs_xino_fop = {
6628+ .owner = THIS_MODULE,
6629+ .open = dbgaufs_xino_open,
6630+ .release = dbgaufs_xi_release,
6631+ .read = dbgaufs_xi_read
6632+};
6633+/* Removes the debugfs entry recorded in br->br_dbgaufs, if any, and clears that pointer. */
6634+void dbgaufs_xino_del(struct au_branch *br)
6635+{
6636+ struct dentry *dbgaufs; /* local: shadows the file-scope dbgaufs directory pointer */
6637+
6638+ dbgaufs = br->br_dbgaufs;
6639+ if (!dbgaufs)
6640+ return;
6641+
6642+ br->br_dbgaufs = NULL;
6643+ /* debugfs acquires the parent i_mutex */
6644+ lockdep_off();
6645+ debugfs_remove(dbgaufs);
6646+ lockdep_on();
6647+}
6648+/* Calls dbgaufs_xino_del() for the branches from @bindex to au_sbbot(); does nothing without si_dbgaufs. */
6649+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
6650+{
6651+ aufs_bindex_t bbot;
6652+ struct au_branch *br;
6653+
6654+ if (!au_sbi(sb)->si_dbgaufs)
6655+ return;
6656+
6657+ bbot = au_sbbot(sb);
6658+ for (; bindex <= bbot; bindex++) {
6659+ br = au_sbr(sb, bindex);
6660+ dbgaufs_xino_del(br);
6661+ }
6662+}
6663+/* Creates the xi<bindex>[-<idx>] file under @parent, or renames the entry already recorded for the branch to that name. */
6664+static void dbgaufs_br_do_add(struct super_block *sb, aufs_bindex_t bindex,
6665+ unsigned int idx, struct dentry *parent,
6666+ struct au_sbinfo *sbinfo)
6667+{
6668+ struct au_branch *br;
6669+ struct dentry *d;
6670+ /* "xi" bindex(5) "-" idx(2) NULL */
6671+ char name[sizeof(DbgaufsXi_PREFIX) + 8];
6672+
6673+ if (!idx) /* index 0 gets the short name without a dash */
6674+ snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex);
6675+ else
6676+ snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d-%u",
6677+ bindex, idx);
6678+ br = au_sbr(sb, bindex);
6679+ if (br->br_dbgaufs) { /* NOTE(review): one dentry per branch is tracked, so idx > 0 renames it instead of adding a file; confirm intent */
6680+ struct qstr qstr = QSTR_INIT(name, strlen(name));
6681+
6682+ if (!au_qstreq(&br->br_dbgaufs->d_name, &qstr)) {
6683+ /* debugfs acquires the parent i_mutex */
6684+ lockdep_off();
6685+ d = debugfs_rename(parent, br->br_dbgaufs, parent,
6686+ name);
6687+ lockdep_on();
6688+ if (unlikely(!d)) /* NOTE(review): recent debugfs may return ERR_PTR() rather than NULL on failure; confirm this can fire */
6689+ pr_warn("failed renaming %pd/%s, ignored.\n",
6690+ parent, name);
6691+ }
6692+ } else {
6693+ lockdep_off();
6694+ br->br_dbgaufs = debugfs_create_file(name, dbgaufs_mode, parent,
6695+ sbinfo, &dbgaufs_xino_fop);
6696+ lockdep_on();
6697+ if (unlikely(!br->br_dbgaufs)) /* failure is only logged, never propagated */
6698+ pr_warn("failed creating %pd/%s, ignored.\n",
6699+ parent, name);
6700+ }
6701+}
6702+/* Calls dbgaufs_br_do_add() once for each of the xi_nfile slots of branch @bindex. */
6703+static void dbgaufs_br_add(struct super_block *sb, aufs_bindex_t bindex,
6704+ struct dentry *parent, struct au_sbinfo *sbinfo)
6705+{
6706+ struct au_branch *br;
6707+ struct au_xino *xi;
6708+ unsigned int u;
6709+
6710+ br = au_sbr(sb, bindex);
6711+ xi = br->br_xino;
6712+ for (u = 0; u < xi->xi_nfile; u++)
6713+ dbgaufs_br_do_add(sb, bindex, u, parent, sbinfo);
6714+}
6715+/* Adds debugfs entries for branches @bindex..au_sbbot(), ascending when @topdown and descending otherwise. */
6716+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex, int topdown)
6717+{
6718+ struct au_sbinfo *sbinfo;
6719+ struct dentry *parent;
6720+ aufs_bindex_t bbot;
6721+
6722+ if (!au_opt_test(au_mntflags(sb), XINO)) /* nothing to expose without the XINO mount flag */
6723+ return;
6724+
6725+ sbinfo = au_sbi(sb);
6726+ parent = sbinfo->si_dbgaufs;
6727+ if (!parent) /* the per-superblock debugfs directory does not exist */
6728+ return;
6729+
6730+ bbot = au_sbbot(sb);
6731+ if (topdown)
6732+ for (; bindex <= bbot; bindex++)
6733+ dbgaufs_br_add(sb, bindex, parent, sbinfo);
6734+ else
6735+ for (; bbot >= bindex; bbot--)
6736+ dbgaufs_br_add(sb, bbot, parent, sbinfo);
6737+}
6738+
6739+/* ---------------------------------------------------------------------- */
6740+
6741+#ifdef CONFIG_AUFS_EXPORT /* xigen file: stat of sbinfo->si_xigen, built only with this option */
6742+static int dbgaufs_xigen_open(struct inode *inode, struct file *file)
6743+{
6744+ int err;
6745+ struct au_sbinfo *sbinfo;
6746+ struct super_block *sb;
6747+
6748+ sbinfo = inode->i_private;
6749+ sb = sbinfo->si_sb;
6750+ si_noflush_read_lock(sb);
6751+ err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0, /*cnt*/0);
6752+ si_read_unlock(sb);
6753+ return err;
6754+}
6755+/* file operations of the xigen file */
6756+static const struct file_operations dbgaufs_xigen_fop = {
6757+ .owner = THIS_MODULE,
6758+ .open = dbgaufs_xigen_open,
6759+ .release = dbgaufs_xi_release,
6760+ .read = dbgaufs_xi_read
6761+};
6762+/* Creates the xigen file under sbinfo->si_dbgaufs; returns 0, or -EIO when the create call returns NULL. */
6763+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
6764+{
6765+ int err;
6766+
6767+ /*
6768+ * This function is a dynamic '__init' function actually,
6769+ * so the tiny check for si_rwsem is unnecessary.
6770+ */
6771+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
6772+
6773+ err = -EIO;
6774+ sbinfo->si_dbgaufs_xigen = debugfs_create_file
6775+ ("xigen", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
6776+ &dbgaufs_xigen_fop);
6777+ if (sbinfo->si_dbgaufs_xigen) /* NOTE(review): recent debugfs may return ERR_PTR() on failure, which is non-NULL; confirm */
6778+ err = 0;
6779+
6780+ return err;
6781+}
6782+#else /* nothing to create without CONFIG_AUFS_EXPORT */
6783+static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
6784+{
6785+ return 0;
6786+}
6787+#endif /* CONFIG_AUFS_EXPORT */
6788+
6789+/* ---------------------------------------------------------------------- */
6790+/* Removes the per-superblock debugfs directory with debugfs_remove_recursive() and clears si_dbgaufs. */
6791+void dbgaufs_si_fin(struct au_sbinfo *sbinfo)
6792+{
6793+ /*
6794+ * This function is a dynamic '__fin' function actually,
6795+ * so the tiny check for si_rwsem is unnecessary.
6796+ */
6797+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
6798+
6799+ debugfs_remove_recursive(sbinfo->si_dbgaufs);
6800+ sbinfo->si_dbgaufs = NULL;
6801+}
6802+/* Creates the per-superblock debugfs directory with its plink, xib and xigen files; returns 0, -ENOENT or -EIO. */
6803+int dbgaufs_si_init(struct au_sbinfo *sbinfo)
6804+{
6805+ int err;
6806+ char name[SysaufsSiNameLen];
6807+
6808+ /*
6809+ * This function is a dynamic '__init' function actually,
6810+ * so the tiny check for si_rwsem is unnecessary.
6811+ */
6812+ /* AuRwMustWriteLock(&sbinfo->si_rwsem); */
6813+
6814+ err = -ENOENT;
6815+ if (!dbgaufs) { /* the top-level directory from dbgaufs_init() is missing */
6816+ AuErr1("/debug/aufs is uninitialized\n");
6817+ goto out;
6818+ }
6819+
6820+ err = -EIO;
6821+ sysaufs_name(sbinfo, name);
6822+ sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs);
6823+ if (unlikely(!sbinfo->si_dbgaufs)) /* NOTE(review): recent debugfs may return ERR_PTR() rather than NULL; confirm these checks can fire */
6824+ goto out;
6825+
6826+ /* regardless plink/noplink option */
6827+ sbinfo->si_dbgaufs_plink = debugfs_create_file
6828+ ("plink", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
6829+ &dbgaufs_plink_fop);
6830+ if (unlikely(!sbinfo->si_dbgaufs_plink))
6831+ goto out_dir;
6832+
6833+ /* regardless xino/noxino option */
6834+ sbinfo->si_dbgaufs_xib = debugfs_create_file
6835+ ("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
6836+ &dbgaufs_xib_fop);
6837+ if (unlikely(!sbinfo->si_dbgaufs_xib))
6838+ goto out_dir;
6839+
6840+ err = dbgaufs_xigen_init(sbinfo);
6841+ if (!err)
6842+ goto out; /* success */
6843+
6844+out_dir:
6845+ dbgaufs_si_fin(sbinfo); /* removes the directory together with any files created so far */
6846+out:
6847+ if (unlikely(err))
6848+ pr_err("debugfs/aufs failed\n");
6849+ return err;
6850+}
6851+
6852+/* ---------------------------------------------------------------------- */
6853+/* Removes the top-level debugfs directory created by dbgaufs_init(). */
6854+void dbgaufs_fin(void)
6855+{
6856+ debugfs_remove(dbgaufs);
6857+}
6858+/* Creates the top-level debugfs directory named AUFS_NAME; returns 0, or -EIO when the create call returns NULL. */
6859+int __init dbgaufs_init(void)
6860+{
6861+ int err;
6862+
6863+ err = -EIO;
6864+ dbgaufs = debugfs_create_dir(AUFS_NAME, NULL);
6865+ if (dbgaufs) /* NOTE(review): recent debugfs may return ERR_PTR() on failure, which is non-NULL; confirm */
6866+ err = 0;
6867+ return err;
6868+}
6869diff -urN /usr/share/empty/fs/aufs/dbgaufs.h linux/fs/aufs/dbgaufs.h
6870--- /usr/share/empty/fs/aufs/dbgaufs.h 1970-01-01 01:00:00.000000000 +0100
6871+++ linux/fs/aufs/dbgaufs.h 2022-11-05 23:02:18.962555950 +0100
6872@@ -0,0 +1,53 @@
6873+/* SPDX-License-Identifier: GPL-2.0 */
6874+/*
6875+ * Copyright (C) 2005-2022 Junjiro R. Okajima
6876+ *
6877+ * This program is free software; you can redistribute it and/or modify
6878+ * it under the terms of the GNU General Public License as published by
6879+ * the Free Software Foundation; either version 2 of the License, or
6880+ * (at your option) any later version.
6881+ *
6882+ * This program is distributed in the hope that it will be useful,
6883+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6884+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6885+ * GNU General Public License for more details.
6886+ *
6887+ * You should have received a copy of the GNU General Public License
6888+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
6889+ */
6890+
6891+/*
6892+ * debugfs interface
6893+ */
6894+
6895+#ifndef __DBGAUFS_H__
6896+#define __DBGAUFS_H__
6897+
6898+#ifdef __KERNEL__
6899+
6900+struct super_block;
6901+struct au_sbinfo;
6902+struct au_branch;
6903+
6904+#ifdef CONFIG_DEBUG_FS
6905+/* dbgaufs.c */
6906+void dbgaufs_xino_del(struct au_branch *br);
6907+void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
6908+void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex, int topdown);
6909+void dbgaufs_si_fin(struct au_sbinfo *sbinfo);
6910+int dbgaufs_si_init(struct au_sbinfo *sbinfo);
6911+void dbgaufs_fin(void);
6912+int __init dbgaufs_init(void);
6913+#else /* presumably AuStubVoid()/AuStubInt0() expand to empty inline stubs with the same names; confirm */
6914+AuStubVoid(dbgaufs_xino_del, struct au_branch *br)
6915+AuStubVoid(dbgaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
6916+AuStubVoid(dbgaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex,
6917+ int topdown)
6918+AuStubVoid(dbgaufs_si_fin, struct au_sbinfo *sbinfo)
6919+AuStubInt0(dbgaufs_si_init, struct au_sbinfo *sbinfo)
6920+AuStubVoid(dbgaufs_fin, void)
6921+AuStubInt0(__init dbgaufs_init, void)
6922+#endif /* CONFIG_DEBUG_FS */
6923+
6924+#endif /* __KERNEL__ */
6925+#endif /* __DBGAUFS_H__ */
6926diff -urN /usr/share/empty/fs/aufs/dcsub.c linux/fs/aufs/dcsub.c
6927--- /usr/share/empty/fs/aufs/dcsub.c 1970-01-01 01:00:00.000000000 +0100
6928+++ linux/fs/aufs/dcsub.c 2022-11-05 23:02:18.962555950 +0100
6929@@ -0,0 +1,225 @@
6930+// SPDX-License-Identifier: GPL-2.0
6931+/*
6932+ * Copyright (C) 2005-2022 Junjiro R. Okajima
6933+ *
6934+ * This program is free software; you can redistribute it and/or modify
6935+ * it under the terms of the GNU General Public License as published by
6936+ * the Free Software Foundation; either version 2 of the License, or
6937+ * (at your option) any later version.
6938+ *
6939+ * This program is distributed in the hope that it will be useful,
6940+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
6941+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6942+ * GNU General Public License for more details.
6943+ *
6944+ * You should have received a copy of the GNU General Public License
6945+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
6946+ */
6947+
6948+/*
6949+ * sub-routines for dentry cache
6950+ */
6951+
6952+#include "aufs.h"
6953+/* Puts every dentry stored in @dpage, then frees the page that held the pointers. */
6954+static void au_dpage_free(struct au_dpage *dpage)
6955+{
6956+ int i;
6957+ struct dentry **p;
6958+
6959+ p = dpage->dentries;
6960+ for (i = 0; i < dpage->ndentry; i++)
6961+ dput(*p++);
6962+ free_page((unsigned long)dpage->dentries);
6963+}
6964+/* Allocates a one-element au_dpage array plus its first, empty page; returns 0 or -ENOMEM. */
6965+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp)
6966+{
6967+ int err;
6968+ void *p;
6969+
6970+ err = -ENOMEM;
6971+ dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp);
6972+ if (unlikely(!dpages->dpages))
6973+ goto out;
6974+
6975+ p = (void *)__get_free_page(gfp);
6976+ if (unlikely(!p))
6977+ goto out_dpages;
6978+
6979+ dpages->dpages[0].ndentry = 0;
6980+ dpages->dpages[0].dentries = p;
6981+ dpages->ndpage = 1;
6982+ return 0; /* success */
6983+
6984+out_dpages:
6985+ au_kfree_try_rcu(dpages->dpages); /* undo the array allocation when the page is unavailable */
6986+out:
6987+ return err;
6988+}
6989+/* Runs au_dpage_free() on each of the ndpage entries, then frees the array itself. */
6990+void au_dpages_free(struct au_dcsub_pages *dpages)
6991+{
6992+ int i;
6993+ struct au_dpage *p;
6994+
6995+ p = dpages->dpages;
6996+ for (i = 0; i < dpages->ndpage; i++)
6997+ au_dpage_free(p++);
6998+ au_kfree_try_rcu(dpages->dpages);
6999+}
7000+/* Stores a new reference to @dentry in the last page, adding a page first when it is full; returns 0 or -ENOMEM. */
7001+static int au_dpages_append(struct au_dcsub_pages *dpages,
7002+ struct dentry *dentry, gfp_t gfp)
7003+{
7004+ int err, sz;
7005+ struct au_dpage *dpage;
7006+ void *p;
7007+
7008+ dpage = dpages->dpages + dpages->ndpage - 1;
7009+ sz = PAGE_SIZE / sizeof(dentry); /* number of dentry pointers one page can hold */
7010+ if (unlikely(dpage->ndentry >= sz)) {
7011+ AuLabel(new dpage);
7012+ err = -ENOMEM;
7013+ sz = dpages->ndpage * sizeof(*dpages->dpages); /* sz reused: current byte size of the array */
7014+ p = au_kzrealloc(dpages->dpages, sz,
7015+ sz + sizeof(*dpages->dpages), gfp,
7016+ /*may_shrink*/0);
7017+ if (unlikely(!p))
7018+ goto out;
7019+
7020+ dpages->dpages = p;
7021+ dpage = dpages->dpages + dpages->ndpage;
7022+ p = (void *)__get_free_page(gfp);
7023+ if (unlikely(!p)) /* the grown array is kept, ndpage is left unchanged */
7024+ goto out;
7025+
7026+ dpage->ndentry = 0;
7027+ dpage->dentries = p;
7028+ dpages->ndpage++;
7029+ }
7030+
7031+ AuDebugOn(au_dcount(dentry) <= 0);
7032+ dpage->dentries[dpage->ndentry++] = dget_dlock(dentry); /* NOTE(review): dget_dlock() presumably needs d_lock held by the caller; confirm */
7033+ return 0; /* success */
7034+
7035+out:
7036+ return err;
7037+}
7038+
7039+/* todo: BAD approach */
7040+/* copied from linux/fs/dcache.c */
7041+enum d_walk_ret { /* values returned by the enter callback passed to d_walk() */
7042+ D_WALK_CONTINUE,
7043+ D_WALK_QUIT,
7044+ D_WALK_NORETRY,
7045+ D_WALK_SKIP,
7046+};
7047+
7048+extern void d_walk(struct dentry *parent, void *data,
7049+ enum d_walk_ret (*enter)(void *, struct dentry *)); /* NOTE(review): presumably must match the definition in fs/dcache.c; confirm */
7050+/* Context that au_dcsub_pages() hands to au_call_dpages_append() through d_walk(). */
7051+struct ac_dpages_arg {
7052+ int err; /* result of the last au_dpages_append() call */
7053+ struct au_dcsub_pages *dpages; /* where accepted dentries are stored */
7054+ struct super_block *sb; /* only dentries of this superblock are accepted */
7055+ au_dpages_test test; /* optional extra filter, may be NULL */
7056+ void *arg; /* second argument of test */
7057+};
7058+/* d_walk() callback: appends @dentry when it passes all filters below; quits the walk on an append error. */
7059+static enum d_walk_ret au_call_dpages_append(void *_arg, struct dentry *dentry)
7060+{
7061+ enum d_walk_ret ret;
7062+ struct ac_dpages_arg *arg = _arg;
7063+
7064+ ret = D_WALK_CONTINUE;
7065+ if (dentry->d_sb == arg->sb
7066+ && !IS_ROOT(dentry)
7067+ && au_dcount(dentry) > 0
7068+ && au_di(dentry)
7069+ && (!arg->test || arg->test(dentry, arg->arg))) {
7070+ arg->err = au_dpages_append(arg->dpages, dentry, GFP_ATOMIC); /* presumably atomic because d_walk() holds spinlocks here; confirm */
7071+ if (unlikely(arg->err))
7072+ ret = D_WALK_QUIT;
7073+ }
7074+
7075+ return ret;
7076+}
7077+/* Walks from @root with d_walk() and collects into @dpages what au_call_dpages_append() accepts; returns 0 or the append error. */
7078+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
7079+ au_dpages_test test, void *arg)
7080+{
7081+ struct ac_dpages_arg args = {
7082+ .err = 0,
7083+ .dpages = dpages,
7084+ .sb = root->d_sb,
7085+ .test = test,
7086+ .arg = arg
7087+ };
7088+
7089+ d_walk(root, &args, au_call_dpages_append);
7090+
7091+ return args.err;
7092+}
7093+/* Collects @dentry (when @do_include) and then each ancestor up to the root, all under rename_lock; returns 0 or the append error. */
7094+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
7095+ int do_include, au_dpages_test test, void *arg)
7096+{
7097+ int err;
7098+
7099+ err = 0;
7100+ write_seqlock(&rename_lock);
7101+ spin_lock(&dentry->d_lock);
7102+ if (do_include
7103+ && au_dcount(dentry) > 0
7104+ && (!test || test(dentry, arg)))
7105+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
7106+ spin_unlock(&dentry->d_lock);
7107+ if (unlikely(err))
7108+ goto out;
7109+
7110+ /*
7111+ * RCU for vfsmount is unnecessary since this is a traverse in a single
7112+ * mount
7113+ */
7114+ while (!IS_ROOT(dentry)) {
7115+ dentry = dentry->d_parent; /* rename_lock is locked */
7116+ spin_lock(&dentry->d_lock);
7117+ if (au_dcount(dentry) > 0 /* same filters as for the starting dentry */
7118+ && (!test || test(dentry, arg)))
7119+ err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
7120+ spin_unlock(&dentry->d_lock);
7121+ if (unlikely(err))
7122+ break;
7123+ }
7124+
7125+out:
7126+ write_sequnlock(&rename_lock);
7127+ return err;
7128+}
7129+/* au_dpages_test callback: accepts a dentry for which au_di() is non-NULL and whose superblock equals @arg. */
7130+static inline int au_dcsub_dpages_aufs(struct dentry *dentry, void *arg)
7131+{
7132+ return au_di(dentry) && dentry->d_sb == arg;
7133+}
7134+/* au_dcsub_pages_rev() with au_dcsub_dpages_aufs() as the test and the superblock of @dentry as its argument. */
7135+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
7136+ struct dentry *dentry, int do_include)
7137+{
7138+ return au_dcsub_pages_rev(dpages, dentry, do_include,
7139+ au_dcsub_dpages_aufs, dentry->d_sb);
7140+}
7141+/* Returns path_is_under() for @d1 against @d2, each wrapped in a struct path whose other members stay zero. */
7142+int au_test_subdir(struct dentry *d1, struct dentry *d2)
7143+{
7144+ struct path path[2] = {
7145+ {
7146+ .dentry = d1
7147+ },
7148+ {
7149+ .dentry = d2
7150+ }
7151+ };
7152+
7153+ return path_is_under(path + 0, path + 1);
7154+}
7155diff -urN /usr/share/empty/fs/aufs/dcsub.h linux/fs/aufs/dcsub.h
7156--- /usr/share/empty/fs/aufs/dcsub.h 1970-01-01 01:00:00.000000000 +0100
7157+++ linux/fs/aufs/dcsub.h 2022-11-05 23:02:18.962555950 +0100
7158@@ -0,0 +1,137 @@
7159+/* SPDX-License-Identifier: GPL-2.0 */
7160+/*
7161+ * Copyright (C) 2005-2022 Junjiro R. Okajima
7162+ *
7163+ * This program is free software; you can redistribute it and/or modify
7164+ * it under the terms of the GNU General Public License as published by
7165+ * the Free Software Foundation; either version 2 of the License, or
7166+ * (at your option) any later version.
7167+ *
7168+ * This program is distributed in the hope that it will be useful,
7169+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7170+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7171+ * GNU General Public License for more details.
7172+ *
7173+ * You should have received a copy of the GNU General Public License
7174+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7175+ */
7176+
7177+/*
7178+ * sub-routines for dentry cache
7179+ */
7180+
7181+#ifndef __AUFS_DCSUB_H__
7182+#define __AUFS_DCSUB_H__
7183+
7184+#ifdef __KERNEL__
7185+
7186+#include <linux/dcache.h>
7187+#include <linux/fs.h>
7188+/* One page of collected dentry pointers. */
7189+struct au_dpage {
7190+ int ndentry; /* number of slots used in dentries[] */
7191+ struct dentry **dentries; /* page obtained from __get_free_page() */
7192+};
7193+/* Growable array of au_dpage; set up by au_dpages_init() and released by au_dpages_free(). */
7194+struct au_dcsub_pages {
7195+ int ndpage; /* number of elements in dpages[] */
7196+ struct au_dpage *dpages;
7197+};
7198+
7199+/* ---------------------------------------------------------------------- */
7200+
7201+/* dcsub.c */
7202+int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp);
7203+void au_dpages_free(struct au_dcsub_pages *dpages);
7204+typedef int (*au_dpages_test)(struct dentry *dentry, void *arg);
7205+int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
7206+ au_dpages_test test, void *arg);
7207+int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
7208+ int do_include, au_dpages_test test, void *arg);
7209+int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
7210+ struct dentry *dentry, int do_include);
7211+int au_test_subdir(struct dentry *d1, struct dentry *d2);
7212+
7213+/* ---------------------------------------------------------------------- */
7214+
7215+/*
7216+ * todo: in linux-3.13, several similar (but faster) helpers are added to
7217+ * include/linux/dcache.h. Try them (in the future).
7218+ */
7219+/* Returns 0 when @d is hashed, positive and its inode has a non-zero link count; otherwise -ENOENT. */
7220+static inline int au_d_hashed_positive(struct dentry *d)
7221+{
7222+ int err;
7223+ struct inode *inode = d_inode(d);
7224+
7225+ err = 0;
7226+ if (unlikely(d_unhashed(d)
7227+ || d_is_negative(d)
7228+ || !inode->i_nlink)) /* evaluated only after the negative test failed */
7229+ err = -ENOENT;
7230+ return err;
7231+}
7232+/* Like au_d_hashed_positive(), but a positive dentry whose inode has I_LINKABLE set is accepted as well. */
7233+static inline int au_d_linkable(struct dentry *d)
7234+{
7235+ int err;
7236+ struct inode *inode = d_inode(d);
7237+
7238+ err = au_d_hashed_positive(d);
7239+ if (err /* rejected above: check for the I_LINKABLE exception */
7240+ && d_is_positive(d)
7241+ && (inode->i_state & I_LINKABLE))
7242+ err = 0;
7243+ return err;
7244+}
7245+/* Returns 0 or -ENOENT: au_d_hashed_positive() for a non-root dentry; for a root, d_unlinked() replaces d_unhashed(). */
7246+static inline int au_d_alive(struct dentry *d)
7247+{
7248+ int err;
7249+ struct inode *inode;
7250+
7251+ err = 0;
7252+ if (!IS_ROOT(d))
7253+ err = au_d_hashed_positive(d);
7254+ else {
7255+ inode = d_inode(d);
7256+ if (unlikely(d_unlinked(d)
7257+ || d_is_negative(d)
7258+ || !inode->i_nlink))
7259+ err = -ENOENT;
7260+ }
7261+ return err;
7262+}
7263+
7264+static inline int au_alive_dir(struct dentry *d)
7265+{
7266+ /* any failure of au_d_alive() is reported as -ENOENT too */
7267+ if (unlikely(au_d_alive(d)))
7268+ return -ENOENT;
7269+ if (unlikely(IS_DEADDIR(d_inode(d))))
7270+ return -ENOENT;
7271+ return 0;
7272+}
7273+
7274+static inline int au_qstreq(struct qstr *a, struct qstr *b)
7275+{
7276+ /* 1 when both the lengths and the name bytes match, otherwise 0 */
7277+ return a->len != b->len ? 0 : !memcmp(a->name, b->name, a->len);
7278+}
7279+
7280+/*
7281+ * by the commit
7282+ * 360f547 2015-01-25 dcache: let the dentry count go down to zero without
7283+ * taking d_lock
7284+ * the type of d_lockref.count became int, but the inlined function d_count()
7285+ * still returns unsigned int.
7286+ * I don't know why. Maybe it is for the sake of every d_count() user?
7287+ * Anyway au_dcount() lives on, and returns the count cast to int.
7288+ */
7289+static inline int au_dcount(struct dentry *d)
7290+{
7291+ return (int)d_count(d);
7292+}
7293+
7294+#endif /* __KERNEL__ */
7295+#endif /* __AUFS_DCSUB_H__ */
7296diff -urN /usr/share/empty/fs/aufs/debug.c linux/fs/aufs/debug.c
7297--- /usr/share/empty/fs/aufs/debug.c 1970-01-01 01:00:00.000000000 +0100
7298+++ linux/fs/aufs/debug.c 2023-02-20 21:05:51.959693785 +0100
7299@@ -0,0 +1,446 @@
7300+// SPDX-License-Identifier: GPL-2.0
7301+/*
7302+ * Copyright (C) 2005-2022 Junjiro R. Okajima
7303+ *
7304+ * This program is free software; you can redistribute it and/or modify
7305+ * it under the terms of the GNU General Public License as published by
7306+ * the Free Software Foundation; either version 2 of the License, or
7307+ * (at your option) any later version.
7308+ *
7309+ * This program is distributed in the hope that it will be useful,
7310+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7311+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7312+ * GNU General Public License for more details.
7313+ *
7314+ * You should have received a copy of the GNU General Public License
7315+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7316+ */
7317+
7318+/*
7319+ * debug print functions
7320+ */
7321+
7322+#include <linux/iversion.h>
7323+#include "aufs.h"
7324+
7325+/* Parses @val: a positive number turns debug on, others off. 0 or -errno. */
7326+static int param_atomic_t_set(const char *val, const struct kernel_param *kp)
7327+{
7328+ int v, ret;
7329+
7330+ ret = kstrtoint(val, 0, &v);
7331+ if (ret)
7332+ return ret;
7333+ if (v <= 0)
7334+ au_debug_off();
7335+ else
7336+ au_debug_on();
7337+ return 0;
7338+}
7339+
7340+/* Prints the counter in kp->arg. Buffer is 4k (i.e. be short!) */
7341+static int param_atomic_t_get(char *buffer, const struct kernel_param *kp)
7342+{
7343+ atomic_t *counter = kp->arg;
7344+ int cur = atomic_read(counter);
7345+
7346+ return sprintf(buffer, "%d", cur);
7347+}
7348+
7349+static const struct kernel_param_ops param_ops_atomic_t = {
7350+ .set = param_atomic_t_set, /* parses the value and switches debug on/off */
7351+ .get = param_atomic_t_get /* prints the current counter */
7352+ /* .free is left unset: void (*free)(void *arg) */
7353+};
7354+
7355+atomic_t aufs_debug = ATOMIC_INIT(0); /* exposed as the module parameter "debug" */
7356+MODULE_PARM_DESC(debug, "debug print");
7357+module_param_named(debug, aufs_debug, atomic_t, 0664);
7358+
7359+DEFINE_MUTEX(au_dbg_mtx); /* just to serialize the dbg msgs */
7360+char *au_plevel = KERN_DEBUG; /* level string which dpri() hands to printk() */
7361+#define dpri(fmt, ...) do { \
7362+ if ((au_plevel \
7363+ && strcmp(au_plevel, KERN_DEBUG)) \
7364+ || au_debug_test()) \
7365+ printk("%s" fmt, au_plevel, ##__VA_ARGS__); \
7366+} while (0) /* prints when au_plevel differs from KERN_DEBUG or debug is on */
7367+
7368+/* ---------------------------------------------------------------------- */
7369+
7370+void au_dpri_whlist(struct au_nhash *whlist)
7371+{
7372+ const unsigned long nr = whlist->nh_num;
7373+ struct hlist_head *bucket = whlist->nh_head;
7374+ struct au_vdir_wh *wh;
7375+ unsigned long idx;
7376+
7377+ /* print every entry, hash bucket by hash bucket */
7378+ for (idx = 0; idx < nr; idx++, bucket++) {
7379+ hlist_for_each_entry(wh, bucket, wh_hash) {
7380+ dpri("b%d, %.*s, %d\n",
7381+ wh->wh_bindex,
7382+ wh->wh_str.len, wh->wh_str.name,
7383+ wh->wh_str.len);
7384+ }
7385+ }
7386+}
7387+
7388+void au_dpri_vdir(struct au_vdir *vdir)
7389+{ /* prints a summary line of @vdir, then the address of each block */
7390+ unsigned long ul;
7391+ union au_vdir_deblk_p p;
7392+ unsigned char *o;
7393+
7394+ if (!vdir || IS_ERR(vdir)) { /* NULL or error pointer: print it and stop */
7395+ dpri("err %ld\n", PTR_ERR(vdir));
7396+ return;
7397+ }
7398+
7399+ dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %llu\n",
7400+ vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk,
7401+ vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version);
7402+ for (ul = 0; ul < vdir->vd_nblk; ul++) { /* one line per block pointer */
7403+ p.deblk = vdir->vd_deblk[ul];
7404+ o = p.deblk;
7405+ dpri("[%lu]: %p\n", ul, o);
7406+ }
7407+}
7408+
7409+static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode, int hn,
7410+ struct dentry *wh)
7411+{ /* prints one line about @inode; returns 0, or -1 for a NULL/error pointer */
7412+ char *n = NULL; /* name of @wh, when @wh is given */
7413+ int l = 0; /* length of that name */
7414+
7415+ if (!inode || IS_ERR(inode)) {
7416+ dpri("i%d: err %ld\n", bindex, PTR_ERR(inode));
7417+ return -1;
7418+ }
7419+
7420+ /* the type of i_blocks depends upon CONFIG_LBDAF, so it is cast below */
7421+ BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long)
7422+ && sizeof(inode->i_blocks) != sizeof(u64));
7423+ if (wh) {
7424+ n = (void *)wh->d_name.name;
7425+ l = wh->d_name.len;
7426+ }
7427+
7428+ dpri("i%d: %p, i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu,"
7429+ " acl %p, def_acl %p,"
7430+ " hn %d, ct %lld, np %lu, st 0x%lx, f 0x%x, v %llu, g %x%s%.*s\n",
7431+ bindex, inode,
7432+ inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??",
7433+ atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode,
7434+ i_size_read(inode), (unsigned long long)inode->i_blocks,
7435+ inode->i_acl, inode->i_default_acl,
7436+ hn, (long long)timespec64_to_ns(&inode->i_ctime) & 0x0ffff,
7437+ inode->i_mapping ? inode->i_mapping->nrpages : 0,
7438+ inode->i_state, inode->i_flags, inode_peek_iversion(inode),
7439+ inode->i_generation,
7440+ l ? ", wh " : "", l, n); /* the ", wh <name>" tail appears only when l != 0 */
7441+ return 0;
7442+}
7443+
7444+void au_dpri_inode(struct inode *inode)
7445+{ /* prints @inode and, for a sane aufs inode, each of its lower inodes */
7446+ struct au_iinfo *iinfo;
7447+ struct au_hinode *hi;
7448+ aufs_bindex_t bindex;
7449+ int err, hn;
7450+
7451+ err = do_pri_inode(-1, inode, -1, NULL); /* -1: not a branch index */
7452+ if (err || !au_test_aufs(inode->i_sb) || au_is_bad_inode(inode))
7453+ return;
7454+
7455+ iinfo = au_ii(inode);
7456+ dpri("i-1: btop %d, bbot %d, gen %d\n",
7457+ iinfo->ii_btop, iinfo->ii_bbot, au_iigen(inode, NULL));
7458+ if (iinfo->ii_btop < 0) /* a negative top index: nothing more to print */
7459+ return;
7460+ hn = 0;
7461+ for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot; bindex++) {
7462+ hi = au_hinode(iinfo, bindex);
7463+ hn = !!au_hn(hi); /* printed as 0 or 1 */
7464+ do_pri_inode(bindex, hi->hi_inode, hn, hi->hi_whdentry);
7465+ }
7466+}
7467+
7468+void au_dpri_dalias(struct inode *inode)
7469+{ /* prints every dentry found on the alias list of @inode */
7470+ struct dentry *d;
7471+
7472+ spin_lock(&inode->i_lock); /* i_lock is held during the whole walk */
7473+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias)
7474+ au_dpri_dentry(d);
7475+ spin_unlock(&inode->i_lock);
7476+}
7477+
7478+static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry)
7479+{ /* prints @dentry and its inode; returns 0, or -1 for a NULL/error pointer */
7480+ struct dentry *wh = NULL;
7481+ int hn;
7482+ struct inode *inode;
7483+ struct au_iinfo *iinfo;
7484+ struct au_hinode *hi;
7485+
7486+ if (!dentry || IS_ERR(dentry)) {
7487+ dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry));
7488+ return -1;
7489+ }
7490+ /* do not call dget_parent() here */
7491+ /* note: access d_xxx without d_lock */
7492+ dpri("d%d: %p, %pd2?, %s, cnt %d, flags 0x%x, %shashed\n",
7493+ bindex, dentry, dentry,
7494+ dentry->d_sb ? au_sbtype(dentry->d_sb) : "??",
7495+ au_dcount(dentry), dentry->d_flags,
7496+ d_unhashed(dentry) ? "un" : "");
7497+ hn = -1; /* stays -1 unless the aufs inode info is consulted below */
7498+ inode = NULL;
7499+ if (d_is_positive(dentry))
7500+ inode = d_inode(dentry);
7501+ if (inode
7502+ && au_test_aufs(dentry->d_sb)
7503+ && bindex >= 0
7504+ && !au_is_bad_inode(inode)) {
7505+ iinfo = au_ii(inode);
7506+ hi = au_hinode(iinfo, bindex);
7507+ hn = !!au_hn(hi);
7508+ wh = hi->hi_whdentry;
7509+ }
7510+ do_pri_inode(bindex, inode, hn, wh); /* a NULL inode is printed as an error line */
7511+ return 0;
7512+}
7513+
7514+void au_dpri_dentry(struct dentry *dentry)
7515+{ /* prints @dentry and, for an aufs dentry, each of its lower dentries */
7516+ struct au_dinfo *dinfo;
7517+ aufs_bindex_t bindex;
7518+ int err;
7519+
7520+ err = do_pri_dentry(-1, dentry); /* -1: not a branch index */
7521+ if (err || !au_test_aufs(dentry->d_sb))
7522+ return;
7523+
7524+ dinfo = au_di(dentry);
7525+ if (!dinfo)
7526+ return;
7527+ dpri("d-1: btop %d, bbot %d, bwh %d, bdiropq %d, gen %d, tmp %d\n",
7528+ dinfo->di_btop, dinfo->di_bbot,
7529+ dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry),
7530+ dinfo->di_tmpfile);
7531+ if (dinfo->di_btop < 0) /* a negative top index: nothing more to print */
7532+ return;
7533+ for (bindex = dinfo->di_btop; bindex <= dinfo->di_bbot; bindex++)
7534+ do_pri_dentry(bindex, au_hdentry(dinfo, bindex)->hd_dentry);
7535+}
7536+
7537+static int do_pri_file(aufs_bindex_t bindex, struct file *file)
7538+{ /* prints @file and its dentry; returns 0, or -1 for a NULL/error pointer */
7539+ char a[32]; /* optional ", gen ..., mmapped ..." tail of the line */
7540+
7541+ if (!file || IS_ERR(file)) {
7542+ dpri("f%d: err %ld\n", bindex, PTR_ERR(file));
7543+ return -1;
7544+ }
7545+ a[0] = 0; /* empty tail by default */
7546+ if (bindex < 0
7547+ && !IS_ERR_OR_NULL(file->f_path.dentry)
7548+ && au_test_aufs(file->f_path.dentry->d_sb)
7549+ && au_fi(file))
7550+ snprintf(a, sizeof(a), ", gen %d, mmapped %d",
7551+ au_figen(file), atomic_read(&au_fi(file)->fi_mmapped));
7552+ dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, v %llu, pos %llu%s\n",
7553+ bindex, file->f_mode, file->f_flags, (long)file_count(file),
7554+ file->f_version, file->f_pos, a);
7555+ if (!IS_ERR_OR_NULL(file->f_path.dentry))
7556+ do_pri_dentry(bindex, file->f_path.dentry);
7557+ return 0;
7558+}
7559+
7560+void au_dpri_file(struct file *file)
7561+{ /* prints @file and, for an aufs file, its lower file(s) */
7562+ struct au_finfo *finfo;
7563+ struct au_fidir *fidir;
7564+ struct au_hfile *hfile;
7565+ aufs_bindex_t bindex;
7566+ int err;
7567+
7568+ err = do_pri_file(-1, file); /* -1: not a branch index */
7569+ if (err
7570+ || IS_ERR_OR_NULL(file->f_path.dentry)
7571+ || !au_test_aufs(file->f_path.dentry->d_sb))
7572+ return;
7573+
7574+ finfo = au_fi(file);
7575+ if (!finfo)
7576+ return;
7577+ if (finfo->fi_btop < 0)
7578+ return;
7579+ fidir = finfo->fi_hdir;
7580+ if (!fidir) /* no fi_hdir: only the single lower file fi_htop is printed */
7581+ do_pri_file(finfo->fi_btop, finfo->fi_htop.hf_file);
7582+ else
7583+ for (bindex = finfo->fi_btop;
7584+ bindex >= 0 && bindex <= fidir->fd_bbot;
7585+ bindex++) {
7586+ hfile = fidir->fd_hfile + bindex;
7587+ do_pri_file(bindex, hfile ? hfile->hf_file : NULL);
7588+ }
7589+}
7590+
7591+static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br)
7592+{ /* prints @br and its super_block; returns 0, or -1 on a NULL/error pointer */
7593+ struct vfsmount *mnt;
7594+ struct super_block *sb;
7595+
7596+ if (!br || IS_ERR(br))
7597+ goto out;
7598+ mnt = au_br_mnt(br);
7599+ if (!mnt || IS_ERR(mnt))
7600+ goto out;
7601+ sb = mnt->mnt_sb;
7602+ if (!sb || IS_ERR(sb))
7603+ goto out;
7604+
7605+ dpri("s%d: {perm 0x%x, id %d, wbr %p}, "
7606+ "%s, dev 0x%02x%02x, flags 0x%lx, cnt %d, active %d, "
7607+ "xino %d\n",
7608+ bindex, br->br_perm, br->br_id, br->br_wbr,
7609+ au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev),
7610+ sb->s_flags, sb->s_count,
7611+ atomic_read(&sb->s_active),
7612+ !!au_xino_file(br->br_xino, /*idx*/-1));
7613+ return 0;
7614+
7615+out: /* every failure prints PTR_ERR(br), also when mnt or sb was the bad one */
7616+ dpri("s%d: err %ld\n", bindex, PTR_ERR(br));
7617+ return -1;
7618+}
7619+
7620+void au_dpri_sb(struct super_block *sb)
7621+{ /* prints @sb via a temporary fake branch, then every branch of an aufs sb */
7622+ struct au_sbinfo *sbinfo;
7623+ aufs_bindex_t bindex;
7624+ int err;
7625+ /* to reduce stack size */
7626+ struct {
7627+ struct vfsmount mnt;
7628+ struct au_branch fake;
7629+ } *a;
7630+
7631+ /* this function can be called from magic sysrq */
7632+ a = kzalloc(sizeof(*a), GFP_ATOMIC);
7633+ if (unlikely(!a)) {
7634+ dpri("no memory\n");
7635+ return;
7636+ }
7637+
7638+ a->mnt.mnt_sb = sb; /* the fake branch points to the fake mount of @sb */
7639+ a->fake.br_path.mnt = &a->mnt;
7640+ err = do_pri_br(-1, &a->fake);
7641+ au_kfree_rcu(a);
7642+ dpri("dev 0x%x\n", sb->s_dev);
7643+ if (err || !au_test_aufs(sb))
7644+ return;
7645+
7646+ sbinfo = au_sbi(sb);
7647+ if (!sbinfo)
7648+ return;
7649+ dpri("nw %d, gen %u, kobj %d\n",
7650+ atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation,
7651+ kref_read(&sbinfo->si_kobj.kref));
7652+ for (bindex = 0; bindex <= sbinfo->si_bbot; bindex++)
7653+ do_pri_br(bindex, sbinfo->si_branch[0 + bindex]);
7654+}
7655+
7656+/* ---------------------------------------------------------------------- */
7657+
7658+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line)
7659+{ /* checks that each lower dentry of @dentry refers to the matching lower inode */
7660+ struct inode *h_inode, *inode = d_inode(dentry);
7661+ struct dentry *h_dentry;
7662+ aufs_bindex_t bindex, bbot, bi;
7663+
7664+ if (!inode /* || au_di(dentry)->di_lsc == AuLsc_DI_TMP */)
7665+ return;
7666+
7667+ bbot = au_dbbot(dentry);
7668+ bi = au_ibbot(inode);
7669+ if (bi < bbot) /* the smaller bottom index of the dentry and the inode */
7670+ bbot = bi;
7671+ bindex = au_dbtop(dentry);
7672+ bi = au_ibtop(inode);
7673+ if (bi > bindex) /* the larger top index of the dentry and the inode */
7674+ bindex = bi;
7675+
7676+ for (; bindex <= bbot; bindex++) {
7677+ h_dentry = au_h_dptr(dentry, bindex);
7678+ if (!h_dentry)
7679+ continue;
7680+ h_inode = au_h_iptr(inode, bindex);
7681+ if (unlikely(h_inode != d_inode(h_dentry))) {
7682+ au_debug_on(); /* raise the debug counter for the dump below */
7683+ AuDbg("b%d, %s:%d\n", bindex, func, line);
7684+ AuDbgDentry(dentry);
7685+ AuDbgInode(inode);
7686+ au_debug_off();
7687+ if (au_test_fuse(h_inode->i_sb)) /* a mismatch on fuse only warns */
7688+ WARN_ON_ONCE(1);
7689+ else
7690+ BUG();
7691+ }
7692+ }
7693+}
7694+
7695+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen)
7696+{ /* AuDebugOn() fires when au_digen_test() is non-zero for a collected dentry */
7697+ int err, i, j;
7698+ struct au_dcsub_pages dpages;
7699+ struct au_dpage *dpage;
7700+ struct dentry **dentries;
7701+
7702+ err = au_dpages_init(&dpages, GFP_NOFS);
7703+ AuDebugOn(err);
7704+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/1);
7705+ AuDebugOn(err);
7706+ for (i = dpages.ndpage - 1; !err && i >= 0; i--) { /* from the last page to the first */
7707+ dpage = dpages.dpages + i;
7708+ dentries = dpage->dentries;
7709+ for (j = dpage->ndentry - 1; !err && j >= 0; j--)
7710+ AuDebugOn(au_digen_test(dentries[j], sigen));
7711+ }
7712+ au_dpages_free(&dpages);
7713+}
7714+
7715+void au_dbg_verify_kthread(void)
7716+{ /* calls au_dbg_blocked() when au_wkq_test() is true */
7717+ if (au_wkq_test()) {
7718+ au_dbg_blocked();
7719+ /*
7720+ * It may be recursive, but udba=notify between two aufs mounts,
7721+ * where a single ro branch is shared, is not a problem.
7722+ */
7723+ /* WARN_ON(1); */
7724+ }
7725+}
7726+
7727+/* ---------------------------------------------------------------------- */
7728+
7729+int __init au_debug_init(void)
7730+{ /* sanity checks on two integer types at init time; always returns 0 */
7731+ aufs_bindex_t bindex;
7732+ struct au_vdir_destr destr;
7733+
7734+ bindex = -1;
7735+ AuDebugOn(bindex >= 0); /* fires unless aufs_bindex_t can hold -1 */
7736+
7737+ destr.len = -1;
7738+ AuDebugOn(destr.len < NAME_MAX); /* fires unless len, set to -1, is >= NAME_MAX */
7739+
7740+#ifdef CONFIG_4KSTACKS
7741+ pr_warn("CONFIG_4KSTACKS is defined.\n");
7742+#endif
7743+
7744+ return 0;
7745+}
7746diff -urN /usr/share/empty/fs/aufs/debug.h linux/fs/aufs/debug.h
7747--- /usr/share/empty/fs/aufs/debug.h 1970-01-01 01:00:00.000000000 +0100
7748+++ linux/fs/aufs/debug.h 2022-11-05 23:02:18.962555950 +0100
7749@@ -0,0 +1,226 @@
7750+/* SPDX-License-Identifier: GPL-2.0 */
7751+/*
7752+ * Copyright (C) 2005-2022 Junjiro R. Okajima
7753+ *
7754+ * This program is free software; you can redistribute it and/or modify
7755+ * it under the terms of the GNU General Public License as published by
7756+ * the Free Software Foundation; either version 2 of the License, or
7757+ * (at your option) any later version.
7758+ *
7759+ * This program is distributed in the hope that it will be useful,
7760+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7761+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7762+ * GNU General Public License for more details.
7763+ *
7764+ * You should have received a copy of the GNU General Public License
7765+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7766+ */
7767+
7768+/*
7769+ * debug print functions
7770+ */
7771+
7772+#ifndef __AUFS_DEBUG_H__
7773+#define __AUFS_DEBUG_H__
7774+
7775+#ifdef __KERNEL__
7776+
7777+#include <linux/atomic.h>
7778+#include <linux/module.h>
7779+#include <linux/kallsyms.h>
7780+#include <linux/sysrq.h>
7781+
7782+#ifdef CONFIG_AUFS_DEBUG
7783+#define AuDebugOn(a) BUG_ON(a)
7784+
7785+/* module parameter, debug print is enabled while the counter is positive */
7786+extern atomic_t aufs_debug;
7787+static inline void au_debug_on(void)
7788+{
7789+ atomic_inc(&aufs_debug); /* a counter, so on/off pairs can nest */
7790+}
7791+static inline void au_debug_off(void)
7792+{
7793+ atomic_dec_if_positive(&aufs_debug); /* decremented only while positive */
7794+}
7795+
7796+static inline int au_debug_test(void)
7797+{
7798+ return atomic_read(&aufs_debug) > 0;
7799+}
7800+#else
7801+#define AuDebugOn(a) do {} while (0) /* @a is not evaluated in this case */
7802+AuStubVoid(au_debug_on, void)
7803+AuStubVoid(au_debug_off, void)
7804+AuStubInt0(au_debug_test, void)
7805+#endif /* CONFIG_AUFS_DEBUG */
7806+
7807+#define param_check_atomic_t(name, p) __param_check(name, p, atomic_t)
7808+
7809+/* ---------------------------------------------------------------------- */
7810+
7811+/* debug print macros, AuDbg() prints only while au_debug_test() is true */
7812+
7813+#define AuDbg(fmt, ...) do { \
7814+ if (au_debug_test()) \
7815+ pr_debug("DEBUG: " fmt, ##__VA_ARGS__); \
7816+} while (0)
7817+#define AuLabel(l) AuDbg(#l "\n") /* prints the token @l itself */
7818+#define AuIOErr(fmt, ...) pr_err("I/O Error, " fmt, ##__VA_ARGS__)
7819+#define AuWarn1(fmt, ...) do { \
7820+ static unsigned char _c; \
7821+ if (!_c++) \
7822+ pr_warn(fmt, ##__VA_ARGS__); \
7823+} while (0) /* prints while _c is zero, i.e. again after the counter wraps */
7824+
7825+#define AuErr1(fmt, ...) do { \
7826+ static unsigned char _c; \
7827+ if (!_c++) \
7828+ pr_err(fmt, ##__VA_ARGS__); \
7829+} while (0) /* same counter scheme as AuWarn1() */
7830+
7831+#define AuIOErr1(fmt, ...) do { \
7832+ static unsigned char _c; \
7833+ if (!_c++) \
7834+ AuIOErr(fmt, ##__VA_ARGS__); \
7835+} while (0) /* same counter scheme as AuWarn1() */
7836+
7837+#define AuUnsupportMsg "This operation is not supported." \
7838+ " Please report this application to aufs-users ML."
7839+#define AuUnsupport(fmt, ...) do { \
7840+ pr_err(AuUnsupportMsg "\n" fmt, ##__VA_ARGS__); \
7841+ dump_stack(); \
7842+} while (0)
7843+
7844+#define AuTraceErr(e) do { \
7845+ if (unlikely((e) < 0)) \
7846+ AuDbg("err %d\n", (int)(e)); \
7847+} while (0) /* note: @e is evaluated twice when it is negative */
7848+
7849+#define AuTraceErrPtr(p) do { \
7850+ if (IS_ERR(p)) \
7851+ AuDbg("err %ld\n", PTR_ERR(p)); \
7852+} while (0)
7853+
7854+/* dirty macro for debug print, expands to two arguments for "%.*s", use with caution */
7855+#define AuLNPair(qstr) (qstr)->len, (qstr)->name
7856+
7857+/* ---------------------------------------------------------------------- */
7858+
7859+struct dentry;
7860+#ifdef CONFIG_AUFS_DEBUG
7861+extern struct mutex au_dbg_mtx; /* taken by the AuDbgXxx() dump macros */
7862+extern char *au_plevel;
7863+struct au_nhash;
7864+void au_dpri_whlist(struct au_nhash *whlist);
7865+struct au_vdir;
7866+void au_dpri_vdir(struct au_vdir *vdir);
7867+struct inode;
7868+void au_dpri_inode(struct inode *inode);
7869+void au_dpri_dalias(struct inode *inode);
7870+void au_dpri_dentry(struct dentry *dentry);
7871+struct file;
7872+void au_dpri_file(struct file *filp);
7873+struct super_block;
7874+void au_dpri_sb(struct super_block *sb);
7875+
7876+#define au_dbg_verify_dinode(d) __au_dbg_verify_dinode(d, __func__, __LINE__) /* records the call site */
7877+void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line);
7878+void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen);
7879+void au_dbg_verify_kthread(void);
7880+
7881+int __init au_debug_init(void);
7882+
7883+#define AuDbgWhlist(w) do { \
7884+ mutex_lock(&au_dbg_mtx); \
7885+ AuDbg(#w "\n"); \
7886+ au_dpri_whlist(w); \
7887+ mutex_unlock(&au_dbg_mtx); \
7888+} while (0) /* under au_dbg_mtx: prints the argument text, then dumps the object */
7889+
7890+#define AuDbgVdir(v) do { \
7891+ mutex_lock(&au_dbg_mtx); \
7892+ AuDbg(#v "\n"); \
7893+ au_dpri_vdir(v); \
7894+ mutex_unlock(&au_dbg_mtx); \
7895+} while (0) /* same scheme, for struct au_vdir */
7896+
7897+#define AuDbgInode(i) do { \
7898+ mutex_lock(&au_dbg_mtx); \
7899+ AuDbg(#i "\n"); \
7900+ au_dpri_inode(i); \
7901+ mutex_unlock(&au_dbg_mtx); \
7902+} while (0) /* same scheme, for struct inode */
7903+
7904+#define AuDbgDAlias(i) do { \
7905+ mutex_lock(&au_dbg_mtx); \
7906+ AuDbg(#i "\n"); \
7907+ au_dpri_dalias(i); \
7908+ mutex_unlock(&au_dbg_mtx); \
7909+} while (0) /* same scheme, for the dentry aliases of an inode */
7910+
7911+#define AuDbgDentry(d) do { \
7912+ mutex_lock(&au_dbg_mtx); \
7913+ AuDbg(#d "\n"); \
7914+ au_dpri_dentry(d); \
7915+ mutex_unlock(&au_dbg_mtx); \
7916+} while (0) /* same scheme, for struct dentry */
7917+
7918+#define AuDbgFile(f) do { \
7919+ mutex_lock(&au_dbg_mtx); \
7920+ AuDbg(#f "\n"); \
7921+ au_dpri_file(f); \
7922+ mutex_unlock(&au_dbg_mtx); \
7923+} while (0) /* same scheme, for struct file */
7924+
7925+#define AuDbgSb(sb) do { \
7926+ mutex_lock(&au_dbg_mtx); \
7927+ AuDbg(#sb "\n"); \
7928+ au_dpri_sb(sb); \
7929+ mutex_unlock(&au_dbg_mtx); \
7930+} while (0) /* same scheme, for struct super_block */
7931+
7932+#define AuDbgSym(addr) do { \
7933+ char sym[KSYM_SYMBOL_LEN]; \
7934+ sprint_symbol(sym, (unsigned long)addr); \
7935+ AuDbg("%s\n", sym); \
7936+} while (0) /* prints the symbol text which sprint_symbol() makes from @addr */
7937+#else
7938+AuStubVoid(au_dbg_verify_dinode, struct dentry *dentry)
7939+AuStubVoid(au_dbg_verify_gen, struct dentry *parent, unsigned int sigen)
7940+AuStubVoid(au_dbg_verify_kthread, void)
7941+AuStubInt0(__init au_debug_init, void)
7942+
7943+#define AuDbgWhlist(w) do {} while (0) /* the dump macros expand to nothing here */
7944+#define AuDbgVdir(v) do {} while (0)
7945+#define AuDbgInode(i) do {} while (0)
7946+#define AuDbgDAlias(i) do {} while (0)
7947+#define AuDbgDentry(d) do {} while (0)
7948+#define AuDbgFile(f) do {} while (0)
7949+#define AuDbgSb(sb) do {} while (0)
7950+#define AuDbgSym(addr) do {} while (0)
7951+#endif /* CONFIG_AUFS_DEBUG */
7952+
7953+/* ---------------------------------------------------------------------- */
7954+
7955+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
7956+int __init au_sysrq_init(void);
7957+void au_sysrq_fin(void);
7958+
7959+#ifdef CONFIG_HW_CONSOLE
7960+#define au_dbg_blocked() do { \
7961+ WARN_ON(1); \
7962+ handle_sysrq('w'); \
7963+} while (0) /* warns, then triggers the sysrq 'w' handler */
7964+#else
7965+AuStubVoid(au_dbg_blocked, void)
7966+#endif
7967+
7968+#else
7969+AuStubInt0(__init au_sysrq_init, void)
7970+AuStubVoid(au_sysrq_fin, void)
7971+AuStubVoid(au_dbg_blocked, void)
7972+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
7973+
7974+#endif /* __KERNEL__ */
7975+#endif /* __AUFS_DEBUG_H__ */
7976diff -urN /usr/share/empty/fs/aufs/dentry.c linux/fs/aufs/dentry.c
7977--- /usr/share/empty/fs/aufs/dentry.c 1970-01-01 01:00:00.000000000 +0100
7978+++ linux/fs/aufs/dentry.c 2022-11-05 23:02:18.962555950 +0100
7979@@ -0,0 +1,1168 @@
7980+// SPDX-License-Identifier: GPL-2.0
7981+/*
7982+ * Copyright (C) 2005-2022 Junjiro R. Okajima
7983+ *
7984+ * This program is free software; you can redistribute it and/or modify
7985+ * it under the terms of the GNU General Public License as published by
7986+ * the Free Software Foundation; either version 2 of the License, or
7987+ * (at your option) any later version.
7988+ *
7989+ * This program is distributed in the hope that it will be useful,
7990+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
7991+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7992+ * GNU General Public License for more details.
7993+ *
7994+ * You should have received a copy of the GNU General Public License
7995+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
7996+ */
7997+
7998+/*
7999+ * lookup and dentry operations
8000+ */
8001+
8002+#include <linux/iversion.h>
8003+#include "aufs.h"
8004+
8005+/*
8006+ * looks up args->name under @h_parent on the branch @bindex.
8007+ * returns a positive/negative dentry, NULL (whiteout-ed or not-found) or an error.
8008+ */
8009+static struct dentry*
8010+au_do_lookup(struct dentry *h_parent, struct dentry *dentry,
8011+ aufs_bindex_t bindex, struct au_do_lookup_args *args)
8012+{
8013+ struct dentry *h_dentry;
8014+ struct inode *h_inode;
8015+ struct au_branch *br;
8016+ struct user_namespace *h_userns;
8017+ struct path h_path;
8018+ int wh_found, opq;
8019+ unsigned char wh_able;
8020+ const unsigned char allow_neg = !!au_ftest_lkup(args->flags, ALLOW_NEG);
8021+ const unsigned char ignore_perm = !!au_ftest_lkup(args->flags,
8022+ IGNORE_PERM);
8023+
8024+ wh_found = 0;
8025+ br = au_sbr(dentry->d_sb, bindex);
8026+ h_path.dentry = h_parent;
8027+ h_path.mnt = au_br_mnt(br);
8028+ h_userns = au_br_userns(br);
8029+ wh_able = !!au_br_whable(br->br_perm);
8030+ if (wh_able) /* the whiteout is tested only on a whiteout-able branch */
8031+ wh_found = au_wh_test(h_userns, &h_path, &args->whname,
8032+ ignore_perm);
8033+ h_dentry = ERR_PTR(wh_found); /* returned as it is when wh_found < 0 */
8034+ if (!wh_found)
8035+ goto real_lookup;
8036+ if (unlikely(wh_found < 0))
8037+ goto out;
8038+
8039+ /* We found a whiteout */
8040+ /* au_set_dbbot(dentry, bindex); */
8041+ au_set_dbwh(dentry, bindex);
8042+ if (!allow_neg)
8043+ return NULL; /* success */
8044+
8045+real_lookup:
8046+ if (!ignore_perm)
8047+ h_dentry = vfsub_lkup_one(args->name, &h_path);
8048+ else
8049+ h_dentry = au_sio_lkup_one(h_userns, args->name, &h_path);
8050+ if (IS_ERR(h_dentry)) {
8051+ if (PTR_ERR(h_dentry) == -ENAMETOOLONG
8052+ && !allow_neg)
8053+ h_dentry = NULL; /* handled as not-found instead of an error */
8054+ goto out;
8055+ }
8056+
8057+ h_inode = d_inode(h_dentry);
8058+ if (d_is_negative(h_dentry)) {
8059+ if (!allow_neg)
8060+ goto out_neg;
8061+ } else if (wh_found
8062+ || (args->type && args->type != (h_inode->i_mode & S_IFMT)))
8063+ goto out_neg; /* positive but whiteout-ed, or the file type differs */
8064+ else if (au_ftest_lkup(args->flags, DIRREN)
8065+ /* && h_inode */
8066+ && !au_dr_lkup_h_ino(args, bindex, h_inode->i_ino)) {
8067+ AuDbg("b%d %pd ignored hi%llu\n", bindex, h_dentry,
8068+ (unsigned long long)h_inode->i_ino);
8069+ goto out_neg;
8070+ }
8071+
8072+ if (au_dbbot(dentry) <= bindex) /* widen the branch range of @dentry */
8073+ au_set_dbbot(dentry, bindex);
8074+ if (au_dbtop(dentry) < 0 || bindex < au_dbtop(dentry))
8075+ au_set_dbtop(dentry, bindex);
8076+ au_set_h_dptr(dentry, bindex, h_dentry);
8077+
8078+ if (!d_is_dir(h_dentry)
8079+ || !wh_able
8080+ || (d_really_is_positive(dentry) && !d_is_dir(dentry)))
8081+ goto out; /* success */
8082+
8083+ h_path.dentry = h_dentry; /* from here h_path refers to the found dir */
8084+ inode_lock_shared_nested(h_inode, AuLsc_I_CHILD);
8085+ opq = au_diropq_test(h_userns, &h_path);
8086+ inode_unlock_shared(h_inode);
8087+ if (opq > 0)
8088+ au_set_dbdiropq(dentry, bindex);
8089+ else if (unlikely(opq < 0)) {
8090+ au_set_h_dptr(dentry, bindex, NULL); /* undo the registration above */
8091+ h_dentry = ERR_PTR(opq);
8092+ }
8093+ goto out;
8094+
8095+out_neg: /* drop the found dentry and report NULL */
8096+ dput(h_dentry);
8097+ h_dentry = NULL;
8098+out:
8099+ return h_dentry;
8100+}
8101+
8102+static int au_test_shwh(struct super_block *sb, const struct qstr *name)
8103+{
8104+ /* -EPERM for a name with the whiteout prefix, unless SHWH is set */
8105+ if (au_opt_test(au_mntflags(sb), SHWH))
8106+ return 0;
8107+ return strncmp(name->name, AUFS_WH_PFX, AUFS_WH_PFX_LEN) ? 0 : -EPERM;
8108+}
8109+
8110+/*
8111+ * returns the number of lower positive dentries,
8112+ * otherwise an error.
8113+ * can be called at unlinking, with @type being zero.
8114+ */
8115+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t btop,
8116+ unsigned int flags)
8117+{
8118+ int npositive, err;
8119+ aufs_bindex_t bindex, btail, bdiropq;
8120+ unsigned char isdir, dirperm1, dirren;
8121+ struct au_do_lookup_args args = {
8122+ .flags = flags,
8123+ .name = &dentry->d_name
8124+ };
8125+ struct dentry *parent;
8126+ struct super_block *sb;
8127+
8128+ sb = dentry->d_sb;
8129+ err = au_test_shwh(sb, args.name);
8130+ if (unlikely(err))
8131+ goto out;
8132+
8133+ err = au_wh_name_alloc(&args.whname, args.name); /* freed at out_parent */
8134+ if (unlikely(err))
8135+ goto out;
8136+
8137+ isdir = !!d_is_dir(dentry);
8138+ dirperm1 = !!au_opt_test(au_mntflags(sb), DIRPERM1);
8139+ dirren = !!au_opt_test(au_mntflags(sb), DIRREN);
8140+ if (dirren)
8141+ au_fset_lkup(args.flags, DIRREN);
8142+
8143+ npositive = 0;
8144+ parent = dget_parent(dentry);
8145+ btail = au_dbtaildir(parent);
8146+ for (bindex = btop; bindex <= btail; bindex++) {
8147+ struct dentry *h_parent, *h_dentry;
8148+ struct inode *h_inode, *h_dir;
8149+ struct au_branch *br;
8150+
8151+ h_dentry = au_h_dptr(dentry, bindex);
8152+ if (h_dentry) { /* already set on this branch, the loop ends here */
8153+ if (d_is_positive(h_dentry))
8154+ npositive++;
8155+ break;
8156+ }
8157+ h_parent = au_h_dptr(parent, bindex);
8158+ if (!h_parent || !d_is_dir(h_parent))
8159+ continue;
8160+
8161+ if (dirren) {
8162+ /* if the inum matches, then use the prepared name */
8163+ err = au_dr_lkup_name(&args, bindex);
8164+ if (unlikely(err))
8165+ goto out_parent;
8166+ }
8167+
8168+ h_dir = d_inode(h_parent);
8169+ inode_lock_shared_nested(h_dir, AuLsc_I_PARENT);
8170+ h_dentry = au_do_lookup(h_parent, dentry, bindex, &args);
8171+ inode_unlock_shared(h_dir);
8172+ err = PTR_ERR(h_dentry);
8173+ if (IS_ERR(h_dentry))
8174+ goto out_parent;
8175+ if (h_dentry) /* once something is found, negatives are refused */
8176+ au_fclr_lkup(args.flags, ALLOW_NEG);
8177+ if (dirperm1)
8178+ au_fset_lkup(args.flags, IGNORE_PERM);
8179+
8180+ if (au_dbwh(dentry) == bindex) /* a whiteout was recorded on this branch */
8181+ break;
8182+ if (!h_dentry)
8183+ continue;
8184+ if (d_is_negative(h_dentry))
8185+ continue;
8186+ h_inode = d_inode(h_dentry);
8187+ npositive++;
8188+ if (!args.type) /* the first positive one decides the file type */
8189+ args.type = h_inode->i_mode & S_IFMT;
8190+ if (args.type != S_IFDIR)
8191+ break;
8192+ else if (isdir) {
8193+ /* the type of lower may be different */
8194+ bdiropq = au_dbdiropq(dentry);
8195+ if (bdiropq >= 0 && bdiropq <= bindex)
8196+ break;
8197+ }
8198+ br = au_sbr(sb, bindex);
8199+ if (dirren
8200+ && au_dr_hino_test_add(&br->br_dirren, h_inode->i_ino,
8201+ /*add_ent*/NULL)) {
8202+ /* prepare next name to lookup */
8203+ err = au_dr_lkup(&args, dentry, bindex);
8204+ if (unlikely(err))
8205+ goto out_parent;
8206+ }
8207+ }
8208+
8209+ if (npositive) {
8210+ AuLabel(positive);
8211+ au_update_dbtop(dentry);
8212+ }
8213+ err = npositive; /* the success value is the count of positive dentries */
8214+ if (unlikely(!au_opt_test(au_mntflags(sb), UDBA_NONE)
8215+ && au_dbtop(dentry) < 0)) {
8216+ err = -EIO;
8217+ AuIOErr("both of real entry and whiteout found, %pd, err %d\n",
8218+ dentry, err);
8219+ }
8220+
8221+out_parent:
8222+ dput(parent);
8223+ au_kfree_try_rcu(args.whname.name);
8224+ if (dirren)
8225+ au_dr_lkup_fin(&args);
8226+out:
8227+ return err;
8228+}
8229+
8230+struct dentry *au_sio_lkup_one(struct user_namespace *userns, struct qstr *name,
8231+ struct path *ppath)
8232+{
8233+ struct dentry *found;
8234+ struct vfsub_lkup_one_args args = {
8235+ .errp = &found,
8236+ .name = name,
8237+ .ppath = ppath
8238+ };
8239+ int ret;
8240+
8241+ /* au_test_h_perm_sio() returned zero, look it up directly */
8242+ if (!au_test_h_perm_sio(userns, d_inode(ppath->dentry), MAY_EXEC))
8243+ return vfsub_lkup_one(name, ppath);
8244+
8245+ /* otherwise run the lookup via au_wkq_wait(), which fills 'found' */
8246+ ret = au_wkq_wait(vfsub_call_lkup_one, &args);
8247+ if (unlikely(ret))
8248+ found = ERR_PTR(ret);
8249+
8250+ return found;
8251+}
8252+
8253+/*
8254+ * lookup @dentry on @bindex which should be negative; returns 0 or an error.
8255+ */
8256+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh)
8257+{
8258+ int err;
8259+ struct dentry *parent, *h_dentry;
8260+ struct au_branch *br;
8261+ struct user_namespace *h_userns;
8262+ struct path h_ppath;
8263+
8264+ parent = dget_parent(dentry);
8265+ br = au_sbr(dentry->d_sb, bindex);
8266+ h_ppath.dentry = au_h_dptr(parent, bindex);
8267+ h_ppath.mnt = au_br_mnt(br);
8268+ h_userns = au_br_userns(br);
8269+ if (wh) /* a non-zero @wh selects au_whtmp_lkup() */
8270+ h_dentry = au_whtmp_lkup(h_ppath.dentry, br, &dentry->d_name);
8271+ else
8272+ h_dentry = au_sio_lkup_one(h_userns, &dentry->d_name, &h_ppath);
8273+ err = PTR_ERR(h_dentry);
8274+ if (IS_ERR(h_dentry))
8275+ goto out;
8276+ if (unlikely(d_is_positive(h_dentry))) { /* unexpectedly exists */
8277+ err = -EIO;
8278+ AuIOErr("%pd should be negative on b%d.\n", h_dentry, bindex);
8279+ dput(h_dentry);
8280+ goto out;
8281+ }
8282+
8283+ err = 0;
8284+ if (bindex < au_dbtop(dentry)) /* widen the branch range of @dentry */
8285+ au_set_dbtop(dentry, bindex);
8286+ if (au_dbbot(dentry) < bindex)
8287+ au_set_dbbot(dentry, bindex);
8288+ au_set_h_dptr(dentry, bindex, h_dentry);
8289+
8290+out:
8291+ dput(parent);
8292+ return err;
8293+}
8294+
8295+/* ---------------------------------------------------------------------- */
8296+
8297+/* subset of struct inode, saved by au_iattr_save() and compared by au_iattr_test() */
8298+struct au_iattr {
8299+ unsigned long i_ino;
8300+ /* unsigned int i_nlink; */
8301+ kuid_t i_uid;
8302+ kgid_t i_gid;
8303+ u64 i_version;
8304+/*
8305+ loff_t i_size;
8306+ blkcnt_t i_blocks;
8307+*/
8308+ umode_t i_mode; /* only the S_IFMT bits are stored */
8309+};
8310+
8311+static void au_iattr_save(struct au_iattr *ia, struct inode *h_inode)
8312+{ /* copies the attributes of @h_inode which au_iattr_test() compares later */
8313+ ia->i_ino = h_inode->i_ino;
8314+ /* ia->i_nlink = h_inode->i_nlink; */
8315+ ia->i_uid = h_inode->i_uid;
8316+ ia->i_gid = h_inode->i_gid;
8317+ ia->i_version = inode_query_iversion(h_inode);
8318+/*
8319+ ia->i_size = h_inode->i_size;
8320+ ia->i_blocks = h_inode->i_blocks;
8321+*/
8322+ ia->i_mode = (h_inode->i_mode & S_IFMT); /* the file type bits only */
8323+}
8324+
8325+static int au_iattr_test(struct au_iattr *ia, struct inode *h_inode)
8326+{ /* non-zero when any saved attribute differs from the current @h_inode */
8327+ return ia->i_ino != h_inode->i_ino
8328+ /* || ia->i_nlink != h_inode->i_nlink */
8329+ || !uid_eq(ia->i_uid, h_inode->i_uid)
8330+ || !gid_eq(ia->i_gid, h_inode->i_gid)
8331+ || !inode_eq_iversion(h_inode, ia->i_version)
8332+/*
8333+ || ia->i_size != h_inode->i_size
8334+ || ia->i_blocks != h_inode->i_blocks
8335+*/
8336+ || ia->i_mode != (h_inode->i_mode & S_IFMT);
8337+}
8338+
8339+static int au_h_verify_dentry(struct dentry *h_dentry, struct dentry *h_parent,
8340+ struct au_branch *br)
8341+{
8342+ int err;
8343+ struct au_iattr ia;
8344+ struct inode *h_inode;
8345+ struct dentry *h_d;
8346+ struct super_block *h_sb;
8347+ struct path h_ppath;
8348+ /* re-lookup h_dentry's name under h_parent and check that the same dentry/inode with unchanged attributes comes back; returns 0, the lookup error, or au_busy_or_stale() */
8349+ err = 0;
8350+ memset(&ia, -1, sizeof(ia));
8351+ h_sb = h_dentry->d_sb;
8352+ h_inode = NULL;
8353+ if (d_is_positive(h_dentry)) {
8354+ h_inode = d_inode(h_dentry);
8355+ au_iattr_save(&ia, h_inode);
8356+ } else if (au_test_nfs(h_sb) || au_test_fuse(h_sb))
8357+ /* nfs d_revalidate may return 0 for a negative dentry */
8358+ /* fuse d_revalidate always returns 0 for a negative dentry */
8359+ goto out;
8360+
8361+ /* main purpose is namei.c:cached_lookup() and d_revalidate */
8362+ h_ppath.dentry = h_parent;
8363+ h_ppath.mnt = au_br_mnt(br);
8364+ h_d = vfsub_lkup_one(&h_dentry->d_name, &h_ppath);
8365+ err = PTR_ERR(h_d);
8366+ if (IS_ERR(h_d))
8367+ goto out;
8368+ /* a different dentry, a different inode, or changed saved attributes all count as a mismatch */
8369+ err = 0;
8370+ if (unlikely(h_d != h_dentry
8371+ || d_inode(h_d) != h_inode
8372+ || (h_inode && au_iattr_test(&ia, h_inode))))
8373+ err = au_busy_or_stale();
8374+ dput(h_d);
8375+
8376+out:
8377+ AuTraceErr(err);
8378+ return err;
8379+}
8380+
8381+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
8382+ struct dentry *h_parent, struct au_branch *br)
8383+{
8384+ int err;
8385+ /* verify a lower dentry according to the udba mode; returns 0 when nothing is wrong (always for AuOpt_UDBA_NONE), non-zero otherwise */
8386+ err = 0;
8387+ if (udba == AuOpt_UDBA_REVAL
8388+ && !au_test_fs_remote(h_dentry->d_sb)) {
8389+ IMustLock(h_dir);
8390+ err = (d_inode(h_dentry->d_parent) != h_dir); /* 0 or 1, not an errno: only checks that the parent is still h_dir */
8391+ } else if (udba != AuOpt_UDBA_NONE)
8392+ err = au_h_verify_dentry(h_dentry, h_parent, br);
8393+
8394+ return err;
8395+}
8396+
8397+/* ---------------------------------------------------------------------- */
8398+
8399+static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent)
8400+{
8401+ int err;
8402+ aufs_bindex_t new_bindex, bindex, bbot, bwh, bdiropq;
8403+ struct au_hdentry tmp, *p, *q;
8404+ struct au_dinfo *dinfo;
8405+ struct super_block *sb;
8406+ /* move every lower dentry to the slot au_br_index() now reports for its hd_id, then recompute di_bwh, di_bdiropq, di_btop and di_bbot; returns 0, or -EIO when no lower dentry is left */
8407+ DiMustWriteLock(dentry);
8408+
8409+ sb = dentry->d_sb;
8410+ dinfo = au_di(dentry);
8411+ bbot = dinfo->di_bbot;
8412+ bwh = dinfo->di_bwh;
8413+ bdiropq = dinfo->di_bdiropq;
8414+ bindex = dinfo->di_btop;
8415+ p = au_hdentry(dinfo, bindex);
8416+ for (; bindex <= bbot; bindex++, p++) {
8417+ if (!p->hd_dentry)
8418+ continue;
8419+
8420+ new_bindex = au_br_index(sb, p->hd_id);
8421+ if (new_bindex == bindex)
8422+ continue;
8423+ /* remember where the whiteout/diropq index moves to; applied after the loop */
8424+ if (dinfo->di_bwh == bindex)
8425+ bwh = new_bindex;
8426+ if (dinfo->di_bdiropq == bindex)
8427+ bdiropq = new_bindex;
8428+ if (new_bindex < 0) { /* no index for this hd_id any more: drop the lower dentry */
8429+ au_hdput(p);
8430+ p->hd_dentry = NULL;
8431+ continue;
8432+ }
8433+
8434+ /* swap two lower dentries, and loop again */
8435+ q = au_hdentry(dinfo, new_bindex);
8436+ tmp = *q;
8437+ *q = *p;
8438+ *p = tmp;
8439+ if (tmp.hd_dentry) { /* the entry swapped into this slot must be examined too */
8440+ bindex--;
8441+ p--;
8442+ }
8443+ }
8444+ /* keep bwh/bdiropq only when the new index is within 0..au_sbbot() and au_sbr_whable() accepts it */
8445+ dinfo->di_bwh = -1;
8446+ if (bwh >= 0 && bwh <= au_sbbot(sb) && au_sbr_whable(sb, bwh))
8447+ dinfo->di_bwh = bwh;
8448+
8449+ dinfo->di_bdiropq = -1;
8450+ if (bdiropq >= 0
8451+ && bdiropq <= au_sbbot(sb)
8452+ && au_sbr_whable(sb, bdiropq))
8453+ dinfo->di_bdiropq = bdiropq;
8454+ /* rescan slots 0..au_dbbot(parent) for the first and last populated one */
8455+ err = -EIO;
8456+ dinfo->di_btop = -1;
8457+ dinfo->di_bbot = -1;
8458+ bbot = au_dbbot(parent);
8459+ bindex = 0;
8460+ p = au_hdentry(dinfo, bindex);
8461+ for (; bindex <= bbot; bindex++, p++)
8462+ if (p->hd_dentry) {
8463+ dinfo->di_btop = bindex;
8464+ break;
8465+ }
8466+
8467+ if (dinfo->di_btop >= 0) {
8468+ bindex = bbot;
8469+ p = au_hdentry(dinfo, bindex);
8470+ for (; bindex >= 0; bindex--, p--)
8471+ if (p->hd_dentry) {
8472+ dinfo->di_bbot = bindex;
8473+ err = 0;
8474+ break;
8475+ }
8476+ }
8477+
8478+ return err;
8479+}
8480+
8481+static void au_do_hide(struct dentry *dentry)
8482+{
8483+ struct inode *inode;
8484+ /* make one dentry look removed: adjust the inode's link count (and flag a directory S_DEAD), then unhash the dentry */
8485+ if (d_really_is_positive(dentry)) {
8486+ inode = d_inode(dentry);
8487+ if (!d_is_dir(dentry)) {
8488+ if (inode->i_nlink && !d_unhashed(dentry)) /* only while a link is left and the dentry is still hashed */
8489+ drop_nlink(inode);
8490+ } else {
8491+ clear_nlink(inode);
8492+ /* stop next lookup */
8493+ inode->i_flags |= S_DEAD;
8494+ }
8495+ smp_mb(); /* necessary? */
8496+ }
8497+ d_drop(dentry);
8498+}
8499+
8500+static int au_hide_children(struct dentry *parent)
8501+{
8502+ int err, i, j, ndentry;
8503+ struct au_dcsub_pages dpages;
8504+ struct au_dpage *dpage;
8505+ struct dentry *dentry;
8506+ /* apply au_do_hide() to every dentry au_dcsub_pages() collects for parent (presumably its cached descendants; confirm against au_dcsub_pages()), except parent itself; returns 0 or the collection error */
8507+ err = au_dpages_init(&dpages, GFP_NOFS);
8508+ if (unlikely(err))
8509+ goto out;
8510+ err = au_dcsub_pages(&dpages, parent, NULL, NULL);
8511+ if (unlikely(err))
8512+ goto out_dpages;
8513+
8514+ /* in reverse order */
8515+ for (i = dpages.ndpage - 1; i >= 0; i--) {
8516+ dpage = dpages.dpages + i;
8517+ ndentry = dpage->ndentry;
8518+ for (j = ndentry - 1; j >= 0; j--) {
8519+ dentry = dpage->dentries[j];
8520+ if (dentry != parent)
8521+ au_do_hide(dentry);
8522+ }
8523+ }
8524+
8525+out_dpages:
8526+ au_dpages_free(&dpages);
8527+out:
8528+ return err;
8529+}
8530+
8531+static void au_hide(struct dentry *dentry)
8532+{
8533+ int err;
8534+ /* hide dentry; for a directory, hide its children first (a failure there is logged and ignored) */
8535+ AuDbgDentry(dentry);
8536+ if (d_is_dir(dentry)) {
8537+ /* shrink_dcache_parent(dentry); */
8538+ err = au_hide_children(dentry);
8539+ if (unlikely(err))
8540+ AuIOErr("%pd, failed hiding children, ignored %d\n",
8541+ dentry, err);
8542+ }
8543+ au_do_hide(dentry);
8544+}
8545+
8546+/*
8547+ * By adding a dirty branch, a cached dentry may be affected in various ways.
8548+ *
8549+ * a dirty branch is added
8550+ * - on the top of layers
8551+ * - in the middle of layers
8552+ * - to the bottom of layers
8553+ *
8554+ * on the added branch there exists
8555+ * - a whiteout
8556+ * - a diropq
8557+ * - a same named entry
8558+ * + exist
8559+ * * negative --> positive
8560+ * * positive --> positive
8561+ * - type is unchanged
8562+ * - type is changed
8563+ * + doesn't exist
8564+ * * negative --> negative
8565+ * * positive --> negative (rejected by au_br_del() for non-dir case)
8566+ * - none
8567+ */
8568+static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo,
8569+ struct au_dinfo *tmp)
8570+{
8571+ int err;
8572+ aufs_bindex_t bindex, bbot;
8573+ struct {
8574+ struct dentry *dentry;
8575+ struct inode *inode;
8576+ mode_t mode;
8577+ } orig_h, tmp_h = {
8578+ .dentry = NULL
8579+ };
8580+ struct au_hdentry *hd;
8581+ struct inode *inode, *h_inode;
8582+ struct dentry *h_dentry;
8583+
8584+ err = 0;
8585+ AuDebugOn(dinfo->di_btop < 0);
8586+ orig_h.mode = 0;
8587+ orig_h.dentry = au_hdentry(dinfo, dinfo->di_btop)->hd_dentry;
8588+ orig_h.inode = NULL;
8589+ if (d_is_positive(orig_h.dentry)) {
8590+ orig_h.inode = d_inode(orig_h.dentry);
8591+ orig_h.mode = orig_h.inode->i_mode & S_IFMT;
8592+ }
8593+ if (tmp->di_btop >= 0) {
8594+ tmp_h.dentry = au_hdentry(tmp, tmp->di_btop)->hd_dentry;
8595+ if (d_is_positive(tmp_h.dentry)) {
8596+ tmp_h.inode = d_inode(tmp_h.dentry);
8597+ tmp_h.mode = tmp_h.inode->i_mode & S_IFMT;
8598+ }
8599+ }
8600+
8601+ inode = NULL;
8602+ if (d_really_is_positive(dentry))
8603+ inode = d_inode(dentry);
8604+ if (!orig_h.inode) {
8605+ AuDbg("negative originally\n");
8606+ if (inode) {
8607+ au_hide(dentry);
8608+ goto out;
8609+ }
8610+ AuDebugOn(inode);
8611+ AuDebugOn(dinfo->di_btop != dinfo->di_bbot);
8612+ AuDebugOn(dinfo->di_bdiropq != -1);
8613+
8614+ if (!tmp_h.inode) {
8615+ AuDbg("negative --> negative\n");
8616+ /* should have only one negative lower */
8617+ if (tmp->di_btop >= 0
8618+ && tmp->di_btop < dinfo->di_btop) {
8619+ AuDebugOn(tmp->di_btop != tmp->di_bbot);
8620+ AuDebugOn(dinfo->di_btop != dinfo->di_bbot);
8621+ au_set_h_dptr(dentry, dinfo->di_btop, NULL);
8622+ au_di_cp(dinfo, tmp);
8623+ hd = au_hdentry(tmp, tmp->di_btop);
8624+ au_set_h_dptr(dentry, tmp->di_btop,
8625+ dget(hd->hd_dentry));
8626+ }
8627+ au_dbg_verify_dinode(dentry);
8628+ } else {
8629+ AuDbg("negative --> positive\n");
8630+ /*
8631+ * similar to the behaviour of creating with bypassing
8632+ * aufs.
8633+ * unhash it in order to force an error in the
8634+ * succeeding create operation.
8635+ * we should not set S_DEAD here.
8636+ */
8637+ d_drop(dentry);
8638+ /* au_di_swap(tmp, dinfo); */
8639+ au_dbg_verify_dinode(dentry);
8640+ }
8641+ } else {
8642+ AuDbg("positive originally\n");
8643+ /* inode may be NULL */
8644+ AuDebugOn(inode && (inode->i_mode & S_IFMT) != orig_h.mode);
8645+ if (!tmp_h.inode) {
8646+ AuDbg("positive --> negative\n");
8647+ /* or bypassing aufs */
8648+ au_hide(dentry);
8649+ if (tmp->di_bwh >= 0 && tmp->di_bwh <= dinfo->di_btop)
8650+ dinfo->di_bwh = tmp->di_bwh;
8651+ if (inode)
8652+ err = au_refresh_hinode_self(inode);
8653+ au_dbg_verify_dinode(dentry);
8654+ } else if (orig_h.mode == tmp_h.mode) {
8655+ AuDbg("positive --> positive, same type\n");
8656+ if (!S_ISDIR(orig_h.mode)
8657+ && dinfo->di_btop > tmp->di_btop) {
8658+ /*
8659+ * similar to the behaviour of removing and
8660+ * creating.
8661+ */
8662+ au_hide(dentry);
8663+ if (inode)
8664+ err = au_refresh_hinode_self(inode);
8665+ au_dbg_verify_dinode(dentry);
8666+ } else {
8667+ /* fill empty slots */
8668+ if (dinfo->di_btop > tmp->di_btop)
8669+ dinfo->di_btop = tmp->di_btop;
8670+ if (dinfo->di_bbot < tmp->di_bbot)
8671+ dinfo->di_bbot = tmp->di_bbot;
8672+ dinfo->di_bwh = tmp->di_bwh;
8673+ dinfo->di_bdiropq = tmp->di_bdiropq;
8674+ bbot = dinfo->di_bbot;
8675+ bindex = tmp->di_btop;
8676+ hd = au_hdentry(tmp, bindex);
8677+ for (; bindex <= bbot; bindex++, hd++) {
8678+ if (au_h_dptr(dentry, bindex))
8679+ continue;
8680+ h_dentry = hd->hd_dentry;
8681+ if (!h_dentry)
8682+ continue;
8683+ AuDebugOn(d_is_negative(h_dentry));
8684+ h_inode = d_inode(h_dentry);
8685+ AuDebugOn(orig_h.mode
8686+ != (h_inode->i_mode
8687+ & S_IFMT));
8688+ au_set_h_dptr(dentry, bindex,
8689+ dget(h_dentry));
8690+ }
8691+ if (inode)
8692+ err = au_refresh_hinode(inode, dentry);
8693+ au_dbg_verify_dinode(dentry);
8694+ }
8695+ } else {
8696+ AuDbg("positive --> positive, different type\n");
8697+ /* similar to the behaviour of removing and creating */
8698+ au_hide(dentry);
8699+ if (inode)
8700+ err = au_refresh_hinode_self(inode);
8701+ au_dbg_verify_dinode(dentry);
8702+ }
8703+ }
8704+
8705+out:
8706+ return err;
8707+}
8708+
8709+void au_refresh_dop(struct dentry *dentry, int force_reval)
8710+{
8711+ const struct dentry_operations *dop
8712+ = force_reval ? &aufs_dop : dentry->d_sb->s_d_op;
8713+ static const unsigned int mask
8714+ = DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE;
8715+ /* switch dentry->d_op to aufs_dop (force_reval) or to the superblock's s_d_op, keeping the two revalidate bits in d_flags in step with the chosen table */
8716+ BUILD_BUG_ON(sizeof(mask) != sizeof(dentry->d_flags));
8717+
8718+ if (dentry->d_op == dop)
8719+ return;
8720+
8721+ AuDbg("%pd\n", dentry);
8722+ spin_lock(&dentry->d_lock);
8723+ if (dop == &aufs_dop) /* aufs_dop provides both revalidate methods */
8724+ dentry->d_flags |= mask;
8725+ else
8726+ dentry->d_flags &= ~mask;
8727+ dentry->d_op = dop;
8728+ spin_unlock(&dentry->d_lock);
8729+}
8730+
8731+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent)
8732+{
8733+ int err, ebrange, nbr;
8734+ unsigned int sigen;
8735+ struct au_dinfo *dinfo, *tmp;
8736+ struct super_block *sb;
8737+ struct inode *inode;
8738+ /* bring dentry's lower-dentry table up to the superblock's current generation; dentry must be write-locked and non-root; returns 0 or an error */
8739+ DiMustWriteLock(dentry);
8740+ AuDebugOn(IS_ROOT(dentry));
8741+ AuDebugOn(d_really_is_negative(parent));
8742+
8743+ sb = dentry->d_sb;
8744+ sigen = au_sigen(sb);
8745+ err = au_digen_test(parent, sigen); /* a non-zero result for the parent aborts the refresh */
8746+ if (unlikely(err))
8747+ goto out;
8748+
8749+ nbr = au_sbbot(sb) + 1;
8750+ dinfo = au_di(dentry);
8751+ err = au_di_realloc(dinfo, nbr, /*may_shrink*/0);
8752+ if (unlikely(err))
8753+ goto out;
8754+ ebrange = au_dbrange_test(dentry);
8755+ if (!ebrange)
8756+ ebrange = au_do_refresh_hdentry(dentry, parent);
8757+ /* unhashed dentry, or a bad/empty lower range: only resync the inode and skip the re-lookup below */
8758+ if (d_unhashed(dentry) || ebrange /* || dinfo->di_tmpfile */) {
8759+ AuDebugOn(au_dbtop(dentry) < 0 && au_dbbot(dentry) >= 0);
8760+ if (d_really_is_positive(dentry)) {
8761+ inode = d_inode(dentry);
8762+ err = au_refresh_hinode_self(inode);
8763+ }
8764+ au_dbg_verify_dinode(dentry);
8765+ if (!err)
8766+ goto out_dgen; /* success */
8767+ goto out;
8768+ }
8769+
8770+ /* temporary dinfo */
8771+ AuDbgDentry(dentry);
8772+ err = -ENOMEM;
8773+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
8774+ if (unlikely(!tmp))
8775+ goto out;
8776+ au_di_swap(tmp, dinfo); /* first swap before the lookup; it is swapped back right after, so dinfo/tmp then serve as the two sides compared by au_refresh_by_dinfo() */
8777+ /* returns the number of positive dentries */
8778+ /*
8779+ * if current working dir is removed, it returns an error.
8780+ * but the dentry is legal.
8781+ */
8782+ err = au_lkup_dentry(dentry, /*btop*/0, AuLkup_ALLOW_NEG);
8783+ AuDbgDentry(dentry);
8784+ au_di_swap(tmp, dinfo);
8785+ if (err == -ENOENT)
8786+ err = 0;
8787+ if (err >= 0) {
8788+ /* compare/refresh by dinfo */
8789+ AuDbgDentry(dentry);
8790+ err = au_refresh_by_dinfo(dentry, dinfo, tmp);
8791+ au_dbg_verify_dinode(dentry);
8792+ AuTraceErr(err);
8793+ }
8794+ au_di_realloc(dinfo, nbr, /*may_shrink*/1); /* harmless if err */
8795+ au_rw_write_unlock(&tmp->di_rwsem); /* release tmp's di_rwsem before freeing it */
8796+ au_di_free(tmp);
8797+ if (unlikely(err))
8798+ goto out;
8799+
8800+out_dgen:
8801+ au_update_digen(dentry);
8802+out:
8803+ if (unlikely(err && !(dentry->d_flags & DCACHE_NFSFS_RENAMED))) {
8804+ AuIOErr("failed refreshing %pd, %d\n", dentry, err);
8805+ AuDbgDentry(dentry);
8806+ }
8807+ AuTraceErr(err);
8808+ return err;
8809+}
8810+
8811+static int au_do_h_d_reval(struct dentry *h_dentry, unsigned int flags,
8812+ struct dentry *dentry, aufs_bindex_t bindex)
8813+{
8814+ int err, valid;
8815+ /* call the lower dentry's ->d_revalidate when it has DCACHE_OP_REVALIDATE set; returns 0 if valid or not applicable, the negative value it returned, or -EINVAL when it returned 0 */
8816+ err = 0;
8817+ if (!(h_dentry->d_flags & DCACHE_OP_REVALIDATE))
8818+ goto out;
8819+
8820+ AuDbg("b%d\n", bindex);
8821+ /*
8822+ * gave up supporting LOOKUP_CREATE/OPEN for lower fs,
8823+ * due to whiteout and branch permission.
8824+ */
8825+ flags &= ~(/*LOOKUP_PARENT |*/ LOOKUP_OPEN | LOOKUP_CREATE
8826+ | LOOKUP_FOLLOW | LOOKUP_EXCL);
8827+ /* it may return tri-state */
8828+ valid = h_dentry->d_op->d_revalidate(h_dentry, flags);
8829+
8830+ if (unlikely(valid < 0))
8831+ err = valid;
8832+ else if (!valid)
8833+ err = -EINVAL;
8834+
8835+out:
8836+ AuTraceErr(err);
8837+ return err;
8838+}
8839+
8840+/* todo: remove this */
8841+static int h_d_revalidate(struct dentry *dentry, struct inode *inode,
8842+ unsigned int flags, int do_udba, int dirren)
8843+{
8844+ int err;
8845+ umode_t mode, h_mode;
8846+ aufs_bindex_t bindex, btail, btop, ibs, ibe;
8847+ unsigned char plus, unhashed, is_root, h_plus, h_nfs, tmpfile;
8848+ struct inode *h_inode, *h_cached_inode;
8849+ struct dentry *h_dentry;
8850+ struct qstr *name, *h_name;
8851+
8852+ err = 0;
8853+ plus = 0;
8854+ mode = 0;
8855+ ibs = -1;
8856+ ibe = -1;
8857+ unhashed = !!d_unhashed(dentry);
8858+ is_root = !!IS_ROOT(dentry);
8859+ name = &dentry->d_name;
8860+ tmpfile = au_di(dentry)->di_tmpfile;
8861+
8862+ /*
8863+ * Theoretically, REVAL test should be unnecessary in case of
8864+ * {FS,I}NOTIFY.
8865+ * But {fs,i}notify doesn't fire some necessary events,
8866+ * IN_ATTRIB for atime/nlink/pageio
8867+ * Let's do REVAL test too.
8868+ */
8869+ if (do_udba && inode) {
8870+ mode = (inode->i_mode & S_IFMT);
8871+ plus = (inode->i_nlink > 0);
8872+ ibs = au_ibtop(inode);
8873+ ibe = au_ibbot(inode);
8874+ }
8875+
8876+ btop = au_dbtop(dentry);
8877+ btail = btop;
8878+ if (inode && S_ISDIR(inode->i_mode))
8879+ btail = au_dbtaildir(dentry);
8880+ for (bindex = btop; bindex <= btail; bindex++) {
8881+ h_dentry = au_h_dptr(dentry, bindex);
8882+ if (!h_dentry)
8883+ continue;
8884+
8885+ AuDbg("b%d, %pd\n", bindex, h_dentry);
8886+ h_nfs = !!au_test_nfs(h_dentry->d_sb);
8887+ spin_lock(&h_dentry->d_lock);
8888+ h_name = &h_dentry->d_name;
8889+ if (unlikely(do_udba
8890+ && !is_root
8891+ && ((!h_nfs
8892+ && (unhashed != !!d_unhashed(h_dentry)
8893+ || (!tmpfile && !dirren
8894+ && !au_qstreq(name, h_name))
8895+ ))
8896+ || (h_nfs
8897+ && !(flags & LOOKUP_OPEN)
8898+ && (h_dentry->d_flags
8899+ & DCACHE_NFSFS_RENAMED)))
8900+ )) {
8901+ int h_unhashed;
8902+
8903+ h_unhashed = d_unhashed(h_dentry);
8904+ spin_unlock(&h_dentry->d_lock);
8905+ AuDbg("unhash 0x%x 0x%x, %pd %pd\n",
8906+ unhashed, h_unhashed, dentry, h_dentry);
8907+ goto err;
8908+ }
8909+ spin_unlock(&h_dentry->d_lock);
8910+
8911+ err = au_do_h_d_reval(h_dentry, flags, dentry, bindex);
8912+ if (unlikely(err))
8913+ /* do not goto err, to keep the errno */
8914+ break;
8915+
8916+ /* todo: plink too? */
8917+ if (!do_udba)
8918+ continue;
8919+
8920+ /* UDBA tests */
8921+ if (unlikely(!!inode != d_is_positive(h_dentry)))
8922+ goto err;
8923+
8924+ h_inode = NULL;
8925+ if (d_is_positive(h_dentry))
8926+ h_inode = d_inode(h_dentry);
8927+ h_plus = plus;
8928+ h_mode = mode;
8929+ h_cached_inode = h_inode;
8930+ if (h_inode) {
8931+ h_mode = (h_inode->i_mode & S_IFMT);
8932+ h_plus = (h_inode->i_nlink > 0);
8933+ }
8934+ if (inode && ibs <= bindex && bindex <= ibe)
8935+ h_cached_inode = au_h_iptr(inode, bindex);
8936+
8937+ if (!h_nfs) {
8938+ if (unlikely(plus != h_plus && !tmpfile))
8939+ goto err;
8940+ } else {
8941+ if (unlikely(!(h_dentry->d_flags & DCACHE_NFSFS_RENAMED)
8942+ && !is_root
8943+ && !IS_ROOT(h_dentry)
8944+ && unhashed != d_unhashed(h_dentry)))
8945+ goto err;
8946+ }
8947+ if (unlikely(mode != h_mode
8948+ || h_cached_inode != h_inode))
8949+ goto err;
8950+ continue;
8951+
8952+err:
8953+ err = -EINVAL;
8954+ break;
8955+ }
8956+
8957+ AuTraceErr(err);
8958+ return err;
8959+}
8960+
8961+/* todo: consolidate with do_refresh() and au_reval_for_attr() */
8962+static int simple_reval_dpath(struct dentry *dentry, unsigned int sigen)
8963+{
8964+ int err;
8965+ struct dentry *parent;
8966+ /* refresh only dentry itself against its parent; nothing to do when au_digen_test() reports zero */
8967+ if (!au_digen_test(dentry, sigen))
8968+ return 0;
8969+
8970+ parent = dget_parent(dentry);
8971+ di_read_lock_parent(parent, AuLock_IR);
8972+ AuDebugOn(au_digen_test(parent, sigen)); /* the parent is expected to pass the generation test already */
8973+ au_dbg_verify_gen(parent, sigen);
8974+ err = au_refresh_dentry(dentry, parent);
8975+ di_read_unlock(parent, AuLock_IR);
8976+ dput(parent);
8977+ AuTraceErr(err);
8978+ return err;
8979+}
8980+
8981+int au_reval_dpath(struct dentry *dentry, unsigned int sigen)
8982+{
8983+ int err;
8984+ struct dentry *d, *parent;
8985+ /* refresh dentry and, in the slow path, each ancestor that fails au_digen_test(), topmost first; returns 0 or the first refresh error */
8986+ if (!au_ftest_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR))
8987+ return simple_reval_dpath(dentry, sigen);
8988+
8989+ /* slow loop, keep it simple and stupid */
8990+ /* cf: au_cpup_dirs() */
8991+ err = 0;
8992+ parent = NULL;
8993+ while (au_digen_test(dentry, sigen)) {
8994+ d = dentry;
8995+ while (1) {
8996+ dput(parent); /* drops the reference taken by the previous dget_parent(), if any */
8997+ parent = dget_parent(d);
8998+ if (!au_digen_test(parent, sigen))
8999+ break;
9000+ d = parent;
9001+ }
9002+
9003+ if (d != dentry)
9004+ di_write_lock_child2(d);
9005+
9006+ /* someone might update our dentry while we were sleeping */
9007+ if (au_digen_test(d, sigen)) {
9008+ /*
9009+ * todo: consolidate with simple_reval_dpath(),
9010+ * do_refresh() and au_reval_for_attr().
9011+ */
9012+ di_read_lock_parent(parent, AuLock_IR);
9013+ err = au_refresh_dentry(d, parent);
9014+ di_read_unlock(parent, AuLock_IR);
9015+ }
9016+
9017+ if (d != dentry)
9018+ di_write_unlock(d);
9019+ if (unlikely(err))
9020+ break;
9021+ }
9022+ /* put the last parent exactly once here; putting it inside the loop as well made the next pass put it a second time */
9023+ dput(parent);
9024+ return err;
9025+}
9026+
9027+/*
9028+ * ->d_revalidate and ->d_weak_revalidate: returns 1 if valid, 0 if invalid (the dentry is then d_drop()ed), -ECHILD under LOOKUP_RCU, or the error from aufs_read_lock().
9029+ */
9030+static int aufs_d_revalidate(struct dentry *dentry, unsigned int flags)
9031+{
9032+ int valid, err;
9033+ unsigned int sigen;
9034+ unsigned char do_udba, dirren;
9035+ struct super_block *sb;
9036+ struct inode *inode;
9037+
9038+ /* todo: support rcu-walk? */
9039+ if (flags & LOOKUP_RCU)
9040+ return -ECHILD;
9041+
9042+ valid = 0;
9043+ if (unlikely(!au_di(dentry)))
9044+ goto out;
9045+
9046+ valid = 1;
9047+ sb = dentry->d_sb;
9048+ /*
9049+ * todo: very ugly
9050+ * i_mutex of parent dir may be held,
9051+ * but we should not return 'invalid' due to busy.
9052+ */
9053+ err = aufs_read_lock(dentry, AuLock_FLUSH | AuLock_DW | AuLock_NOPLM);
9054+ if (unlikely(err)) {
9055+ valid = err;
9056+ AuTraceErr(err);
9057+ goto out;
9058+ }
9059+ inode = NULL;
9060+ if (d_really_is_positive(dentry))
9061+ inode = d_inode(dentry);
9062+ if (unlikely(inode && au_is_bad_inode(inode))) {
9063+ err = -EINVAL;
9064+ AuTraceErr(err);
9065+ goto out_dgrade;
9066+ }
9067+ if (unlikely(au_dbrange_test(dentry))) {
9068+ err = -EINVAL;
9069+ AuTraceErr(err);
9070+ goto out_dgrade;
9071+ }
9072+
9073+ sigen = au_sigen(sb);
9074+ if (au_digen_test(dentry, sigen)) {
9075+ AuDebugOn(IS_ROOT(dentry));
9076+ err = au_reval_dpath(dentry, sigen);
9077+ if (unlikely(err)) {
9078+ AuTraceErr(err);
9079+ goto out_dgrade;
9080+ }
9081+ }
9082+ di_downgrade_lock(dentry, AuLock_IR);
9083+ /* outside open/empty-path lookups, a dead dir or an inode with no links is invalid unless I_LINKABLE is set in i_state */
9084+ err = -EINVAL;
9085+ if (!(flags & (LOOKUP_OPEN | LOOKUP_EMPTY))
9086+ && inode
9087+ && !(inode->i_state & I_LINKABLE) /* bit test; a logical && was true for any non-zero i_state */
9088+ && (IS_DEADDIR(inode) || !inode->i_nlink)) {
9089+ AuTraceErr(err);
9090+ goto out_inval;
9091+ }
9092+
9093+ do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE);
9094+ if (do_udba && inode) {
9095+ aufs_bindex_t btop = au_ibtop(inode);
9096+ struct inode *h_inode;
9097+
9098+ if (btop >= 0) {
9099+ h_inode = au_h_iptr(inode, btop);
9100+ if (h_inode && au_test_higen(inode, h_inode)) {
9101+ AuTraceErr(err);
9102+ goto out_inval;
9103+ }
9104+ }
9105+ }
9106+
9107+ dirren = !!au_opt_test(au_mntflags(sb), DIRREN);
9108+ err = h_d_revalidate(dentry, inode, flags, do_udba, dirren);
9109+ if (unlikely(!err && do_udba && au_dbtop(dentry) < 0)) {
9110+ err = -EIO;
9111+ AuDbg("both of real entry and whiteout found, %p, err %d\n",
9112+ dentry, err);
9113+ }
9114+ goto out_inval;
9115+
9116+out_dgrade:
9117+ di_downgrade_lock(dentry, AuLock_IR);
9118+out_inval:
9119+ aufs_read_unlock(dentry, AuLock_IR);
9120+ AuTraceErr(err);
9121+ valid = !err;
9122+out:
9123+ if (!valid) {
9124+ AuDbg("%pd invalid, %d\n", dentry, valid);
9125+ d_drop(dentry);
9126+ }
9127+ return valid;
9128+}
9129+
9130+static void aufs_d_release(struct dentry *dentry)
9131+{ /* ->d_release: tear down the aufs per-dentry data, if the dentry has any */
9132+ if (au_di(dentry)) {
9133+ au_di_fin(dentry);
9134+ au_hn_di_reinit(dentry); /* clears d_fsdata under CONFIG_AUFS_HNOTIFY, a stub otherwise */
9135+ }
9136+}
9137+
9138+const struct dentry_operations aufs_dop = {
9139+ .d_revalidate = aufs_d_revalidate,
9140+ .d_weak_revalidate = aufs_d_revalidate,
9141+ .d_release = aufs_d_release
9142+};
9143+
9144+/* aufs_dop without d_revalidate */
9145+const struct dentry_operations aufs_dop_noreval = {
9146+ .d_release = aufs_d_release
9147+};
9148diff -urN /usr/share/empty/fs/aufs/dentry.h linux/fs/aufs/dentry.h
9149--- /usr/share/empty/fs/aufs/dentry.h 1970-01-01 01:00:00.000000000 +0100
9150+++ linux/fs/aufs/dentry.h 2022-12-17 09:21:34.796521861 +0100
9151@@ -0,0 +1,270 @@
9152+/* SPDX-License-Identifier: GPL-2.0 */
9153+/*
9154+ * Copyright (C) 2005-2022 Junjiro R. Okajima
9155+ *
9156+ * This program is free software; you can redistribute it and/or modify
9157+ * it under the terms of the GNU General Public License as published by
9158+ * the Free Software Foundation; either version 2 of the License, or
9159+ * (at your option) any later version.
9160+ *
9161+ * This program is distributed in the hope that it will be useful,
9162+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9163+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9164+ * GNU General Public License for more details.
9165+ *
9166+ * You should have received a copy of the GNU General Public License
9167+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
9168+ */
9169+
9170+/*
9171+ * lookup and dentry operations
9172+ */
9173+
9174+#ifndef __AUFS_DENTRY_H__
9175+#define __AUFS_DENTRY_H__
9176+
9177+#ifdef __KERNEL__
9178+
9179+#include <linux/dcache.h>
9180+#include "dirren.h"
9181+#include "rwsem.h"
9182+
9183+struct au_hdentry {
9184+ struct dentry *hd_dentry;
9185+ aufs_bindex_t hd_id;
9186+};
9187+
9188+struct au_dinfo {
9189+ atomic_t di_generation;
9190+
9191+ struct au_rwsem di_rwsem;
9192+ aufs_bindex_t di_btop, di_bbot, di_bwh, di_bdiropq;
9193+ unsigned char di_tmpfile; /* to allow the different name */
9194+ struct au_hdentry *di_hdentry;
9195+ struct file *di_htmpfile;
9196+ struct rcu_head rcu;
9197+} ____cacheline_aligned_in_smp;
9198+
9199+/* ---------------------------------------------------------------------- */
9200+
9201+/* flags for au_lkup_dentry() */
9202+#define AuLkup_ALLOW_NEG 1
9203+#define AuLkup_IGNORE_PERM (1 << 1)
9204+#define AuLkup_DIRREN (1 << 2)
9205+#define au_ftest_lkup(flags, name) ((flags) & AuLkup_##name)
9206+#define au_fset_lkup(flags, name) \
9207+ do { (flags) |= AuLkup_##name; } while (0)
9208+#define au_fclr_lkup(flags, name) \
9209+ do { (flags) &= ~AuLkup_##name; } while (0)
9210+
9211+#ifndef CONFIG_AUFS_DIRREN
9212+#undef AuLkup_DIRREN
9213+#define AuLkup_DIRREN 0
9214+#endif
9215+
9216+struct au_do_lookup_args {
9217+ unsigned int flags;
9218+ mode_t type;
9219+ struct qstr whname, *name;
9220+ struct au_dr_lookup dirren;
9221+};
9222+
9223+/* ---------------------------------------------------------------------- */
9224+
9225+/* dentry.c */
9226+extern const struct dentry_operations aufs_dop, aufs_dop_noreval;
9227+struct au_branch;
9228+struct dentry *au_sio_lkup_one(struct user_namespace *userns, struct qstr *name,
9229+ struct path *ppath);
9230+int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
9231+ struct dentry *h_parent, struct au_branch *br);
9232+
9233+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t btop,
9234+ unsigned int flags);
9235+int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh);
9236+int au_refresh_dentry(struct dentry *dentry, struct dentry *parent);
9237+int au_reval_dpath(struct dentry *dentry, unsigned int sigen);
9238+void au_refresh_dop(struct dentry *dentry, int force_reval);
9239+
9240+/* dinfo.c */
9241+void au_di_init_once(void *_di);
9242+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc);
9243+void au_di_free(struct au_dinfo *dinfo);
9244+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b);
9245+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src);
9246+int au_di_init(struct dentry *dentry);
9247+void au_di_fin(struct dentry *dentry);
9248+int au_di_realloc(struct au_dinfo *dinfo, int nbr, int may_shrink);
9249+
9250+void di_read_lock(struct dentry *d, int flags, unsigned int lsc);
9251+void di_read_unlock(struct dentry *d, int flags);
9252+void di_downgrade_lock(struct dentry *d, int flags);
9253+void di_write_lock(struct dentry *d, unsigned int lsc);
9254+void di_write_unlock(struct dentry *d);
9255+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir);
9256+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir);
9257+void di_write_unlock2(struct dentry *d1, struct dentry *d2);
9258+
9259+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex);
9260+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex);
9261+aufs_bindex_t au_dbtail(struct dentry *dentry);
9262+aufs_bindex_t au_dbtaildir(struct dentry *dentry);
9263+
9264+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
9265+ struct dentry *h_dentry);
9266+int au_digen_test(struct dentry *dentry, unsigned int sigen);
9267+int au_dbrange_test(struct dentry *dentry);
9268+void au_update_digen(struct dentry *dentry);
9269+void au_update_dbrange(struct dentry *dentry, int do_put_zero);
9270+void au_update_dbtop(struct dentry *dentry);
9271+void au_update_dbbot(struct dentry *dentry);
9272+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry);
9273+
9274+/* ---------------------------------------------------------------------- */
9275+
9276+static inline struct au_dinfo *au_di(struct dentry *dentry)
9277+{
9278+ return dentry->d_fsdata;
9279+}
9280+
9281+/* ---------------------------------------------------------------------- */
9282+
9283+/* lock subclass for dinfo */
9284+enum {
9285+ AuLsc_DI_CHILD, /* child first */
9286+ AuLsc_DI_CHILD2, /* rename(2), link(2), and cpup at hnotify */
9287+ AuLsc_DI_CHILD3, /* copyup dirs */
9288+ AuLsc_DI_PARENT,
9289+ AuLsc_DI_PARENT2,
9290+ AuLsc_DI_PARENT3,
9291+ AuLsc_DI_TMP /* temp for replacing dinfo */
9292+};
9293+
9294+/*
9295+ * di_read_lock_child, di_write_lock_child,
9296+ * di_read_lock_child2, di_write_lock_child2,
9297+ * di_read_lock_child3, di_write_lock_child3,
9298+ * di_read_lock_parent, di_write_lock_parent,
9299+ * di_read_lock_parent2, di_write_lock_parent2,
9300+ * di_read_lock_parent3, di_write_lock_parent3,
9301+ */
9302+#define AuReadLockFunc(name, lsc) \
9303+static inline void di_read_lock_##name(struct dentry *d, int flags) \
9304+{ di_read_lock(d, flags, AuLsc_DI_##lsc); }
9305+
9306+#define AuWriteLockFunc(name, lsc) \
9307+static inline void di_write_lock_##name(struct dentry *d) \
9308+{ di_write_lock(d, AuLsc_DI_##lsc); }
9309+
9310+#define AuRWLockFuncs(name, lsc) \
9311+ AuReadLockFunc(name, lsc) \
9312+ AuWriteLockFunc(name, lsc)
9313+
9314+AuRWLockFuncs(child, CHILD);
9315+AuRWLockFuncs(child2, CHILD2);
9316+AuRWLockFuncs(child3, CHILD3);
9317+AuRWLockFuncs(parent, PARENT);
9318+AuRWLockFuncs(parent2, PARENT2);
9319+AuRWLockFuncs(parent3, PARENT3);
9320+
9321+#undef AuReadLockFunc
9322+#undef AuWriteLockFunc
9323+#undef AuRWLockFuncs
9324+
9325+#define DiMustNoWaiters(d) AuRwMustNoWaiters(&au_di(d)->di_rwsem)
9326+#define DiMustAnyLock(d) AuRwMustAnyLock(&au_di(d)->di_rwsem)
9327+#define DiMustWriteLock(d) AuRwMustWriteLock(&au_di(d)->di_rwsem)
9328+
9329+/* ---------------------------------------------------------------------- */
9330+
9331+/* todo: memory barrier? */
9332+static inline unsigned int au_digen(struct dentry *d)
9333+{
9334+ return atomic_read(&au_di(d)->di_generation);
9335+}
9336+
9337+static inline void au_h_dentry_init(struct au_hdentry *hdentry)
9338+{
9339+ hdentry->hd_dentry = NULL;
9340+}
9341+
9342+static inline struct au_hdentry *au_hdentry(struct au_dinfo *di,
9343+ aufs_bindex_t bindex)
9344+{
9345+ return di->di_hdentry + bindex;
9346+}
9347+
9348+static inline void au_hdput(struct au_hdentry *hd)
9349+{
9350+ if (hd)
9351+ dput(hd->hd_dentry);
9352+}
9353+
9354+static inline aufs_bindex_t au_dbtop(struct dentry *dentry)
9355+{
9356+ DiMustAnyLock(dentry);
9357+ return au_di(dentry)->di_btop;
9358+}
9359+
9360+static inline aufs_bindex_t au_dbbot(struct dentry *dentry)
9361+{
9362+ DiMustAnyLock(dentry);
9363+ return au_di(dentry)->di_bbot;
9364+}
9365+
9366+static inline aufs_bindex_t au_dbwh(struct dentry *dentry)
9367+{
9368+ DiMustAnyLock(dentry);
9369+ return au_di(dentry)->di_bwh;
9370+}
9371+
9372+static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry)
9373+{
9374+ DiMustAnyLock(dentry);
9375+ return au_di(dentry)->di_bdiropq;
9376+}
9377+
9378+/* todo: hard/soft set? */
9379+static inline void au_set_dbtop(struct dentry *dentry, aufs_bindex_t bindex)
9380+{
9381+ DiMustWriteLock(dentry);
9382+ au_di(dentry)->di_btop = bindex;
9383+}
9384+
9385+static inline void au_set_dbbot(struct dentry *dentry, aufs_bindex_t bindex)
9386+{
9387+ DiMustWriteLock(dentry);
9388+ au_di(dentry)->di_bbot = bindex;
9389+}
9390+
9391+static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex)
9392+{
9393+ DiMustWriteLock(dentry);
9394+ /* dbwh can be outside of btop - bbot range */
9395+ au_di(dentry)->di_bwh = bindex;
9396+}
9397+
9398+static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex)
9399+{
9400+ DiMustWriteLock(dentry);
9401+ au_di(dentry)->di_bdiropq = bindex;
9402+}
9403+
9404+/* ---------------------------------------------------------------------- */
9405+
9406+#ifdef CONFIG_AUFS_HNOTIFY
9407+static inline void au_digen_dec(struct dentry *d)
9408+{ /* atomically decrement di_generation; only defined when CONFIG_AUFS_HNOTIFY is set */
9409+ atomic_dec(&au_di(d)->di_generation);
9410+}
9411+
9412+static inline void au_hn_di_reinit(struct dentry *dentry)
9413+{ /* reset d_fsdata to NULL; nothing is freed by this helper */
9414+ dentry->d_fsdata = NULL;
9415+}
9416+#else
9417+AuStubVoid(au_hn_di_reinit, struct dentry *dentry __maybe_unused) /* presumably an empty stub; AuStubVoid is defined elsewhere */
9418+#endif /* CONFIG_AUFS_HNOTIFY */
9419+
9420+#endif /* __KERNEL__ */
9421+#endif /* __AUFS_DENTRY_H__ */
9422diff -urN /usr/share/empty/fs/aufs/dinfo.c linux/fs/aufs/dinfo.c
9423--- /usr/share/empty/fs/aufs/dinfo.c 1970-01-01 01:00:00.000000000 +0100
9424+++ linux/fs/aufs/dinfo.c 2022-12-17 09:21:34.796521861 +0100
9425@@ -0,0 +1,555 @@
9426+// SPDX-License-Identifier: GPL-2.0
9427+/*
9428+ * Copyright (C) 2005-2022 Junjiro R. Okajima
9429+ *
9430+ * This program is free software; you can redistribute it and/or modify
9431+ * it under the terms of the GNU General Public License as published by
9432+ * the Free Software Foundation; either version 2 of the License, or
9433+ * (at your option) any later version.
9434+ *
9435+ * This program is distributed in the hope that it will be useful,
9436+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9437+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9438+ * GNU General Public License for more details.
9439+ *
9440+ * You should have received a copy of the GNU General Public License
9441+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
9442+ */
9443+
9444+/*
9445+ * dentry private data
9446+ */
9447+
9448+#include "aufs.h"
9449+
9450+void au_di_init_once(void *_dinfo)
9451+{ /* initialise only the di_rwsem of the au_dinfo passed as an untyped pointer */
9452+ struct au_dinfo *dinfo = _dinfo;
9453+
9454+ au_rw_init(&dinfo->di_rwsem);
9455+}
9456+
9457+struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc)
9458+{ /* returns a new au_dinfo whose di_rwsem is write-locked with subclass @lsc, or NULL on allocation failure */
9459+ struct au_dinfo *dinfo;
9460+ int nbr, i;
9461+
9462+ dinfo = au_cache_alloc_dinfo();
9463+ if (unlikely(!dinfo))
9464+ goto out;
9465+
9466+ nbr = au_sbbot(sb) + 1; /* one slot for each index 0..au_sbbot(sb) */
9467+ if (nbr <= 0)
9468+ nbr = 1; /* always allocate at least one slot */
9469+ dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS);
9470+ if (dinfo->di_hdentry) {
9471+ au_rw_write_lock_nested(&dinfo->di_rwsem, lsc); /* the caller receives dinfo already locked */
9472+ dinfo->di_btop = -1; /* all four indices start as -1 */
9473+ dinfo->di_bbot = -1;
9474+ dinfo->di_bwh = -1;
9475+ dinfo->di_bdiropq = -1;
9476+ dinfo->di_tmpfile = 0;
9477+ for (i = 0; i < nbr; i++)
9478+ dinfo->di_hdentry[i].hd_id = -1; /* kcalloc zeroed the slots; only hd_id gets -1 */
9479+ dinfo->di_htmpfile = NULL;
9480+ goto out; /* success */
9481+ }
9482+
9483+ au_cache_free_dinfo(dinfo); /* array allocation failed: give the dinfo back */
9484+ dinfo = NULL;
9485+
9486+out:
9487+ return dinfo;
9488+}
9489+
9490+void au_di_free(struct au_dinfo *dinfo)
9491+{ /* put every slot in di_btop..di_bbot, then free the slot array and the dinfo itself */
9492+ struct au_hdentry *p;
9493+ aufs_bindex_t bbot, bindex;
9494+
9495+ /* dentry may not be revalidated */
9496+ bindex = dinfo->di_btop;
9497+ if (bindex >= 0) { /* a negative di_btop means there is nothing to put */
9498+ bbot = dinfo->di_bbot;
9499+ p = au_hdentry(dinfo, bindex);
9500+ while (bindex++ <= bbot) /* compares before incrementing, so bbot itself is included */
9501+ au_hdput(p++);
9502+ }
9503+ au_kfree_try_rcu(dinfo->di_hdentry);
9504+ au_cache_free_dinfo(dinfo);
9505+}
9506+
9507+void au_di_swap(struct au_dinfo *a, struct au_dinfo *b)
9508+{ /* exchange the slot-array pointer and the four index fields of @a and @b; no other field is touched */
9509+ struct au_hdentry *p;
9510+ aufs_bindex_t bi;
9511+
9512+ AuRwMustWriteLock(&a->di_rwsem); /* lock check macros, defined outside this chunk */
9513+ AuRwMustWriteLock(&b->di_rwsem);
9514+
9515+#define DiSwap(v, name) \
9516+ do { \
9517+ v = a->di_##name; \
9518+ a->di_##name = b->di_##name; \
9519+ b->di_##name = v; \
9520+ } while (0)
9521+
9522+ DiSwap(p, hdentry); /* @p and @bi are only temporaries of the matching type */
9523+ DiSwap(bi, btop);
9524+ DiSwap(bi, bbot);
9525+ DiSwap(bi, bwh);
9526+ DiSwap(bi, bdiropq);
9527+ /* smp_mb(); */
9528+
9529+#undef DiSwap
9530+}
9531+
9532+void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src)
9533+{ /* copy the four index fields from @src to @dst; di_hdentry is not copied */
9534+ AuRwMustWriteLock(&dst->di_rwsem); /* lock check macros, defined outside this chunk */
9535+ AuRwMustWriteLock(&src->di_rwsem);
9536+
9537+ dst->di_btop = src->di_btop;
9538+ dst->di_bbot = src->di_bbot;
9539+ dst->di_bwh = src->di_bwh;
9540+ dst->di_bdiropq = src->di_bdiropq;
9541+ /* smp_mb(); */
9542+}
9543+
9544+int au_di_init(struct dentry *dentry)
9545+{ /* allocate an au_dinfo for @dentry and attach it as d_fsdata; returns 0 or -ENOMEM */
9546+ int err;
9547+ struct super_block *sb;
9548+ struct au_dinfo *dinfo;
9549+
9550+ err = 0;
9551+ sb = dentry->d_sb;
9552+ dinfo = au_di_alloc(sb, AuLsc_DI_CHILD); /* comes back with di_rwsem write-locked */
9553+ if (dinfo) {
9554+ atomic_set(&dinfo->di_generation, au_sigen(sb)); /* start at the superblock's current generation */
9555+ /* smp_mb(); */ /* atomic_set */
9556+ dentry->d_fsdata = dinfo;
9557+ } else
9558+ err = -ENOMEM;
9559+
9560+ return err;
9561+}
9562+
9563+void au_di_fin(struct dentry *dentry)
9564+{ /* tear down @dentry's au_dinfo: AuRwDestroy() on its rwsem, then au_di_free() */
9565+ struct au_dinfo *dinfo;
9566+
9567+ dinfo = au_di(dentry);
9568+ AuRwDestroy(&dinfo->di_rwsem);
9569+ au_di_free(dinfo);
9570+}
9571+
9572+int au_di_realloc(struct au_dinfo *dinfo, int nbr, int may_shrink)
9573+{ /* resize di_hdentry to @nbr slots; returns 0, or -ENOMEM leaving di_hdentry untouched */
9574+ int err, sz;
9575+ struct au_hdentry *hdp;
9576+
9577+ AuRwMustWriteLock(&dinfo->di_rwsem);
9578+
9579+ err = -ENOMEM;
9580+ sz = sizeof(*hdp) * (dinfo->di_bbot + 1); /* current size, derived from di_bbot */
9581+ if (!sz)
9582+ sz = sizeof(*hdp); /* di_bbot == -1: treat the array as holding one slot */
9583+ hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS,
9584+ may_shrink);
9585+ if (hdp) {
9586+ dinfo->di_hdentry = hdp; /* the pointer is replaced only on success */
9587+ err = 0;
9588+ }
9589+
9590+ return err;
9591+}
9592+
9593+/* ---------------------------------------------------------------------- */
9594+
9595+static void do_ii_write_lock(struct inode *inode, unsigned int lsc)
9596+{ /* map the dentry-info lock subclass @lsc onto the same-named ii_write_lock_*() variant */
9597+ switch (lsc) {
9598+ case AuLsc_DI_CHILD:
9599+ ii_write_lock_child(inode);
9600+ break;
9601+ case AuLsc_DI_CHILD2:
9602+ ii_write_lock_child2(inode);
9603+ break;
9604+ case AuLsc_DI_CHILD3:
9605+ ii_write_lock_child3(inode);
9606+ break;
9607+ case AuLsc_DI_PARENT:
9608+ ii_write_lock_parent(inode);
9609+ break;
9610+ case AuLsc_DI_PARENT2:
9611+ ii_write_lock_parent2(inode);
9612+ break;
9613+ case AuLsc_DI_PARENT3:
9614+ ii_write_lock_parent3(inode);
9615+ break;
9616+ default:
9617+ BUG(); /* any other subclass value is treated as a programming error */
9618+ }
9619+}
9620+
9621+static void do_ii_read_lock(struct inode *inode, unsigned int lsc)
9622+{ /* map the dentry-info lock subclass @lsc onto the same-named ii_read_lock_*() variant */
9623+ switch (lsc) {
9624+ case AuLsc_DI_CHILD:
9625+ ii_read_lock_child(inode);
9626+ break;
9627+ case AuLsc_DI_CHILD2:
9628+ ii_read_lock_child2(inode);
9629+ break;
9630+ case AuLsc_DI_CHILD3:
9631+ ii_read_lock_child3(inode);
9632+ break;
9633+ case AuLsc_DI_PARENT:
9634+ ii_read_lock_parent(inode);
9635+ break;
9636+ case AuLsc_DI_PARENT2:
9637+ ii_read_lock_parent2(inode);
9638+ break;
9639+ case AuLsc_DI_PARENT3:
9640+ ii_read_lock_parent3(inode);
9641+ break;
9642+ default:
9643+ BUG(); /* any other subclass value is treated as a programming error */
9644+ }
9645+}
9646+
9647+void di_read_lock(struct dentry *d, int flags, unsigned int lsc)
9648+{ /* read-lock @d's di_rwsem, then lock its inode as requested by the IW/IR bits of @flags */
9649+ struct inode *inode;
9650+
9651+ au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc); /* the dentry-info lock is taken first */
9652+ if (d_really_is_positive(d)) { /* a negative dentry has no inode to lock */
9653+ inode = d_inode(d);
9654+ if (au_ftest_lock(flags, IW)) /* IW is tested first, so it wins over IR */
9655+ do_ii_write_lock(inode, lsc);
9656+ else if (au_ftest_lock(flags, IR))
9657+ do_ii_read_lock(inode, lsc);
9658+ }
9659+}
9660+
9661+void di_read_unlock(struct dentry *d, int flags)
9662+{ /* release in the reverse order of di_read_lock(): inode lock first, then di_rwsem */
9663+ struct inode *inode;
9664+
9665+ if (d_really_is_positive(d)) {
9666+ inode = d_inode(d);
9667+ if (au_ftest_lock(flags, IW)) { /* same IW-before-IR precedence as di_read_lock() */
9668+ au_dbg_verify_dinode(d);
9669+ ii_write_unlock(inode);
9670+ } else if (au_ftest_lock(flags, IR)) {
9671+ au_dbg_verify_dinode(d);
9672+ ii_read_unlock(inode);
9673+ }
9674+ }
9675+ au_rw_read_unlock(&au_di(d)->di_rwsem);
9676+}
9677+
9678+void di_downgrade_lock(struct dentry *d, int flags)
9679+{ /* downgrade di_rwsem; the inode lock is downgraded too only when @flags has IR and @d is positive */
9680+ if (d_really_is_positive(d) && au_ftest_lock(flags, IR))
9681+ ii_downgrade_lock(d_inode(d));
9682+ au_rw_dgrade_lock(&au_di(d)->di_rwsem);
9683+}
9684+
9685+void di_write_lock(struct dentry *d, unsigned int lsc)
9686+{ /* write-lock di_rwsem with subclass @lsc, then write-lock the inode of a positive dentry */
9687+ au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc);
9688+ if (d_really_is_positive(d))
9689+ do_ii_write_lock(d_inode(d), lsc);
9690+}
9691+
9692+void di_write_unlock(struct dentry *d)
9693+{ /* counterpart of di_write_lock(): release the inode lock (if positive), then di_rwsem */
9694+ au_dbg_verify_dinode(d);
9695+ if (d_really_is_positive(d))
9696+ ii_write_unlock(d_inode(d));
9697+ au_rw_write_unlock(&au_di(d)->di_rwsem);
9698+}
9699+
9700+void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir)
9701+{ /* write-lock two distinct dentries with the CHILD and CHILD2 subclasses in a fixed order */
9702+ AuDebugOn(d1 == d2
9703+ || d_inode(d1) == d_inode(d2)
9704+ || d1->d_sb != d2->d_sb);
9705+
9706+ if ((isdir && au_test_subdir(d1, d2)) /* ordering rule: subdir test for dirs, ... */
9707+ || d1 < d2) { /* ... otherwise the lower pointer value goes first */
9708+ di_write_lock_child(d1);
9709+ di_write_lock_child2(d2);
9710+ } else {
9711+ di_write_lock_child(d2);
9712+ di_write_lock_child2(d1);
9713+ }
9714+}
9715+
9716+void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir)
9717+{ /* same ordering rule as di_write_lock2_child(), using the PARENT and PARENT2 subclasses */
9718+ AuDebugOn(d1 == d2
9719+ || d_inode(d1) == d_inode(d2)
9720+ || d1->d_sb != d2->d_sb);
9721+
9722+ if ((isdir && au_test_subdir(d1, d2)) /* ordering rule: subdir test for dirs, ... */
9723+ || d1 < d2) { /* ... otherwise the lower pointer value goes first */
9724+ di_write_lock_parent(d1);
9725+ di_write_lock_parent2(d2);
9726+ } else {
9727+ di_write_lock_parent(d2);
9728+ di_write_lock_parent2(d1);
9729+ }
9730+}
9731+
9732+void di_write_unlock2(struct dentry *d1, struct dentry *d2)
9733+{ /* unlock both dentries; a shared inode is unlocked once only, through @d1 */
9734+ di_write_unlock(d1);
9735+ if (d_inode(d1) == d_inode(d2))
9736+ au_rw_write_unlock(&au_di(d2)->di_rwsem); /* inode already released above: drop just d2's rwsem */
9737+ else
9738+ di_write_unlock(d2);
9739+}
9740+
9741+/* ---------------------------------------------------------------------- */
9742+
9743+struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex)
9744+{ /* return the hd_dentry stored in slot @bindex (may be NULL); no reference is taken here */
9745+ struct dentry *d;
9746+
9747+ DiMustAnyLock(dentry);
9748+
9749+ if (au_dbtop(dentry) < 0 || bindex < au_dbtop(dentry)) /* only the lower bound is checked, not dbbot */
9750+ return NULL;
9751+ AuDebugOn(bindex < 0);
9752+ d = au_hdentry(au_di(dentry), bindex)->hd_dentry;
9753+ AuDebugOn(d && au_dcount(d) <= 0);
9754+ return d;
9755+}
9756+
9757+/*
9758+ * extended version of au_h_dptr().
9759+ * returns a hashed and positive (or linkable) h_dentry in bindex, NULL, or
9760+ * error. Tries the dinfo slot, then d_find_alias(), then au_plink_lkup().
9761+ */
9762+struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex)
9763+{
9764+ struct dentry *h_dentry;
9765+ struct inode *inode, *h_inode;
9766+
9767+ AuDebugOn(d_really_is_negative(dentry));
9768+
9769+ h_dentry = NULL;
9770+ if (au_dbtop(dentry) <= bindex
9771+ && bindex <= au_dbbot(dentry))
9772+ h_dentry = au_h_dptr(dentry, bindex); /* step 1: the slot recorded in the dinfo */
9773+ if (h_dentry && !au_d_linkable(h_dentry)) { /* presumably 0 means "usable"; helper is defined elsewhere */
9774+ dget(h_dentry); /* the slot's pointer is borrowed, so take a reference for the caller */
9775+ goto out; /* success */
9776+ }
9777+
9778+ inode = d_inode(dentry);
9779+ AuDebugOn(bindex < au_ibtop(inode));
9780+ AuDebugOn(au_ibbot(inode) < bindex);
9781+ h_inode = au_h_iptr(inode, bindex);
9782+ h_dentry = d_find_alias(h_inode); /* step 2: any alias of the lower inode */
9783+ if (h_dentry) {
9784+ if (!IS_ERR(h_dentry)) {
9785+ if (!au_d_linkable(h_dentry))
9786+ goto out; /* success */
9787+ dput(h_dentry); /* unusable alias: drop it and fall through to step 3 */
9788+ } else
9789+ goto out; /* an error pointer is returned to the caller as is */
9790+ }
9791+
9792+ if (au_opt_test(au_mntflags(dentry->d_sb), PLINK)) { /* step 3: only when the PLINK option is set */
9793+ h_dentry = au_plink_lkup(inode, bindex);
9794+ AuDebugOn(!h_dentry);
9795+ if (!IS_ERR(h_dentry)) {
9796+ if (!au_d_hashed_positive(h_dentry)) /* presumably 0 means "hashed and positive" -- confirm */
9797+ goto out; /* success */
9798+ dput(h_dentry);
9799+ h_dentry = NULL;
9800+ }
9801+ }
9802+
9803+out:
9804+ AuDbgDentry(h_dentry);
9805+ return h_dentry;
9806+}
9807+
9808+aufs_bindex_t au_dbtail(struct dentry *dentry)
9809+{ /* returns dbbot, cut short by dbwh: 0 when dbwh == 0, dbwh - 1 when 0 < dbwh < dbbot */
9810+ aufs_bindex_t bbot, bwh;
9811+
9812+ bbot = au_dbbot(dentry);
9813+ if (0 <= bbot) { /* dbwh is consulted only when dbbot is valid */
9814+ bwh = au_dbwh(dentry);
9815+ if (!bwh)
9816+ return bwh; /* dbwh == 0: the result is 0 */
9817+ if (0 < bwh && bwh < bbot)
9818+ return bwh - 1;
9819+ }
9820+ return bbot;
9821+}
9822+
9823+aufs_bindex_t au_dbtaildir(struct dentry *dentry)
9824+{ /* au_dbtail() further limited to dbdiropq when 0 <= dbdiropq < tail */
9825+ aufs_bindex_t bbot, bopq;
9826+
9827+ bbot = au_dbtail(dentry);
9828+ if (0 <= bbot) {
9829+ bopq = au_dbdiropq(dentry);
9830+ if (0 <= bopq && bopq < bbot)
9831+ bbot = bopq;
9832+ }
9833+ return bbot;
9834+}
9835+
9836+/* ---------------------------------------------------------------------- */
9837+
9838+void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
9839+ struct dentry *h_dentry)
9840+{ /* replace slot @bindex with @h_dentry (NULL clears it); the old occupant is dput() */
9841+ struct au_dinfo *dinfo;
9842+ struct au_hdentry *hd;
9843+ struct au_branch *br;
9844+
9845+ DiMustWriteLock(dentry);
9846+
9847+ dinfo = au_di(dentry);
9848+ hd = au_hdentry(dinfo, bindex);
9849+ au_hdput(hd); /* drop the reference on the previous occupant */
9850+ hd->hd_dentry = h_dentry; /* stored without dget(); presumably the caller's reference is handed over -- confirm */
9851+ if (h_dentry) {
9852+ br = au_sbr(dentry->d_sb, bindex);
9853+ hd->hd_id = br->br_id; /* hd_id is left unchanged when the slot is cleared */
9854+ }
9855+}
9856+
9857+int au_dbrange_test(struct dentry *dentry)
9858+{ /* returns 0 when dbtop >= 0, -EIO otherwise; the range checks themselves are debug-only */
9859+ int err;
9860+ aufs_bindex_t btop, bbot;
9861+
9862+ err = 0;
9863+ btop = au_dbtop(dentry);
9864+ bbot = au_dbbot(dentry);
9865+ if (btop >= 0)
9866+ AuDebugOn(bbot < 0 && btop > bbot); /* NOTE(review): with btop >= 0 this is just "bbot < 0"; was || intended? */
9867+ else {
9868+ err = -EIO;
9869+ AuDebugOn(bbot >= 0);
9870+ }
9871+
9872+ return err;
9873+}
9874+
9875+int au_digen_test(struct dentry *dentry, unsigned int sigen)
9876+{ /* returns -EIO when the dentry generation differs from @sigen or au_iigen_test() is non-zero, else 0 */
9877+ int err;
9878+
9879+ err = 0;
9880+ if (unlikely(au_digen(dentry) != sigen
9881+ || au_iigen_test(d_inode(dentry), sigen)))
9882+ err = -EIO;
9883+
9884+ return err;
9885+}
9886+
9887+void au_update_digen(struct dentry *dentry)
9888+{ /* set the dentry's di_generation to the superblock's current au_sigen() value */
9889+ atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb));
9890+ /* smp_mb(); */ /* atomic_set */
9891+}
9892+
9893+void au_update_dbrange(struct dentry *dentry, int do_put_zero)
9894+{ /* recompute di_btop/di_bbot from the filled slots; @do_put_zero first clears negative lower dentries */
9895+ struct au_dinfo *dinfo;
9896+ struct dentry *h_d;
9897+ struct au_hdentry *hdp;
9898+ aufs_bindex_t bindex, bbot;
9899+
9900+ DiMustWriteLock(dentry);
9901+
9902+ dinfo = au_di(dentry);
9903+ if (!dinfo || dinfo->di_btop < 0) /* nothing to do without a dinfo or a valid range */
9904+ return;
9905+
9906+ if (do_put_zero) {
9907+ bbot = dinfo->di_bbot;
9908+ bindex = dinfo->di_btop;
9909+ hdp = au_hdentry(dinfo, bindex);
9910+ for (; bindex <= bbot; bindex++, hdp++) {
9911+ h_d = hdp->hd_dentry;
9912+ if (h_d && d_is_negative(h_d))
9913+ au_set_h_dptr(dentry, bindex, NULL); /* releases the negative lower dentry */
9914+ }
9915+ }
9916+
9917+ dinfo->di_btop = 0; /* rescan upward from slot 0 for the first filled slot */
9918+ hdp = au_hdentry(dinfo, dinfo->di_btop);
9919+ for (; dinfo->di_btop <= dinfo->di_bbot; dinfo->di_btop++, hdp++)
9920+ if (hdp->hd_dentry)
9921+ break;
9922+ if (dinfo->di_btop > dinfo->di_bbot) { /* no filled slot at all: mark the range as empty */
9923+ dinfo->di_btop = -1;
9924+ dinfo->di_bbot = -1;
9925+ return;
9926+ }
9927+
9928+ hdp = au_hdentry(dinfo, dinfo->di_bbot); /* then scan downward for the last filled slot */
9929+ for (; dinfo->di_bbot >= 0; dinfo->di_bbot--, hdp--)
9930+ if (hdp->hd_dentry)
9931+ break;
9932+ AuDebugOn(dinfo->di_btop > dinfo->di_bbot || dinfo->di_bbot < 0);
9933+}
9934+
9935+void au_update_dbtop(struct dentry *dentry)
9936+{ /* move dbtop down to the first positive lower dentry, clearing negative ones passed on the way */
9937+ aufs_bindex_t bindex, bbot;
9938+ struct dentry *h_dentry;
9939+
9940+ bbot = au_dbbot(dentry);
9941+ for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++) {
9942+ h_dentry = au_h_dptr(dentry, bindex);
9943+ if (!h_dentry)
9944+ continue;
9945+ if (d_is_positive(h_dentry)) {
9946+ au_set_dbtop(dentry, bindex);
9947+ return;
9948+ }
9949+ au_set_h_dptr(dentry, bindex, NULL); /* negative lower dentry: release it */
9950+ } /* if no positive dentry is found, dbtop keeps its old value */
9951+}
9952+
9953+void au_update_dbbot(struct dentry *dentry)
9954+{ /* move dbbot up to the last positive lower dentry, clearing negative ones passed on the way */
9955+ aufs_bindex_t bindex, btop;
9956+ struct dentry *h_dentry;
9957+
9958+ btop = au_dbtop(dentry);
9959+ for (bindex = au_dbbot(dentry); bindex >= btop; bindex--) {
9960+ h_dentry = au_h_dptr(dentry, bindex);
9961+ if (!h_dentry)
9962+ continue;
9963+ if (d_is_positive(h_dentry)) {
9964+ au_set_dbbot(dentry, bindex);
9965+ return;
9966+ }
9967+ au_set_h_dptr(dentry, bindex, NULL); /* negative lower dentry: release it */
9968+ } /* if no positive dentry is found, dbbot keeps its old value */
9969+}
9970+
9971+int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry)
9972+{ /* linear search of dbtop..dbbot for the slot holding @h_dentry; returns its index or -1 */
9973+ aufs_bindex_t bindex, bbot;
9974+
9975+ bbot = au_dbbot(dentry);
9976+ for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++)
9977+ if (au_h_dptr(dentry, bindex) == h_dentry)
9978+ return bindex;
9979+ return -1;
9980+}
9981diff -urN /usr/share/empty/fs/aufs/dir.c linux/fs/aufs/dir.c
9982--- /usr/share/empty/fs/aufs/dir.c 1970-01-01 01:00:00.000000000 +0100
9983+++ linux/fs/aufs/dir.c 2022-12-17 09:21:34.796521861 +0100
9984@@ -0,0 +1,765 @@
9985+// SPDX-License-Identifier: GPL-2.0
9986+/*
9987+ * Copyright (C) 2005-2022 Junjiro R. Okajima
9988+ *
9989+ * This program is free software; you can redistribute it and/or modify
9990+ * it under the terms of the GNU General Public License as published by
9991+ * the Free Software Foundation; either version 2 of the License, or
9992+ * (at your option) any later version.
9993+ *
9994+ * This program is distributed in the hope that it will be useful,
9995+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
9996+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9997+ * GNU General Public License for more details.
9998+ *
9999+ * You should have received a copy of the GNU General Public License
10000+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
10001+ */
10002+
10003+/*
10004+ * directory operations
10005+ */
10006+
10007+#include <linux/fs_stack.h>
10008+#include <linux/iversion.h>
10009+#include "aufs.h"
10010+
10011+void au_add_nlink(struct inode *dir, struct inode *h_dir)
10012+{ /* add the lower dir's link count, less 2, to @dir's i_nlink */
10013+ unsigned int nlink;
10014+
10015+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
10016+
10017+ nlink = dir->i_nlink;
10018+ nlink += h_dir->i_nlink - 2;
10019+ if (h_dir->i_nlink < 2)
10020+ nlink += 2; /* cancels the "- 2" above, so the net change is +h_dir->i_nlink */
10021+ smp_mb(); /* for i_nlink */
10022+ /* 0 can happen in revalidating */
10023+ set_nlink(dir, nlink);
10024+}
10025+
10026+void au_sub_nlink(struct inode *dir, struct inode *h_dir)
10027+{ /* inverse of au_add_nlink(): subtract the lower dir's link count, less 2 */
10028+ unsigned int nlink;
10029+
10030+ AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));
10031+
10032+ nlink = dir->i_nlink;
10033+ nlink -= h_dir->i_nlink - 2;
10034+ if (h_dir->i_nlink < 2)
10035+ nlink -= 2; /* cancels the "- 2" above, so the net change is -h_dir->i_nlink */
10036+ smp_mb(); /* for i_nlink */
10037+ /* nlink == 0 means the branch-fs is broken */
10038+ set_nlink(dir, nlink);
10039+}
10040+
10041+loff_t au_dir_size(struct file *file, struct dentry *dentry)
10042+{ /* sum the lower dirs' sizes (from @file if given, else @dentry), then round and clamp the total */
10043+ loff_t sz;
10044+ aufs_bindex_t bindex, bbot;
10045+ struct file *h_file;
10046+ struct dentry *h_dentry;
10047+
10048+ sz = 0;
10049+ if (file) { /* @dentry is ignored whenever @file is non-NULL */
10050+ AuDebugOn(!d_is_dir(file->f_path.dentry));
10051+
10052+ bbot = au_fbbot_dir(file);
10053+ for (bindex = au_fbtop(file);
10054+ bindex <= bbot && sz < KMALLOC_MAX_SIZE; /* stop summing once the cap is reached */
10055+ bindex++) {
10056+ h_file = au_hf_dir(file, bindex);
10057+ if (h_file && file_inode(h_file))
10058+ sz += vfsub_f_size_read(h_file);
10059+ }
10060+ } else {
10061+ AuDebugOn(!dentry);
10062+ AuDebugOn(!d_is_dir(dentry));
10063+
10064+ bbot = au_dbtaildir(dentry);
10065+ for (bindex = au_dbtop(dentry);
10066+ bindex <= bbot && sz < KMALLOC_MAX_SIZE; /* stop summing once the cap is reached */
10067+ bindex++) {
10068+ h_dentry = au_h_dptr(dentry, bindex);
10069+ if (h_dentry && d_is_positive(h_dentry))
10070+ sz += i_size_read(d_inode(h_dentry));
10071+ }
10072+ }
10073+ if (sz < KMALLOC_MAX_SIZE)
10074+ sz = roundup_pow_of_two(sz); /* NOTE(review): sz can be 0 here if nothing was summed -- confirm that is well-defined */
10075+ if (sz > KMALLOC_MAX_SIZE)
10076+ sz = KMALLOC_MAX_SIZE;
10077+ else if (sz < NAME_MAX) { /* too small for one name: use the default block size */
10078+ BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX);
10079+ sz = AUFS_RDBLK_DEF;
10080+ }
10081+ return sz;
10082+}
10083+
10084+struct au_dir_ts_arg { /* argument handed from au_dir_ts() to au_do_dir_ts() */
10085+ struct dentry *dentry; /* holds a reference; au_do_dir_ts() does the dput() */
10086+ aufs_bindex_t brid; /* branch id from au_sbr_id(); mapped back with au_br_index() */
10087+};
10088+
10089+static void au_do_dir_ts(void *arg)
10090+{ /* deferred half of au_dir_ts(); always puts the dentry, signals si_nowait and frees @arg */
10091+ struct au_dir_ts_arg *a = arg;
10092+ struct au_dtime dt;
10093+ struct path h_path;
10094+ struct inode *dir, *h_dir;
10095+ struct super_block *sb;
10096+ struct au_branch *br;
10097+ struct au_hinode *hdir;
10098+ int err;
10099+ aufs_bindex_t btop, bindex;
10100+
10101+ sb = a->dentry->d_sb;
10102+ if (d_really_is_negative(a->dentry))
10103+ goto out;
10104+ /* no dir->i_mutex lock */
10105+ aufs_read_lock(a->dentry, AuLock_DW); /* noflush */
10106+
10107+ dir = d_inode(a->dentry);
10108+ btop = au_ibtop(dir);
10109+ bindex = au_br_index(sb, a->brid); /* the index is looked up again from the saved id */
10110+ if (bindex < btop)
10111+ goto out_unlock;
10112+
10113+ br = au_sbr(sb, bindex);
10114+ h_path.dentry = au_h_dptr(a->dentry, bindex);
10115+ if (!h_path.dentry)
10116+ goto out_unlock;
10117+ h_path.mnt = au_br_mnt(br);
10118+ au_dtime_store(&dt, a->dentry, &h_path); /* fills dt from the dir on branch @bindex */
10119+
10120+ br = au_sbr(sb, btop); /* from here on everything targets the top branch */
10121+ if (!au_br_writable(br->br_perm))
10122+ goto out_unlock;
10123+ h_path.dentry = au_h_dptr(a->dentry, btop);
10124+ h_path.mnt = au_br_mnt(br);
10125+ err = vfsub_mnt_want_write(h_path.mnt);
10126+ if (err)
10127+ goto out_unlock;
10128+ hdir = au_hi(dir, btop);
10129+ au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
10130+ h_dir = au_h_iptr(dir, btop);
10131+ if (h_dir->i_nlink
10132+ && timespec64_compare(&h_dir->i_mtime, &dt.dt_mtime) < 0) { /* only when the top dir's mtime is older */
10133+ dt.dt_h_path = h_path; /* retarget dt at the top-branch dir before reverting */
10134+ au_dtime_revert(&dt);
10135+ }
10136+ au_hn_inode_unlock(hdir);
10137+ vfsub_mnt_drop_write(h_path.mnt);
10138+ au_cpup_attr_timesizes(dir);
10139+
10140+out_unlock:
10141+ aufs_read_unlock(a->dentry, AuLock_DW);
10142+out:
10143+ dput(a->dentry); /* pairs with the dget() in au_dir_ts() */
10144+ au_nwt_done(&au_sbi(sb)->si_nowait);
10145+ au_kfree_try_rcu(arg);
10146+}
10147+
10148+void au_dir_ts(struct inode *dir, aufs_bindex_t bindex)
10149+{ /* refresh @dir's times directly when @bindex is the top branch, else queue au_do_dir_ts() */
10150+ int perm, wkq_err;
10151+ aufs_bindex_t btop;
10152+ struct au_dir_ts_arg *arg;
10153+ struct dentry *dentry;
10154+ struct super_block *sb;
10155+
10156+ IMustLock(dir);
10157+
10158+ dentry = d_find_any_alias(dir);
10159+ AuDebugOn(!dentry); /* NOTE(review): the only guard before dentry->d_sb is read on the next line */
10160+ sb = dentry->d_sb;
10161+ btop = au_ibtop(dir);
10162+ if (btop == bindex) {
10163+ au_cpup_attr_timesizes(dir);
10164+ goto out;
10165+ }
10166+
10167+ perm = au_sbr_perm(sb, btop);
10168+ if (!au_br_writable(perm)) /* top branch not writable: nothing is queued */
10169+ goto out;
10170+
10171+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
10172+ if (!arg) /* allocation failure is silently ignored; the function returns void */
10173+ goto out;
10174+
10175+ arg->dentry = dget(dentry); /* will be dput-ted by au_do_dir_ts() */
10176+ arg->brid = au_sbr_id(sb, bindex);
10177+ wkq_err = au_wkq_nowait(au_do_dir_ts, arg, sb, /*flags*/0);
10178+ if (unlikely(wkq_err)) {
10179+ pr_err("wkq %d\n", wkq_err);
10180+ dput(dentry); /* undo the dget() above, since au_do_dir_ts() will not run */
10181+ au_kfree_try_rcu(arg);
10182+ }
10183+
10184+out:
10185+ dput(dentry); /* drops the reference obtained from d_find_any_alias() */
10186+}
10187+
10188+/* ---------------------------------------------------------------------- */
10189+
10190+static int reopen_dir(struct file *file)
10191+{ /* resync @file's lower files with the dentry's range: close those outside it, open missing ones */
10192+ int err;
10193+ unsigned int flags;
10194+ aufs_bindex_t bindex, btail, btop;
10195+ struct dentry *dentry, *h_dentry;
10196+ struct file *h_file;
10197+
10198+ /* open all lower dirs */
10199+ dentry = file->f_path.dentry;
10200+ btop = au_dbtop(dentry);
10201+ for (bindex = au_fbtop(file); bindex < btop; bindex++)
10202+ au_set_h_fptr(file, bindex, NULL); /* clear slots above the new top */
10203+ au_set_fbtop(file, btop);
10204+
10205+ btail = au_dbtaildir(dentry);
10206+ for (bindex = au_fbbot_dir(file); btail < bindex; bindex--)
10207+ au_set_h_fptr(file, bindex, NULL); /* clear slots below the new tail */
10208+ au_set_fbbot_dir(file, btail);
10209+
10210+ flags = vfsub_file_flags(file);
10211+ for (bindex = btop; bindex <= btail; bindex++) {
10212+ h_dentry = au_h_dptr(dentry, bindex);
10213+ if (!h_dentry)
10214+ continue;
10215+ h_file = au_hf_dir(file, bindex);
10216+ if (h_file) /* already open on this branch */
10217+ continue;
10218+
10219+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
10220+ err = PTR_ERR(h_file);
10221+ if (IS_ERR(h_file))
10222+ goto out; /* close all? */
10223+ au_set_h_fptr(file, bindex, h_file);
10224+ }
10225+ au_update_figen(file); /* reached only when every open succeeded */
10226+ /* todo: necessary? */
10227+ /* file->f_ra = h_file->f_ra; */
10228+ err = 0;
10229+
10230+out:
10231+ return err;
10232+}
10233+
10234+static int do_open_dir(struct file *file, int flags, struct file *h_file)
10235+{ /* open the lower dir on every branch in dbtop..dbtaildir; on error all slots are cleared again */
10236+ int err;
10237+ aufs_bindex_t bindex, btail;
10238+ struct dentry *dentry, *h_dentry;
10239+ struct vfsmount *mnt;
10240+
10241+ FiMustWriteLock(file);
10242+ AuDebugOn(h_file); /* callers are expected to pass NULL; the parameter is reused as a local below */
10243+
10244+ err = 0;
10245+ mnt = file->f_path.mnt;
10246+ dentry = file->f_path.dentry;
10247+ file->f_version = inode_query_iversion(d_inode(dentry));
10248+ bindex = au_dbtop(dentry);
10249+ au_set_fbtop(file, bindex);
10250+ btail = au_dbtaildir(dentry);
10251+ au_set_fbbot_dir(file, btail);
10252+ for (; !err && bindex <= btail; bindex++) {
10253+ h_dentry = au_h_dptr(dentry, bindex);
10254+ if (!h_dentry)
10255+ continue;
10256+
10257+ err = vfsub_test_mntns(mnt, h_dentry->d_sb);
10258+ if (unlikely(err))
10259+ break;
10260+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
10261+ if (IS_ERR(h_file)) {
10262+ err = PTR_ERR(h_file);
10263+ break;
10264+ }
10265+ au_set_h_fptr(file, bindex, h_file);
10266+ }
10267+ au_update_figen(file); /* runs on the error path as well */
10268+ /* todo: necessary? */
10269+ /* file->f_ra = h_file->f_ra; */
10270+ if (!err)
10271+ return 0; /* success */
10272+
10273+ /* close all */
10274+ for (bindex = au_fbtop(file); bindex <= btail; bindex++)
10275+ au_set_h_fptr(file, bindex, NULL);
10276+ au_set_fbtop(file, -1); /* mark the file's branch range as empty */
10277+ au_set_fbbot_dir(file, -1);
10278+
10279+ return err;
10280+}
10281+
10282+static int aufs_open_dir(struct inode *inode __maybe_unused,
10283+ struct file *file)
10284+{ /* open entry point for dirs: allocate an au_fidir and run au_do_open() with do_open_dir() */
10285+ int err;
10286+ struct super_block *sb;
10287+ struct au_fidir *fidir;
10288+
10289+ err = -ENOMEM; /* returned as is when au_fidir_alloc() fails */
10290+ sb = file->f_path.dentry->d_sb;
10291+ si_read_lock(sb, AuLock_FLUSH);
10292+ fidir = au_fidir_alloc(sb);
10293+ if (fidir) {
10294+ struct au_do_open_args args = {
10295+ .open = do_open_dir,
10296+ .fidir = fidir
10297+ };
10298+ err = au_do_open(file, &args);
10299+ if (unlikely(err))
10300+ au_kfree_rcu(fidir); /* the fidir is freed here when the open fails */
10301+ }
10302+ si_read_unlock(sb);
10303+ return err;
10304+}
10305+
10306+static int aufs_release_dir(struct inode *inode __maybe_unused,
10307+ struct file *file)
10308+{ /* release entry point for dirs: free the vdir cache, put every lower file, free fidir; returns 0 */
10309+ struct au_vdir *vdir_cache;
10310+ struct au_finfo *finfo;
10311+ struct au_fidir *fidir;
10312+ struct au_hfile *hf;
10313+ aufs_bindex_t bindex, bbot;
10314+
10315+ finfo = au_fi(file);
10316+ fidir = finfo->fi_hdir;
10317+ if (fidir) {
10318+ au_hbl_del(&finfo->fi_hlist,
10319+ &au_sbi(file->f_path.dentry->d_sb)->si_files); /* unlink from the superblock's si_files list */
10320+ vdir_cache = fidir->fd_vdir_cache; /* lock-free */
10321+ if (vdir_cache)
10322+ au_vdir_free(vdir_cache);
10323+
10324+ bindex = finfo->fi_btop;
10325+ if (bindex >= 0) { /* a negative fi_btop means no lower file to put */
10326+ hf = fidir->fd_hfile + bindex;
10327+ /*
10328+ * calls fput() instead of filp_close(),
10329+ * since no dnotify or lock for the lower file.
10330+ */
10331+ bbot = fidir->fd_bbot;
10332+ for (; bindex <= bbot; bindex++, hf++)
10333+ if (hf->hf_file)
10334+ au_hfput(hf, /*execed*/0);
10335+ }
10336+ au_kfree_rcu(fidir);
10337+ finfo->fi_hdir = NULL;
10338+ }
10339+ au_finfo_fin(file);
10340+ return 0;
10341+}
10342+
10343+/* ---------------------------------------------------------------------- */
10344+
10345+static int au_do_flush_dir(struct file *file, fl_owner_t id)
10346+{ /* vfsub_flush() each open lower file from fbtop to fbbot_dir, stopping at the first error */
10347+ int err;
10348+ aufs_bindex_t bindex, bbot;
10349+ struct file *h_file;
10350+
10351+ err = 0;
10352+ bbot = au_fbbot_dir(file);
10353+ for (bindex = au_fbtop(file); !err && bindex <= bbot; bindex++) {
10354+ h_file = au_hf_dir(file, bindex);
10355+ if (h_file) /* branches without an open lower file are skipped */
10356+ err = vfsub_flush(h_file, id);
10357+ }
10358+ return err;
10359+}
10360+
10361+static int aufs_flush_dir(struct file *file, fl_owner_t id)
10362+{ /* flush entry point for dirs: delegates to au_do_flush() with au_do_flush_dir() as the worker */
10363+ return au_do_flush(file, id, au_do_flush_dir);
10364+}
10365+
10366+/* ---------------------------------------------------------------------- */
10367+
10368+static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync)
10369+{ /* fsync by dentry: vfsub_fsync() each eligible branch in dbtop..dbbot, stopping at the first error */
10370+ int err;
10371+ aufs_bindex_t bbot, bindex;
10372+ struct inode *inode;
10373+ struct super_block *sb;
10374+
10375+ err = 0;
10376+ sb = dentry->d_sb;
10377+ inode = d_inode(dentry);
10378+ IMustLock(inode);
10379+ bbot = au_dbbot(dentry);
10380+ for (bindex = au_dbtop(dentry); !err && bindex <= bbot; bindex++) {
10381+ struct path h_path;
10382+
10383+ if (au_test_ro(sb, bindex, inode)) /* branches for which au_test_ro() is non-zero are skipped */
10384+ continue;
10385+ h_path.dentry = au_h_dptr(dentry, bindex);
10386+ if (!h_path.dentry)
10387+ continue;
10388+
10389+ h_path.mnt = au_sbr_mnt(sb, bindex);
10390+ err = vfsub_fsync(NULL, &h_path, datasync); /* no lower file here: the file argument is NULL */
10391+ }
10392+
10393+ return err;
10394+}
10395+
10396+static int au_do_fsync_dir(struct file *file, int datasync)
10397+{ /* fsync by file: revalidate and lock, then vfsub_fsync() each eligible open lower file */
10398+ int err;
10399+ aufs_bindex_t bbot, bindex;
10400+ struct file *h_file;
10401+ struct super_block *sb;
10402+ struct inode *inode;
10403+
10404+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1, /*fi_lsc*/0);
10405+ if (unlikely(err)) /* the caller sees the same err for this failure and for a failed fsync below */
10406+ goto out;
10407+
10408+ inode = file_inode(file);
10409+ sb = inode->i_sb;
10410+ bbot = au_fbbot_dir(file);
10411+ for (bindex = au_fbtop(file); !err && bindex <= bbot; bindex++) {
10412+ h_file = au_hf_dir(file, bindex);
10413+ if (!h_file || au_test_ro(sb, bindex, inode)) /* skip unopened branches and those au_test_ro() flags */
10414+ continue;
10415+
10416+ err = vfsub_fsync(h_file, &h_file->f_path, datasync);
10417+ }
10418+
10419+out:
10420+ return err;
10421+}
10422+
10423+/*
10424+ * @file may be NULL -- NOTE(review): yet file->f_path.dentry is read unconditionally below; confirm
10425+ */
10426+static int aufs_fsync_dir(struct file *file, loff_t start, loff_t end,
10427+ int datasync)
10428+{ /* fsync entry point for dirs; @start and @end are not used in this function */
10429+ int err;
10430+ struct dentry *dentry;
10431+ struct inode *inode;
10432+ struct super_block *sb;
10433+
10434+ err = 0;
10435+ dentry = file->f_path.dentry;
10436+ inode = d_inode(dentry);
10437+ inode_lock(inode);
10438+ sb = dentry->d_sb;
10439+ si_noflush_read_lock(sb);
10440+ if (file)
10441+ err = au_do_fsync_dir(file, datasync); /* locks via au_reval_and_lock_fdi() */
10442+ else {
10443+ di_write_lock_child(dentry);
10444+ err = au_do_fsync_dir_no_file(dentry, datasync);
10445+ }
10446+ au_cpup_attr_timesizes(inode); /* attributes are refreshed whether or not the fsync succeeded */
10447+ di_write_unlock(dentry); /* NOTE(review): also runs when au_do_fsync_dir() failed; aufs_iterate_shared() skips its unlocks after a failed au_reval_and_lock_fdi() -- confirm the locks are still held */
10448+ if (file)
10449+ fi_write_unlock(file);
10450+
10451+ si_read_unlock(sb);
10452+ inode_unlock(inode);
10453+ return err;
10454+}
10455+
10456+/* ---------------------------------------------------------------------- */
10457+
10458+static int aufs_iterate_shared(struct file *file, struct dir_context *ctx)
10459+{ /* readdir entry point: revalidate, build the vdir with au_vdir_init(), then emit via au_vdir_fill_de() */
10460+ int err;
10461+ struct dentry *dentry;
10462+ struct inode *inode, *h_inode;
10463+ struct super_block *sb;
10464+
10465+ AuDbg("%pD, ctx{%ps, %llu}\n", file, ctx->actor, ctx->pos);
10466+
10467+ dentry = file->f_path.dentry;
10468+ inode = d_inode(dentry);
10469+ IMustLock(inode);
10470+
10471+ sb = dentry->d_sb;
10472+ si_read_lock(sb, AuLock_FLUSH);
10473+ err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1, /*fi_lsc*/0);
10474+ if (unlikely(err))
10475+ goto out; /* this path skips the di/fi unlocks and releases only the si lock */
10476+ err = au_alive_dir(dentry);
10477+ if (!err)
10478+ err = au_vdir_init(file);
10479+ di_downgrade_lock(dentry, AuLock_IR); /* downgraded before err is tested, so out_unlock can read-unlock */
10480+ if (unlikely(err))
10481+ goto out_unlock;
10482+
10483+ h_inode = au_h_iptr(inode, au_ibtop(inode));
10484+ if (!au_test_nfsd()) {
10485+ err = au_vdir_fill_de(file, ctx);
10486+ fsstack_copy_attr_atime(inode, h_inode);
10487+ } else {
10488+ /*
10489+ * nfsd filldir may call lookup_one_len(), vfs_getattr(),
10490+ * encode_fh() and others.
10491+ */
10492+ atomic_inc(&h_inode->i_count); /* hold h_inode while the di/si locks are dropped */
10493+ di_read_unlock(dentry, AuLock_IR);
10494+ si_read_unlock(sb);
10495+ err = au_vdir_fill_de(file, ctx); /* runs with only the fi lock held */
10496+ fsstack_copy_attr_atime(inode, h_inode);
10497+ fi_write_unlock(file);
10498+ iput(h_inode); /* pairs with the atomic_inc() above */
10499+
10500+ AuTraceErr(err);
10501+ return err;
10502+ }
10503+
10504+out_unlock:
10505+ di_read_unlock(dentry, AuLock_IR);
10506+ fi_write_unlock(file);
10507+out:
10508+ si_read_unlock(sb);
10509+ return err;
10510+}
10511+
10512+/* ---------------------------------------------------------------------- */
10513+
10514+#define AuTestEmpty_WHONLY 1 /* in test_empty_cb(): a plain name unknown to whlist yields -ENOTEMPTY */
10515+#define AuTestEmpty_CALLED (1 << 1) /* set by test_empty_cb() every time it is invoked */
10516+#define AuTestEmpty_SHWH (1 << 2) /* forwarded to au_nhash_append_wh() by test_empty_cb() */
10517+#define au_ftest_testempty(flags, name) ((flags) & AuTestEmpty_##name)
10518+#define au_fset_testempty(flags, name) \
10519+ do { (flags) |= AuTestEmpty_##name; } while (0)
10520+#define au_fclr_testempty(flags, name) \
10521+ do { (flags) &= ~AuTestEmpty_##name; } while (0)
10522+
10523+#ifndef CONFIG_AUFS_SHWH
10524+#undef AuTestEmpty_SHWH
10525+#define AuTestEmpty_SHWH 0 /* without CONFIG_AUFS_SHWH the SHWH test is always false */
10526+#endif
10527+
10528+struct test_empty_arg { /* state shared between do_test_empty() and test_empty_cb() */
10529+ struct dir_context ctx; /* embedded so the callback can recover this struct via container_of() */
10530+ struct au_nhash *whlist; /* whiteout names collected so far */
10531+ unsigned int flags; /* AuTestEmpty_* bits */
10532+ int err; /* result of the most recent callback invocation */
10533+ aufs_bindex_t bindex; /* branch currently being scanned */
10534+};
10535+
10536+static bool test_empty_cb(struct dir_context *ctx, const char *__name,
10537+ int namelen, loff_t offset __maybe_unused, u64 ino,
10538+ unsigned int d_type)
10539+{ /* per-entry callback for the empty-dir test; returns true while arg->err stays 0 */
10540+ struct test_empty_arg *arg = container_of(ctx, struct test_empty_arg,
10541+ ctx);
10542+ char *name = (void *)__name;
10543+
10544+ arg->err = 0;
10545+ au_fset_testempty(arg->flags, CALLED); /* tells do_test_empty() that this pass produced an entry */
10546+ /* smp_mb(); */
10547+ if (name[0] == '.'
10548+ && (namelen == 1 || (name[1] == '.' && namelen == 2)))
10549+ goto out; /* success */
10550+
10551+ if (namelen <= AUFS_WH_PFX_LEN
10552+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) { /* the name lacks the whiteout prefix */
10553+ if (au_ftest_testempty(arg->flags, WHONLY)
10554+ && !au_nhash_test_known_wh(arg->whlist, name, namelen))
10555+ arg->err = -ENOTEMPTY;
10556+ goto out; /* without WHONLY a plain name is simply ignored here */
10557+ }
10558+
10559+ name += AUFS_WH_PFX_LEN; /* strip the whiteout prefix before recording the name */
10560+ namelen -= AUFS_WH_PFX_LEN;
10561+ if (!au_nhash_test_known_wh(arg->whlist, name, namelen))
10562+ arg->err = au_nhash_append_wh
10563+ (arg->whlist, name, namelen, ino, d_type, arg->bindex,
10564+ au_ftest_testempty(arg->flags, SHWH));
10565+
10566+out:
10567+ /* smp_mb(); */
10568+ AuTraceErr(arg->err);
10569+ return !arg->err;
10570+}
10571+/* iterate the lower dir on branch arg->bindex through arg->ctx; returns 0 or an error */
10572+static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
10573+{
10574+ int err;
10575+ struct file *h_file;
10576+ struct au_branch *br;
10577+
10578+ h_file = au_h_open(dentry, arg->bindex,
10579+ O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE,
10580+ /*file*/NULL, /*force_wr*/0);
10581+ err = PTR_ERR(h_file);
10582+ if (IS_ERR(h_file))
10583+ goto out;
10584+
10585+ err = 0;
10586+ if (!au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
10587+ && !file_inode(h_file)->i_nlink)
10588+ goto out_put; /* UDBA is not NONE and the lower dir has no link: skip reading, err is 0 */
10589+
10590+ do {
10591+ arg->err = 0;
10592+ au_fclr_testempty(arg->flags, CALLED);
10593+ /* smp_mb(); */
10594+ err = vfsub_iterate_dir(h_file, &arg->ctx);
10595+ if (err >= 0)
10596+ err = arg->err;
10597+ } while (!err && au_ftest_testempty(arg->flags, CALLED)); /* ends when a pass never calls the actor */
10598+
10599+out_put:
10600+ fput(h_file);
10601+ br = au_sbr(dentry->d_sb, arg->bindex);
10602+ au_lcnt_dec(&br->br_nfiles); /* presumably balances a count taken by au_h_open() -- confirm */
10603+out:
10604+ return err;
10605+}
10606+/* arguments for call_do_test_empty(); the result is stored through errp */
10607+struct do_test_empty_args {
10608+ int *errp;
10609+ struct dentry *dentry;
10610+ struct test_empty_arg *arg;
10611+};
10612+/* void (*)(void *) wrapper so that au_wkq_wait() can run do_test_empty() */
10613+static void call_do_test_empty(void *args)
10614+{
10615+ struct do_test_empty_args *a = args;
10616+ *a->errp = do_test_empty(a->dentry, a->arg);
10617+}
10618+/* run do_test_empty() directly, or through au_wkq_wait() when the permission test fails */
10619+static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
10620+{
10621+ int err, wkq_err;
10622+ struct dentry *h_dentry;
10623+ struct inode *h_inode;
10624+ struct user_namespace *h_userns;
10625+
10626+ h_userns = au_sbr_userns(dentry->d_sb, arg->bindex);
10627+ h_dentry = au_h_dptr(dentry, arg->bindex);
10628+ h_inode = d_inode(h_dentry);
10629+ /* todo: i_mode changes anytime? */
10630+ inode_lock_shared_nested(h_inode, AuLsc_I_CHILD);
10631+ err = au_test_h_perm_sio(h_userns, h_inode, MAY_EXEC | MAY_READ);
10632+ inode_unlock_shared(h_inode);
10633+ if (!err)
10634+ err = do_test_empty(dentry, arg);
10635+ else {
10636+ struct do_test_empty_args args = {
10637+ .errp = &err,
10638+ .dentry = dentry,
10639+ .arg = arg
10640+ };
10641+ unsigned int flags = arg->flags; /* saved; restored after the call below */
10642+
10643+ wkq_err = au_wkq_wait(call_do_test_empty, &args);
10644+ if (unlikely(wkq_err))
10645+ err = wkq_err; /* a workqueue failure replaces the result */
10646+ arg->flags = flags;
10647+ }
10648+
10649+ return err;
10650+}
10651+/* scan the top branch first, then every other positive branch in WHONLY mode */
10652+int au_test_empty_lower(struct dentry *dentry)
10653+{
10654+ int err;
10655+ unsigned int rdhash;
10656+ aufs_bindex_t bindex, btop, btail;
10657+ struct au_nhash whlist;
10658+ struct test_empty_arg arg = {
10659+ .ctx = {
10660+ .actor = test_empty_cb
10661+ }
10662+ };
10663+ int (*test_empty)(struct dentry *dentry, struct test_empty_arg *arg);
10664+
10665+ SiMustAnyLock(dentry->d_sb);
10666+
10667+ rdhash = au_sbi(dentry->d_sb)->si_rdhash;
10668+ if (!rdhash)
10669+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry)); /* si_rdhash is 0: estimate from the dir size */
10670+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
10671+ if (unlikely(err))
10672+ goto out;
10673+
10674+ arg.flags = 0;
10675+ arg.whlist = &whlist;
10676+ btop = au_dbtop(dentry);
10677+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
10678+ au_fset_testempty(arg.flags, SHWH);
10679+ test_empty = do_test_empty;
10680+ if (au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1))
10681+ test_empty = sio_test_empty; /* DIRPERM1: use the variant that may defer to the workqueue */
10682+ arg.bindex = btop;
10683+ err = test_empty(dentry, &arg);
10684+ if (unlikely(err))
10685+ goto out_whlist;
10686+ /* without WHONLY the pass above only collected whiteouts; the rest must be covered by them */
10687+ au_fset_testempty(arg.flags, WHONLY);
10688+ btail = au_dbtaildir(dentry);
10689+ for (bindex = btop + 1; !err && bindex <= btail; bindex++) {
10690+ struct dentry *h_dentry;
10691+
10692+ h_dentry = au_h_dptr(dentry, bindex);
10693+ if (h_dentry && d_is_positive(h_dentry)) {
10694+ arg.bindex = bindex;
10695+ err = test_empty(dentry, &arg);
10696+ }
10697+ }
10698+
10699+out_whlist:
10700+ au_nhash_wh_free(&whlist);
10701+out:
10702+ return err;
10703+}
10704+/* WHONLY scan of every positive branch using the caller's @whlist; stops at the first error */
10705+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist)
10706+{
10707+ int err;
10708+ struct test_empty_arg arg = {
10709+ .ctx = {
10710+ .actor = test_empty_cb
10711+ }
10712+ };
10713+ aufs_bindex_t bindex, btail;
10714+
10715+ err = 0;
10716+ arg.whlist = whlist;
10717+ arg.flags = AuTestEmpty_WHONLY;
10718+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
10719+ au_fset_testempty(arg.flags, SHWH);
10720+ btail = au_dbtaildir(dentry);
10721+ for (bindex = au_dbtop(dentry); !err && bindex <= btail; bindex++) {
10722+ struct dentry *h_dentry;
10723+
10724+ h_dentry = au_h_dptr(dentry, bindex);
10725+ if (h_dentry && d_is_positive(h_dentry)) {
10726+ arg.bindex = bindex;
10727+ err = sio_test_empty(dentry, &arg); /* always the sio variant here */
10728+ }
10729+ }
10730+
10731+ return err;
10732+}
10733+
10734+/* ---------------------------------------------------------------------- */
10735+/* file_operations for directories; declared extern in dir.h */
10736+const struct file_operations aufs_dir_fop = {
10737+ .owner = THIS_MODULE,
10738+ .llseek = default_llseek,
10739+ .read = generic_read_dir,
10740+ .iterate_shared = aufs_iterate_shared,
10741+ .unlocked_ioctl = aufs_ioctl_dir,
10742+#ifdef CONFIG_COMPAT
10743+ .compat_ioctl = aufs_compat_ioctl_dir,
10744+#endif
10745+ .open = aufs_open_dir,
10746+ .release = aufs_release_dir,
10747+ .flush = aufs_flush_dir,
10748+ .fsync = aufs_fsync_dir
10749+};
10750diff -urN /usr/share/empty/fs/aufs/dir.h linux/fs/aufs/dir.h
10751--- /usr/share/empty/fs/aufs/dir.h 1970-01-01 01:00:00.000000000 +0100
10752+++ linux/fs/aufs/dir.h 2022-11-05 23:02:18.962555950 +0100
10753@@ -0,0 +1,134 @@
10754+/* SPDX-License-Identifier: GPL-2.0 */
10755+/*
10756+ * Copyright (C) 2005-2022 Junjiro R. Okajima
10757+ *
10758+ * This program is free software; you can redistribute it and/or modify
10759+ * it under the terms of the GNU General Public License as published by
10760+ * the Free Software Foundation; either version 2 of the License, or
10761+ * (at your option) any later version.
10762+ *
10763+ * This program is distributed in the hope that it will be useful,
10764+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10765+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10766+ * GNU General Public License for more details.
10767+ *
10768+ * You should have received a copy of the GNU General Public License
10769+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
10770+ */
10771+
10772+/*
10773+ * directory operations
10774+ */
10775+
10776+#ifndef __AUFS_DIR_H__
10777+#define __AUFS_DIR_H__
10778+
10779+#ifdef __KERNEL__
10780+
10781+#include <linux/fs.h>
10782+
10783+/* ---------------------------------------------------------------------- */
10784+
10785+/* need to be faster and smaller */
10786+
10787+struct au_nhash {
10788+ unsigned int nh_num; /* presumably the number of heads in nh_head -- confirm in vdir.c */
10789+ struct hlist_head *nh_head;
10790+};
10791+/* a name with a one-byte length in front; packed */
10792+struct au_vdir_destr {
10793+ unsigned char len;
10794+ unsigned char name[];
10795+} __packed;
10796+/* hash-list node that points at a struct au_vdir_destr */
10797+struct au_vdir_dehstr {
10798+ struct hlist_node hash;
10799+ struct au_vdir_destr *str;
10800+ struct rcu_head rcu;
10801+} ____cacheline_aligned_in_smp;
10802+/* packed record: inode number, one-byte type, then the embedded name */
10803+struct au_vdir_de {
10804+ ino_t de_ino;
10805+ unsigned char de_type;
10806+ /* caution: packed */
10807+ struct au_vdir_destr de_str;
10808+} __packed;
10809+/* whiteout record on a hash list; wh_ino and wh_type exist only with CONFIG_AUFS_SHWH */
10810+struct au_vdir_wh {
10811+ struct hlist_node wh_hash;
10812+#ifdef CONFIG_AUFS_SHWH
10813+ ino_t wh_ino;
10814+ aufs_bindex_t wh_bindex;
10815+ unsigned char wh_type;
10816+#else
10817+ aufs_bindex_t wh_bindex;
10818+#endif
10819+ /* caution: packed */
10820+ struct au_vdir_destr wh_str;
10821+} __packed;
10822+/* one pointer viewed either as raw bytes or as a struct au_vdir_de */
10823+union au_vdir_deblk_p {
10824+ unsigned char *deblk;
10825+ struct au_vdir_de *de;
10826+};
10827+/* NOTE(review): looks like a per-directory set of vd_nblk entry blocks -- confirm in vdir.c */
10828+struct au_vdir {
10829+ unsigned char **vd_deblk;
10830+ unsigned long vd_nblk;
10831+ struct {
10832+ unsigned long ul;
10833+ union au_vdir_deblk_p p;
10834+ } vd_last;
10835+
10836+ u64 vd_version;
10837+ unsigned int vd_deblk_sz;
10838+ unsigned long vd_jiffy;
10839+ struct rcu_head rcu;
10840+} ____cacheline_aligned_in_smp;
10841+
10842+/* ---------------------------------------------------------------------- */
10843+
10844+/* dir.c */
10845+extern const struct file_operations aufs_dir_fop;
10846+void au_add_nlink(struct inode *dir, struct inode *h_dir);
10847+void au_sub_nlink(struct inode *dir, struct inode *h_dir);
10848+loff_t au_dir_size(struct file *file, struct dentry *dentry);
10849+void au_dir_ts(struct inode *dir, aufs_bindex_t bsrc);
10850+int au_test_empty_lower(struct dentry *dentry);
10851+int au_test_empty(struct dentry *dentry, struct au_nhash *whlist);
10852+
10853+/* vdir.c */
10854+unsigned int au_rdhash_est(loff_t sz);
10855+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp);
10856+void au_nhash_wh_free(struct au_nhash *whlist);
10857+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
10858+ int limit);
10859+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen);
10860+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
10861+ unsigned int d_type, aufs_bindex_t bindex,
10862+ unsigned char shwh);
10863+void au_vdir_free(struct au_vdir *vdir);
10864+int au_vdir_init(struct file *file);
10865+int au_vdir_fill_de(struct file *file, struct dir_context *ctx);
10866+
10867+/* ioctl.c */
10868+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg);
10869+
10870+#ifdef CONFIG_AUFS_RDU
10871+/* rdu.c */
10872+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
10873+#ifdef CONFIG_COMPAT
10874+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd,
10875+ unsigned long arg);
10876+#endif /* CONFIG_COMPAT */
10877+#else /* !CONFIG_AUFS_RDU; AuStub() presumably emits stubs whose body is "return -EINVAL" */
10878+AuStub(long, au_rdu_ioctl, return -EINVAL, struct file *file,
10879+ unsigned int cmd, unsigned long arg)
10880+#ifdef CONFIG_COMPAT
10881+AuStub(long, au_rdu_compat_ioctl, return -EINVAL, struct file *file,
10882+ unsigned int cmd, unsigned long arg)
10883+#endif /* CONFIG_COMPAT */
10884+#endif /* CONFIG_AUFS_RDU */
10885+
10886+#endif /* __KERNEL__ */
10887+#endif /* __AUFS_DIR_H__ */
10888diff -urN /usr/share/empty/fs/aufs/dirren.c linux/fs/aufs/dirren.c
10889--- /usr/share/empty/fs/aufs/dirren.c 1970-01-01 01:00:00.000000000 +0100
10890+++ linux/fs/aufs/dirren.c 2022-11-05 23:02:18.962555950 +0100
10891@@ -0,0 +1,1315 @@
10892+// SPDX-License-Identifier: GPL-2.0
10893+/*
10894+ * Copyright (C) 2017-2022 Junjiro R. Okajima
10895+ *
10896+ * This program is free software; you can redistribute it and/or modify
10897+ * it under the terms of the GNU General Public License as published by
10898+ * the Free Software Foundation; either version 2 of the License, or
10899+ * (at your option) any later version.
10900+ *
10901+ * This program is distributed in the hope that it will be useful,
10902+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
10903+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10904+ * GNU General Public License for more details.
10905+ *
10906+ * You should have received a copy of the GNU General Public License
10907+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
10908+ */
10909+
10910+/*
10911+ * special handling in renaming a directory
10912+ * in order to support looking-up the before-renamed name on the lower readonly
10913+ * branches
10914+ */
10915+
10916+#include <linux/byteorder/generic.h>
10917+#include "aufs.h"
10918+/* remove @ent from the bucket selected by au_dr_ihash(ent->dr_h_ino) */
10919+static void au_dr_hino_del(struct au_dr_br *dr, struct au_dr_hino *ent)
10920+{
10921+ int idx;
10922+
10923+ idx = au_dr_ihash(ent->dr_h_ino);
10924+ au_hbl_del(&ent->dr_hnode, dr->dr_h_ino + idx);
10925+}
10926+/* returns 1 when all AuDirren_NHASH buckets are empty, 0 at the first non-empty one */
10927+static int au_dr_hino_test_empty(struct au_dr_br *dr)
10928+{
10929+ int ret, i;
10930+ struct hlist_bl_head *hbl;
10931+
10932+ ret = 1;
10933+ for (i = 0; ret && i < AuDirren_NHASH; i++) {
10934+ hbl = dr->dr_h_ino + i;
10935+ hlist_bl_lock(hbl);
10936+ ret &= hlist_bl_empty(hbl);
10937+ hlist_bl_unlock(hbl);
10938+ }
10939+
10940+ return ret;
10941+}
10942+/* search the bucket of @ino with the bucket locked; returns the entry or NULL */
10943+static struct au_dr_hino *au_dr_hino_find(struct au_dr_br *dr, ino_t ino)
10944+{
10945+ struct au_dr_hino *found, *ent;
10946+ struct hlist_bl_head *hbl;
10947+ struct hlist_bl_node *pos;
10948+ int idx;
10949+
10950+ found = NULL;
10951+ idx = au_dr_ihash(ino);
10952+ hbl = dr->dr_h_ino + idx;
10953+ hlist_bl_lock(hbl);
10954+ hlist_bl_for_each_entry(ent, pos, hbl, dr_hnode)
10955+ if (ent->dr_h_ino == ino) {
10956+ found = ent;
10957+ break;
10958+ }
10959+ hlist_bl_unlock(hbl);
10960+
10961+ return found; /* NOTE(review): handed out after the unlock; callers presumably serialize otherwise -- confirm */
10962+}
10963+/* returns 1 if @ino is already hashed; otherwise 0, after inserting @add_ent when it is non-NULL */
10964+int au_dr_hino_test_add(struct au_dr_br *dr, ino_t ino,
10965+ struct au_dr_hino *add_ent)
10966+{
10967+ int found, idx;
10968+ struct hlist_bl_head *hbl;
10969+ struct hlist_bl_node *pos;
10970+ struct au_dr_hino *ent;
10971+
10972+ found = 0;
10973+ idx = au_dr_ihash(ino);
10974+ hbl = dr->dr_h_ino + idx;
10975+#if 0 /* debug print */
10976+ {
10977+ struct hlist_bl_node *tmp;
10978+
10979+ hlist_bl_for_each_entry_safe(ent, pos, tmp, hbl, dr_hnode)
10980+ AuDbg("hi%llu\n", (unsigned long long)ent->dr_h_ino);
10981+ }
10982+#endif
10983+ hlist_bl_lock(hbl);
10984+ hlist_bl_for_each_entry(ent, pos, hbl, dr_hnode)
10985+ if (ent->dr_h_ino == ino) {
10986+ found = 1;
10987+ break;
10988+ }
10989+ if (!found && add_ent)
10990+ hlist_bl_add_head(&add_ent->dr_hnode, hbl); /* test and insert under one lock hold */
10991+ hlist_bl_unlock(hbl);
10992+
10993+ if (!found && add_ent)
10994+ AuDbg("i%llu added\n", (unsigned long long)add_ent->dr_h_ino);
10995+
10996+ return found;
10997+}
10998+/* free every entry of every bucket and re-initialize the heads */
10999+void au_dr_hino_free(struct au_dr_br *dr)
11000+{
11001+ int i;
11002+ struct hlist_bl_head *hbl;
11003+ struct hlist_bl_node *pos, *tmp;
11004+ struct au_dr_hino *ent;
11005+
11006+ /* SiMustWriteLock(sb); */
11007+
11008+ for (i = 0; i < AuDirren_NHASH; i++) {
11009+ hbl = dr->dr_h_ino + i;
11010+ /* no spinlock since sbinfo must be write-locked */
11011+ hlist_bl_for_each_entry_safe(ent, pos, tmp, hbl, dr_hnode)
11012+ au_kfree_rcu(ent);
11013+ INIT_HLIST_BL_HEAD(hbl);
11014+ }
11015+}
11016+
11017+/* returns the number of inodes or an error */
11018+static int au_dr_hino_store(struct super_block *sb, struct au_branch *br,
11019+ struct file *hinofile)
11020+{
11021+ int err, i;
11022+ ssize_t ssz;
11023+ loff_t pos, oldsize;
11024+ __be64 u64;
11025+ struct inode *hinoinode;
11026+ struct hlist_bl_head *hbl;
11027+ struct hlist_bl_node *n1, *n2;
11028+ struct au_dr_hino *ent;
11029+
11030+ SiMustWriteLock(sb);
11031+ AuDebugOn(!au_br_writable(br->br_perm));
11032+
11033+ hinoinode = file_inode(hinofile);
11034+ oldsize = i_size_read(hinoinode);
11035+
11036+ err = 0;
11037+ pos = 0;
11038+ hbl = br->br_dirren.dr_h_ino;
11039+ for (i = 0; !err && i < AuDirren_NHASH; i++, hbl++) {
11040+ /* no bit-lock since sbinfo must be write-locked */
11041+ hlist_bl_for_each_entry_safe(ent, n1, n2, hbl, dr_hnode) {
11042+ AuDbg("hi%llu, %pD2\n",
11043+ (unsigned long long)ent->dr_h_ino, hinofile);
11044+ u64 = cpu_to_be64(ent->dr_h_ino);
11045+ ssz = vfsub_write_k(hinofile, &u64, sizeof(u64), &pos);
11046+ if (ssz == sizeof(u64))
11047+ continue;
11048+
11049+ /* write error */
11050+ pr_err("ssz %zd, %pD2\n", ssz, hinofile);
11051+ err = -ENOSPC;
11052+ if (ssz < 0)
11053+ err = ssz;
11054+ break;
11055+ }
11056+ }
11057+ /* regardless the error */
11058+ if (pos < oldsize) {
11059+ err = vfsub_trunc(&hinofile->f_path, pos, /*attr*/0, hinofile);
11060+ AuTraceErr(err);
11061+ }
11062+
11063+ AuTraceErr(err);
11064+ return err;
11065+}
11066+/* read be64 inums from @hinofile and hash them into @dr; on error every entry of @dr is freed */
11067+static int au_dr_hino_load(struct au_dr_br *dr, struct file *hinofile)
11068+{
11069+ int err, hidx;
11070+ ssize_t ssz;
11071+ size_t sz, n;
11072+ loff_t pos;
11073+ uint64_t u64;
11074+ struct au_dr_hino *ent;
11075+ struct inode *hinoinode;
11076+ struct hlist_bl_head *hbl;
11077+
11078+ err = 0;
11079+ pos = 0;
11080+ hbl = dr->dr_h_ino;
11081+ hinoinode = file_inode(hinofile);
11082+ sz = i_size_read(hinoinode);
11083+ AuDebugOn(sz % sizeof(u64)); /* the size is expected to be a multiple of 8 */
11084+ n = sz / sizeof(u64);
11085+ while (n--) {
11086+ ssz = vfsub_read_k(hinofile, &u64, sizeof(u64), &pos);
11087+ if (unlikely(ssz != sizeof(u64))) {
11088+ pr_err("ssz %zd, %pD2\n", ssz, hinofile);
11089+ err = -EINVAL; /* short read */
11090+ if (ssz < 0)
11091+ err = ssz;
11092+ goto out_free;
11093+ }
11094+
11095+ ent = kmalloc(sizeof(*ent), GFP_NOFS);
11096+ if (!ent) {
11097+ err = -ENOMEM;
11098+ AuTraceErr(err);
11099+ goto out_free;
11100+ }
11101+ ent->dr_h_ino = be64_to_cpu((__force __be64)u64);
11102+ AuDbg("hi%llu, %pD2\n",
11103+ (unsigned long long)ent->dr_h_ino, hinofile);
11104+ hidx = au_dr_ihash(ent->dr_h_ino);
11105+ au_hbl_add(&ent->dr_hnode, hbl + hidx);
11106+ }
11107+ goto out; /* success */
11108+
11109+out_free:
11110+ au_dr_hino_free(dr);
11111+out:
11112+ AuTraceErr(err);
11113+ return err;
11114+}
11115+
11116+/*
11117+ * @bindex/@br: give exactly one. @bindex (>= 0) means suspending hnotify, @br means not.
11118+ * @path: non-NULL loads from the file under @path, NULL stores under @br's own br_path.
11119+ */
11120+static int au_dr_hino(struct super_block *sb, aufs_bindex_t bindex,
11121+ struct au_branch *br, const struct path *path)
11122+{
11123+ int err, flags;
11124+ unsigned char load, suspend;
11125+ struct file *hinofile;
11126+ struct au_hinode *hdir;
11127+ struct inode *dir, *delegated;
11128+ struct path hinopath;
11129+ struct qstr hinoname = QSTR_INIT(AUFS_WH_DR_BRHINO,
11130+ sizeof(AUFS_WH_DR_BRHINO) - 1);
11131+
11132+ AuDebugOn(bindex < 0 && !br);
11133+ AuDebugOn(bindex >= 0 && br);
11134+
11135+ err = -EINVAL;
11136+ suspend = !br;
11137+ if (suspend)
11138+ br = au_sbr(sb, bindex);
11139+ load = !!path;
11140+ if (!load) {
11141+ path = &br->br_path;
11142+ AuDebugOn(!au_br_writable(br->br_perm));
11143+ if (unlikely(!au_br_writable(br->br_perm)))
11144+ goto out; /* storing needs a writable branch: -EINVAL */
11145+ }
11146+
11147+ hdir = NULL;
11148+ if (suspend) {
11149+ dir = d_inode(sb->s_root);
11150+ hdir = au_hinode(au_ii(dir), bindex);
11151+ dir = hdir->hi_inode;
11152+ au_hn_inode_lock_nested(hdir, AuLsc_I_CHILD);
11153+ } else {
11154+ dir = d_inode(path->dentry);
11155+ inode_lock_nested(dir, AuLsc_I_CHILD);
11156+ }
11157+ hinopath.mnt = path->mnt;
11158+ hinopath.dentry = vfsub_lkup_one(&hinoname, (struct path *)path);
11159+ err = PTR_ERR(hinopath.dentry);
11160+ if (IS_ERR(hinopath.dentry))
11161+ goto out_unlock;
11162+
11163+ err = 0;
11164+ flags = O_RDONLY;
11165+ if (load) {
11166+ if (d_is_negative(hinopath.dentry))
11167+ goto out_dput; /* success */
11168+ } else {
11169+ if (au_dr_hino_test_empty(&br->br_dirren)) {
11170+ if (d_is_positive(hinopath.dentry)) {
11171+ delegated = NULL;
11172+ err = vfsub_unlink(dir, &hinopath, &delegated,
11173+ /*force*/0);
11174+ AuTraceErr(err);
11175+ if (unlikely(err))
11176+ pr_err("ignored err %d, %pd2\n",
11177+ err, hinopath.dentry);
11178+ if (unlikely(err == -EWOULDBLOCK))
11179+ iput(delegated);
11180+ err = 0; /* the unlink failure was only logged */
11181+ }
11182+ goto out_dput; /* nothing to store */
11183+ } else if (!d_is_positive(hinopath.dentry)) {
11184+ err = vfsub_create(dir, &hinopath, 0600,
11185+ /*want_excl*/false);
11186+ AuTraceErr(err);
11187+ if (unlikely(err))
11188+ goto out_dput;
11189+ }
11190+ flags = O_WRONLY;
11191+ }
11192+ hinofile = vfsub_dentry_open(&hinopath, flags);
11193+ if (suspend)
11194+ au_hn_inode_unlock(hdir);
11195+ else
11196+ inode_unlock(dir);
11197+ dput(hinopath.dentry);
11198+ AuTraceErrPtr(hinofile);
11199+ if (IS_ERR(hinofile)) {
11200+ err = PTR_ERR(hinofile);
11201+ goto out;
11202+ }
11203+
11204+ if (load)
11205+ err = au_dr_hino_load(&br->br_dirren, hinofile);
11206+ else
11207+ err = au_dr_hino_store(sb, br, hinofile);
11208+ fput(hinofile);
11209+ goto out; /* the lock and the dentry were already released above */
11210+
11211+out_dput:
11212+ dput(hinopath.dentry);
11213+out_unlock:
11214+ if (suspend)
11215+ au_hn_inode_unlock(hdir);
11216+ else
11217+ inode_unlock(dir);
11218+out:
11219+ AuTraceErr(err);
11220+ return err;
11221+}
11222+
11223+/* ---------------------------------------------------------------------- */
11224+/* fill @brid from the statfs f_fsid, or from sb->s_dev when f_fsid is all zero */
11225+static int au_dr_brid_init(struct au_dr_brid *brid, const struct path *path)
11226+{
11227+ int err;
11228+ struct kstatfs kstfs;
11229+ dev_t dev;
11230+ struct dentry *dentry;
11231+ struct super_block *sb;
11232+
11233+ err = vfs_statfs((void *)path, &kstfs);
11234+ AuTraceErr(err);
11235+ if (unlikely(err))
11236+ goto out;
11237+
11238+ /* todo: support for UUID */
11239+
11240+ if (kstfs.f_fsid.val[0] || kstfs.f_fsid.val[1]) {
11241+ brid->type = AuBrid_FSID;
11242+ brid->fsid = kstfs.f_fsid;
11243+ } else {
11244+ dentry = path->dentry;
11245+ sb = dentry->d_sb;
11246+ dev = sb->s_dev;
11247+ if (dev) { /* with a zero s_dev too, @brid is left untouched */
11248+ brid->type = AuBrid_DEV;
11249+ brid->dev = dev;
11250+ }
11251+ }
11252+
11253+out:
11254+ return err;
11255+}
11256+/* init the hash heads and the branch id; with the DIRREN option also load the stored inums */
11257+int au_dr_br_init(struct super_block *sb, struct au_branch *br,
11258+ const struct path *path)
11259+{
11260+ int err, i;
11261+ struct au_dr_br *dr;
11262+ struct hlist_bl_head *hbl;
11263+
11264+ dr = &br->br_dirren;
11265+ hbl = dr->dr_h_ino;
11266+ for (i = 0; i < AuDirren_NHASH; i++, hbl++)
11267+ INIT_HLIST_BL_HEAD(hbl);
11268+
11269+ err = au_dr_brid_init(&dr->dr_brid, path);
11270+ if (unlikely(err))
11271+ goto out;
11272+
11273+ if (au_opt_test(au_mntflags(sb), DIRREN))
11274+ err = au_dr_hino(sb, /*bindex*/-1, br, path); /* non-NULL path: load */
11275+
11276+out:
11277+ AuTraceErr(err);
11278+ return err;
11279+}
11280+/* store the inums when the branch is writable, then free them unless the store failed */
11281+int au_dr_br_fin(struct super_block *sb, struct au_branch *br)
11282+{
11283+ int err;
11284+
11285+ err = 0;
11286+ if (au_br_writable(br->br_perm))
11287+ err = au_dr_hino(sb, /*bindex*/-1, br, /*path*/NULL);
11288+ if (!err)
11289+ au_dr_hino_free(&br->br_dirren);
11290+
11291+ return err;
11292+}
11293+
11294+/* ---------------------------------------------------------------------- */
11295+/* build "<type>_<branch id>_<inode number>" in @buf; returns its length, or -EINVAL if unset */
11296+static int au_brid_str(struct au_dr_brid *brid, struct inode *h_inode,
11297+ char *buf, size_t sz)
11298+{
11299+ int err;
11300+ unsigned int major, minor;
11301+ char *p;
11302+
11303+ p = buf;
11304+ err = snprintf(p, sz, "%d_", brid->type);
11305+ AuDebugOn(err > sz); /* NOTE(review): err == sz would already mean truncation -- confirm the intent */
11306+ p += err;
11307+ sz -= err;
11308+ switch (brid->type) {
11309+ case AuBrid_Unset:
11310+ return -EINVAL;
11311+ case AuBrid_UUID:
11312+ err = snprintf(p, sz, "%pU", brid->uuid.b);
11313+ break;
11314+ case AuBrid_FSID:
11315+ err = snprintf(p, sz, "%08x-%08x",
11316+ brid->fsid.val[0], brid->fsid.val[1]);
11317+ break;
11318+ case AuBrid_DEV:
11319+ major = MAJOR(brid->dev);
11320+ minor = MINOR(brid->dev);
11321+ if (major <= 0xff && minor <= 0xff)
11322+ err = snprintf(p, sz, "%02x%02x", major, minor);
11323+ else
11324+ err = snprintf(p, sz, "%03x:%05x", major, minor);
11325+ break;
11326+ }
11327+ AuDebugOn(err > sz);
11328+ p += err;
11329+ sz -= err;
11330+ err = snprintf(p, sz, "_%llu", (unsigned long long)h_inode->i_ino);
11331+ AuDebugOn(err > sz);
11332+ p += err;
11333+ sz -= err;
11334+
11335+ return p - buf;
11336+}
11337+/* format the id string of @br, using the inode of au_br_dentry(br), into @name; returns its length */
11338+static int au_drinfo_name(struct au_branch *br, char *name, int len)
11339+{
11340+ int rlen;
11341+ struct dentry *br_dentry;
11342+ struct inode *br_inode;
11343+
11344+ br_dentry = au_br_dentry(br);
11345+ br_inode = d_inode(br_dentry);
11346+ rlen = au_brid_str(&br->br_dirren.dr_brid, br_inode, name, len);
11347+ AuDebugOn(rlen >= AUFS_DIRREN_ENV_VAL_SZ);
11348+ AuDebugOn(rlen > len);
11349+
11350+ return rlen;
11351+}
11352+
11353+/* ---------------------------------------------------------------------- */
11354+
11355+/*
11356+ * from the given @h_dentry, construct drinfo at @*fdata.
11357+ * when the size of @*fdata is not enough, reallocate and return new @fdata and
11358+ * @allocated.
11359+ */
11360+static int au_drinfo_construct(struct au_drinfo_fdata **fdata,
11361+ struct dentry *h_dentry,
11362+ unsigned char *allocated)
11363+{
11364+ int err, v;
11365+ struct au_drinfo_fdata *f, *p;
11366+ struct au_drinfo *drinfo;
11367+ struct inode *h_inode;
11368+ struct qstr *qname;
11369+
11370+ err = 0;
11371+ f = *fdata;
11372+ h_inode = d_inode(h_dentry);
11373+ qname = &h_dentry->d_name;
11374+ drinfo = &f->drinfo;
11375+ drinfo->ino = (__force uint64_t)cpu_to_be64(h_inode->i_ino); /* kept big-endian in the buffer */
11376+ drinfo->oldnamelen = qname->len;
11377+ if (*allocated < sizeof(*f) + qname->len) {
11378+ v = roundup_pow_of_two(*allocated + qname->len);
11379+ p = au_krealloc(f, v, GFP_NOFS, /*may_shrink*/0);
11380+ if (unlikely(!p)) {
11381+ err = -ENOMEM;
11382+ AuTraceErr(err);
11383+ goto out; /* *fdata and *allocated are left as they were */
11384+ }
11385+ f = p;
11386+ *fdata = f;
11387+ *allocated = v; /* NOTE(review): *allocated is unsigned char; v >= 256 would wrap -- confirm */
11388+ drinfo = &f->drinfo;
11389+ }
11390+ memcpy(drinfo->oldname, qname->name, qname->len);
11391+ AuDbg("i%llu, %.*s\n",
11392+ be64_to_cpu((__force __be64)drinfo->ino), drinfo->oldnamelen,
11393+ drinfo->oldname);
11394+
11395+out:
11396+ AuTraceErr(err);
11397+ return err;
11398+}
11399+
11400+/* returns a kmalloc-ed drinfo (callers have to free it), NULL if @h_ino mismatches, or ERR_PTR */
11401+static struct au_drinfo *au_drinfo_read_k(struct file *file, ino_t h_ino)
11402+{
11403+ struct au_drinfo *ret, *drinfo;
11404+ struct au_drinfo_fdata fdata;
11405+ int len;
11406+ loff_t pos;
11407+ ssize_t ssz;
11408+
11409+ ret = ERR_PTR(-EIO);
11410+ pos = 0;
11411+ ssz = vfsub_read_k(file, &fdata, sizeof(fdata), &pos);
11412+ if (unlikely(ssz != sizeof(fdata))) {
11413+ AuIOErr("ssz %zd, %u, %pD2\n",
11414+ ssz, (unsigned int)sizeof(fdata), file);
11415+ goto out;
11416+ }
11417+
11418+ fdata.magic = ntohl((__force __be32)fdata.magic);
11419+ switch (fdata.magic) {
11420+ case AUFS_DRINFO_MAGIC_V1:
11421+ break;
11422+ default:
11423+ AuIOErr("magic-num 0x%x, 0x%x, %pD2\n",
11424+ fdata.magic, AUFS_DRINFO_MAGIC_V1, file);
11425+ goto out;
11426+ }
11427+
11428+ drinfo = &fdata.drinfo;
11429+ len = drinfo->oldnamelen;
11430+ if (!len) {
11431+ AuIOErr("broken drinfo %pD2\n", file);
11432+ goto out;
11433+ }
11434+
11435+ ret = NULL;
11436+ drinfo->ino = be64_to_cpu((__force __be64)drinfo->ino);
11437+ if (unlikely(h_ino && drinfo->ino != h_ino)) { /* h_ino == 0 disables this check */
11438+ AuDbg("ignored i%llu, i%llu, %pD2\n",
11439+ (unsigned long long)drinfo->ino,
11440+ (unsigned long long)h_ino, file);
11441+ goto out; /* success */
11442+ }
11443+
11444+ ret = kmalloc(sizeof(*ret) + len, GFP_NOFS);
11445+ if (unlikely(!ret)) {
11446+ ret = ERR_PTR(-ENOMEM);
11447+ AuTraceErrPtr(ret);
11448+ goto out;
11449+ }
11450+
11451+ *ret = *drinfo; /* fixed part; the name is read from the file next */
11452+ ssz = vfsub_read_k(file, (void *)ret->oldname, len, &pos);
11453+ if (unlikely(ssz != len)) {
11454+ au_kfree_rcu(ret);
11455+ ret = ERR_PTR(-EIO);
11456+ AuIOErr("ssz %zd, %u, %pD2\n", ssz, len, file);
11457+ goto out;
11458+ }
11459+
11460+ AuDbg("oldname %.*s\n", ret->oldnamelen, ret->oldname);
11461+
11462+out:
11463+ return ret;
11464+}
11465+
11466+/* ---------------------------------------------------------------------- */
11467+
11468+/* in order to be revertible */
11469+struct au_drinfo_rev_elm {
11470+ int created; /* au_drinfo_do_store() created the info file */
11471+ struct dentry *info_dentry; /* dget()-ed dentry of the created file */
11472+ struct au_drinfo *info_last; /* previous contents read before the overwrite */
11473+};
11474+/* header followed by nelm revert elements */
11475+struct au_drinfo_rev {
11476+ unsigned char already;
11477+ aufs_bindex_t nelm;
11478+ struct au_drinfo_rev_elm elm[];
11479+};
11480+
11481+/* todo: isn't it too large? */
11482+struct au_drinfo_store {
11483+ struct path h_ppath;
11484+ struct dentry *h_dentry;
11485+ struct au_drinfo_fdata *fdata;
11486+ char *infoname; /* inside of whname, just after PFX */
11487+ char whname[sizeof(AUFS_WH_DR_INFO_PFX) + AUFS_DIRREN_ENV_VAL_SZ];
11488+ aufs_bindex_t btgt, btail;
11489+ unsigned char no_sio, /* call au_drinfo_do_store() directly, not via au_wkq_wait() */
11490+ allocated, /* current size of *fdata */
11491+ infonamelen, /* room size for infoname */
11492+ whnamelen, /* length of the generated name */
11493+ renameback; /* renamed back */
11494+};
11495+
11496+/* on rename(2) error, the caller should revert it using @elm */
11497+static int au_drinfo_do_store(struct au_drinfo_store *w,
11498+ struct au_drinfo_rev_elm *elm)
11499+{
11500+ int err, len;
11501+ ssize_t ssz;
11502+ loff_t pos;
11503+ struct path infopath = {
11504+ .mnt = w->h_ppath.mnt
11505+ };
11506+ struct inode *h_dir, *h_inode, *delegated;
11507+ struct file *infofile;
11508+ struct qstr *qname;
11509+ /* a given @elm must be zero-filled */
11510+ AuDebugOn(elm
11511+ && memcmp(elm, page_address(ZERO_PAGE(0)), sizeof(*elm)));
11512+
11513+ infopath.dentry = vfsub_lookup_one_len(w->whname, &w->h_ppath,
11514+ w->whnamelen);
11515+ AuTraceErrPtr(infopath.dentry);
11516+ if (IS_ERR(infopath.dentry)) {
11517+ err = PTR_ERR(infopath.dentry);
11518+ goto out;
11519+ }
11520+
11521+ err = 0;
11522+ h_dir = d_inode(w->h_ppath.dentry);
11523+ if (elm && d_is_negative(infopath.dentry)) {
11524+ err = vfsub_create(h_dir, &infopath, 0600, /*want_excl*/true);
11525+ AuTraceErr(err);
11526+ if (unlikely(err))
11527+ goto out_dput;
11528+ elm->created = 1;
11529+ elm->info_dentry = dget(infopath.dentry); /* extra ref, dropped by au_drinfo_store_rev() */
11530+ }
11531+
11532+ infofile = vfsub_dentry_open(&infopath, O_RDWR);
11533+ AuTraceErrPtr(infofile);
11534+ if (IS_ERR(infofile)) {
11535+ err = PTR_ERR(infofile);
11536+ goto out_dput;
11537+ }
11538+
11539+ h_inode = d_inode(infopath.dentry);
11540+ if (elm && i_size_read(h_inode)) { /* non-empty file: keep its current contents in @elm */
11541+ h_inode = d_inode(w->h_dentry);
11542+ elm->info_last = au_drinfo_read_k(infofile, h_inode->i_ino);
11543+ AuTraceErrPtr(elm->info_last);
11544+ if (IS_ERR(elm->info_last)) {
11545+ err = PTR_ERR(elm->info_last);
11546+ elm->info_last = NULL;
11547+ AuDebugOn(elm->info_dentry);
11548+ goto out_fput;
11549+ }
11550+ }
11551+
11552+ if (elm && w->renameback) {
11553+ delegated = NULL;
11554+ err = vfsub_unlink(h_dir, &infopath, &delegated, /*force*/0);
11555+ AuTraceErr(err);
11556+ if (unlikely(err == -EWOULDBLOCK))
11557+ iput(delegated);
11558+ goto out_fput; /* renamed back: the info file is removed, not rewritten */
11559+ }
11560+
11561+ pos = 0;
11562+ qname = &w->h_dentry->d_name;
11563+ len = sizeof(*w->fdata) + qname->len;
11564+ if (!elm) /* no @elm: the name length comes from w->fdata itself */
11565+ len = sizeof(*w->fdata) + w->fdata->drinfo.oldnamelen;
11566+ ssz = vfsub_write_k(infofile, w->fdata, len, &pos);
11567+ if (ssz == len) {
11568+ AuDbg("hi%llu, %.*s\n", w->fdata->drinfo.ino,
11569+ w->fdata->drinfo.oldnamelen, w->fdata->drinfo.oldname);
11570+ goto out_fput; /* success */
11571+ } else {
11572+ err = -EIO; /* short write */
11573+ if (ssz < 0)
11574+ err = ssz;
11575+ /* the caller should revert it using @elm */
11576+ }
11577+
11578+out_fput:
11579+ fput(infofile);
11580+out_dput:
11581+ dput(infopath.dentry);
11582+out:
11583+ AuTraceErr(err);
11584+ return err;
11585+}
11586+/* arguments for au_call_drinfo_do_store(); the result is stored through errp */
11587+struct au_call_drinfo_do_store_args {
11588+ int *errp;
11589+ struct au_drinfo_store *w;
11590+ struct au_drinfo_rev_elm *elm;
11591+};
11592+/* void (*)(void *) wrapper so that au_wkq_wait() can run au_drinfo_do_store() */
11593+static void au_call_drinfo_do_store(void *args)
11594+{
11595+ struct au_call_drinfo_do_store_args *a = args;
11596+
11597+ *a->errp = au_drinfo_do_store(a->w, a->elm);
11598+}
11599+/* au_drinfo_do_store(), called directly when w->no_sio is set, else through au_wkq_wait() */
11600+static int au_drinfo_store_sio(struct au_drinfo_store *w,
11601+ struct au_drinfo_rev_elm *elm)
11602+{
11603+ int err, wkq_err;
11604+
11605+ if (w->no_sio)
11606+ err = au_drinfo_do_store(w, elm);
11607+ else {
11608+ struct au_call_drinfo_do_store_args a = {
11609+ .errp = &err,
11610+ .w = w,
11611+ .elm = elm
11612+ };
11613+ wkq_err = au_wkq_wait(au_call_drinfo_do_store, &a);
11614+ if (unlikely(wkq_err))
11615+ err = wkq_err; /* a workqueue failure replaces the result */
11616+ }
11617+ AuTraceErr(err);
11618+
11619+ return err;
11620+}
11621+/* zero @w, preset the name prefix and allocate w->fdata; returns 0 or -ENOMEM */
11622+static int au_drinfo_store_work_init(struct au_drinfo_store *w,
11623+ aufs_bindex_t btgt)
11624+{
11625+ int err;
11626+
11627+ memset(w, 0, sizeof(*w));
11628+ w->allocated = roundup_pow_of_two(sizeof(*w->fdata) + 40); /* 40: presumably initial room for a name -- confirm */
11629+ strcpy(w->whname, AUFS_WH_DR_INFO_PFX);
11630+ w->infoname = w->whname + sizeof(AUFS_WH_DR_INFO_PFX) - 1;
11631+ w->infonamelen = sizeof(w->whname) - sizeof(AUFS_WH_DR_INFO_PFX);
11632+ w->btgt = btgt;
11633+ w->no_sio = !!uid_eq(current_fsuid(), GLOBAL_ROOT_UID); /* set when fsuid is the global root */
11634+
11635+ err = -ENOMEM;
11636+ w->fdata = kcalloc(1, w->allocated, GFP_NOFS);
11637+ if (unlikely(!w->fdata)) {
11638+ AuTraceErr(err);
11639+ goto out;
11640+ }
11641+ w->fdata->magic = (__force uint32_t)htonl(AUFS_DRINFO_MAGIC_V1); /* kept big-endian in the buffer */
11642+ err = 0;
11643+
11644+out:
11645+ return err;
11646+}
11647+/* release w->fdata allocated by au_drinfo_store_work_init() */
11648+static void au_drinfo_store_work_fin(struct au_drinfo_store *w)
11649+{
11650+ au_kfree_rcu(w->fdata);
11651+}
11652+/* undo per @rev: unlink the files that were created, write back the contents that were overwritten */
11653+static void au_drinfo_store_rev(struct au_drinfo_rev *rev,
11654+ struct au_drinfo_store *w)
11655+{
11656+ struct au_drinfo_rev_elm *elm;
11657+ struct inode *h_dir, *delegated;
11658+ int err, nelm;
11659+ struct path infopath = {
11660+ .mnt = w->h_ppath.mnt
11661+ };
11662+
11663+ h_dir = d_inode(w->h_ppath.dentry);
11664+ IMustLock(h_dir);
11665+
11666+ err = 0;
11667+ elm = rev->elm;
11668+ for (nelm = rev->nelm; nelm > 0; nelm--, elm++) {
11669+ AuDebugOn(elm->created && elm->info_last);
11670+ if (elm->created) {
11671+ AuDbg("here\n");
11672+ delegated = NULL;
11673+ infopath.dentry = elm->info_dentry;
11674+ err = vfsub_unlink(h_dir, &infopath, &delegated,
11675+ !w->no_sio);
11676+ AuTraceErr(err);
11677+ if (unlikely(err == -EWOULDBLOCK))
11678+ iput(delegated);
11679+ dput(elm->info_dentry); /* drops the ref taken by dget() in au_drinfo_do_store() */
11680+ } else if (elm->info_last) {
11681+ AuDbg("here\n");
11682+ w->fdata->drinfo = *elm->info_last;
11683+ memcpy(w->fdata->drinfo.oldname,
11684+ elm->info_last->oldname,
11685+ elm->info_last->oldnamelen);
11686+ err = au_drinfo_store_sio(w, /*elm*/NULL); /* NULL @elm: write w->fdata as it is */
11687+ au_kfree_rcu(elm->info_last);
11688+ }
11689+ if (unlikely(err))
11690+ AuIOErr("%d, %s\n", err, w->whname);
11691+ /* go on even if err */
11692+ }
11693+}
11694+
11695+/* on success *_rev holds the allocated revert data; caller has to call au_dr_rename_fin() later */
11696+static int au_drinfo_store(struct dentry *dentry, aufs_bindex_t btgt,
11697+ struct qstr *dst_name, void *_rev)
11698+{ /* stores one drinfo per lower branch of @dentry below @btgt; on error everything stored so far is reverted */
11699+ int err, sz, nelm;
11700+ aufs_bindex_t bindex, btail;
11701+ struct au_drinfo_store work;
11702+ struct au_drinfo_rev *rev, **p;
11703+ struct au_drinfo_rev_elm *elm;
11704+ struct super_block *sb;
11705+ struct au_branch *br;
11706+ struct au_hinode *hdir;
11707+
11708+ err = au_drinfo_store_work_init(&work, btgt);
11709+ AuTraceErr(err);
11710+ if (unlikely(err))
11711+ goto out;
11712+
11713+ err = -ENOMEM;
11714+ btail = au_dbtaildir(dentry);
11715+ nelm = btail - btgt; /* one revert element per branch in (btgt, btail] */
11716+ sz = sizeof(*rev) + sizeof(*elm) * nelm;
11717+ rev = kcalloc(1, sz, GFP_NOFS);
11718+ if (unlikely(!rev)) {
11719+ AuTraceErr(err);
11720+ goto out_args;
11721+ }
11722+ rev->nelm = nelm;
11723+ elm = rev->elm;
11724+ p = _rev; /* _rev is used as a struct au_drinfo_rev ** out-parameter */
11725+ *p = rev;
11726+
11727+ err = 0;
11728+ sb = dentry->d_sb;
11729+ work.h_ppath.dentry = au_h_dptr(dentry, btgt);
11730+ work.h_ppath.mnt = au_sbr_mnt(sb, btgt);
11731+ hdir = au_hi(d_inode(dentry), btgt);
11732+ au_hn_inode_lock_nested(hdir, AuLsc_I_CHILD);
11733+ for (bindex = btgt + 1; bindex <= btail; bindex++, elm++) {
11734+ work.h_dentry = au_h_dptr(dentry, bindex);
11735+ if (!work.h_dentry)
11736+ continue; /* nothing on this branch; its elm stays zeroed */
11737+
11738+ err = au_drinfo_construct(&work.fdata, work.h_dentry,
11739+ &work.allocated);
11740+ AuTraceErr(err);
11741+ if (unlikely(err))
11742+ break;
11743+
11744+ work.renameback = au_qstreq(&work.h_dentry->d_name, dst_name);
11745+ br = au_sbr(sb, bindex);
11746+ work.whnamelen = sizeof(AUFS_WH_DR_INFO_PFX) - 1;
11747+ work.whnamelen += au_drinfo_name(br, work.infoname,
11748+ work.infonamelen);
11749+ AuDbg("whname %.*s, i%llu, %.*s\n",
11750+ work.whnamelen, work.whname,
11751+ be64_to_cpu((__force __be64)work.fdata->drinfo.ino),
11752+ work.fdata->drinfo.oldnamelen,
11753+ work.fdata->drinfo.oldname);
11754+
11755+ err = au_drinfo_store_sio(&work, elm);
11756+ AuTraceErr(err);
11757+ if (unlikely(err))
11758+ break;
11759+ }
11760+ if (unlikely(err)) {
11761+ /* revert all drinfo and hand back no revert data */
11762+ au_drinfo_store_rev(rev, &work);
11763+ au_kfree_try_rcu(rev);
11764+ *p = NULL;
11765+ }
11766+ au_hn_inode_unlock(hdir);
11767+
11768+out_args:
11769+ au_drinfo_store_work_fin(&work);
11770+out:
11771+ return err;
11772+}
11773+
11774+/* ---------------------------------------------------------------------- */
11775+
11776+int au_dr_rename(struct dentry *src, aufs_bindex_t bindex,
11777+ struct qstr *dst_name, void *_rev)
11778+{ /* registers the h_inode number of @src on branch @bindex in the branch's dirren hash, then stores drinfo; returns 0 or a negative error */
11779+ int err, already;
11780+ ino_t ino;
11781+ struct super_block *sb;
11782+ struct au_branch *br;
11783+ struct au_dr_br *dr;
11784+ struct dentry *h_dentry;
11785+ struct inode *h_inode;
11786+ struct au_dr_hino *ent;
11787+ struct au_drinfo_rev *rev, **p;
11788+
11789+ AuDbg("bindex %d\n", bindex);
11790+
11791+ err = -ENOMEM;
11792+ ent = kmalloc(sizeof(*ent), GFP_NOFS);
11793+ if (unlikely(!ent))
11794+ goto out;
11795+
11796+ sb = src->d_sb;
11797+ br = au_sbr(sb, bindex);
11798+ dr = &br->br_dirren;
11799+ h_dentry = au_h_dptr(src, bindex);
11800+ h_inode = d_inode(h_dentry);
11801+ ino = h_inode->i_ino;
11802+ ent->dr_h_ino = ino;
11803+ already = au_dr_hino_test_add(dr, ino, ent);
11804+ AuDbg("b%d, hi%llu, already %d\n",
11805+ bindex, (unsigned long long)ino, already);
11806+
11807+ err = au_drinfo_store(src, bindex, dst_name, _rev);
11808+ AuTraceErr(err);
11809+ if (!err) {
11810+ p = _rev;
11811+ rev = *p;
11812+ rev->already = already; /* remembered so that au_dr_rename_rev() knows whether to drop the hash entry */
11813+ goto out; /* success; NOTE(review): ent is not freed here when 'already' is set -- confirm ownership in au_dr_hino_test_add() */
11814+ }
11815+
11816+ /* revert: unhash ent only if this call appears to have added it */
11817+ if (!already)
11818+ au_dr_hino_del(dr, ent);
11819+ au_kfree_rcu(ent);
11820+
11821+out:
11822+ AuTraceErr(err);
11823+ return err;
11824+}
11825+
11826+void au_dr_rename_fin(struct dentry *src, aufs_bindex_t btgt, void *_rev)
11827+{ /* releases the revert data: puts every info_dentry, frees every info_last and @_rev itself; @src and @btgt are unused here */
11828+ struct au_drinfo_rev *rev;
11829+ struct au_drinfo_rev_elm *elm;
11830+ int nelm;
11831+
11832+ rev = _rev;
11833+ elm = rev->elm;
11834+ for (nelm = rev->nelm; nelm > 0; nelm--, elm++) {
11835+ dput(elm->info_dentry);
11836+ au_kfree_rcu(elm->info_last);
11837+ }
11838+ au_kfree_try_rcu(rev);
11839+}
11840+
11841+void au_dr_rename_rev(struct dentry *src, aufs_bindex_t btgt, void *_rev)
11842+{ /* undoes au_dr_rename(): reverts the stored info, drops the hash entry unless rev->already is set, and frees @_rev */
11843+ int err;
11844+ struct au_drinfo_store work;
11845+ struct au_drinfo_rev *rev = _rev;
11846+ struct super_block *sb;
11847+ struct au_branch *br;
11848+ struct inode *h_inode;
11849+ struct au_dr_br *dr;
11850+ struct au_dr_hino *ent;
11851+
11852+ err = au_drinfo_store_work_init(&work, btgt);
11853+ if (unlikely(err))
11854+ goto out; /* only rev itself is freed on this path; its elements are left untouched */
11855+
11856+ sb = src->d_sb;
11857+ br = au_sbr(sb, btgt);
11858+ work.h_ppath.dentry = au_h_dptr(src, btgt);
11859+ work.h_ppath.mnt = au_br_mnt(br);
11860+ au_drinfo_store_rev(rev, &work);
11861+ au_drinfo_store_work_fin(&work);
11862+ if (rev->already)
11863+ goto out; /* the hash entry existed before au_dr_rename(); keep it */
11864+
11865+ dr = &br->br_dirren;
11866+ h_inode = d_inode(work.h_ppath.dentry);
11867+ ent = au_dr_hino_find(dr, h_inode->i_ino);
11868+ BUG_ON(!ent);
11869+ au_dr_hino_del(dr, ent);
11870+ au_kfree_rcu(ent);
11871+
11872+out:
11873+ au_kfree_try_rcu(rev);
11874+ if (unlikely(err))
11875+ pr_err("failed to remove dirren info\n");
11876+}
11877+
11878+/* ---------------------------------------------------------------------- */
11879+
11880+static struct au_drinfo *au_drinfo_do_load(struct path *h_ppath,
11881+ char *whname, int whnamelen,
11882+ struct dentry **info_dentry)
11883+{ /* looks up @whname under @h_ppath and reads its drinfo; returns the drinfo, NULL when there is nothing to load, or an ERR_PTR */
11884+ struct au_drinfo *drinfo;
11885+ struct file *f;
11886+ struct inode *h_dir;
11887+ struct path infopath;
11888+ int unlocked;
11889+
11890+ AuDbg("%pd/%.*s\n", h_ppath->dentry, whnamelen, whname);
11891+
11892+ *info_dentry = NULL;
11893+ drinfo = NULL;
11894+ unlocked = 0; /* tracks whether the shared dir lock was already dropped */
11895+ h_dir = d_inode(h_ppath->dentry);
11896+ inode_lock_shared_nested(h_dir, AuLsc_I_PARENT);
11897+ infopath.dentry = vfsub_lookup_one_len(whname, h_ppath, whnamelen);
11898+ if (IS_ERR(infopath.dentry)) {
11899+ drinfo = (void *)infopath.dentry; /* propagate the lookup error as the return value */
11900+ goto out;
11901+ }
11902+
11903+ if (d_is_negative(infopath.dentry))
11904+ goto out_dput; /* success: no info file, drinfo stays NULL */
11905+
11906+ infopath.mnt = h_ppath->mnt;
11907+ f = vfsub_dentry_open(&infopath, O_RDONLY);
11908+ inode_unlock_shared(h_dir); /* the dir lock is released before the file is read */
11909+ unlocked = 1;
11910+ if (IS_ERR(f)) {
11911+ drinfo = (void *)f;
11912+ goto out_dput;
11913+ }
11914+
11915+ drinfo = au_drinfo_read_k(f, /*h_ino*/0);
11916+ if (IS_ERR_OR_NULL(drinfo))
11917+ goto out_fput;
11918+
11919+ AuDbg("oldname %.*s\n", drinfo->oldnamelen, drinfo->oldname);
11920+ *info_dentry = dget(infopath.dentry); /* extra reference handed to the caller */
11921+
11922+out_fput:
11923+ fput(f);
11924+out_dput:
11925+ dput(infopath.dentry);
11926+out:
11927+ if (!unlocked)
11928+ inode_unlock_shared(h_dir);
11929+ AuTraceErrPtr(drinfo);
11930+ return drinfo;
11931+}
11932+
11933+struct au_drinfo_do_load_args { /* argument bundle handed to au_call_drinfo_do_load() */
11934+ struct au_drinfo **drinfop; /* out: receives the return value of au_drinfo_do_load() */
11935+ struct path *h_ppath; /* the remaining members are forwarded unchanged */
11936+ char *whname;
11937+ int whnamelen;
11938+ struct dentry **info_dentry;
11939+};
11940+
11941+static void au_call_drinfo_do_load(void *args)
11942+{ /* void * adapter: unpacks @args, runs au_drinfo_do_load() and stores its result via drinfop */
11943+ struct au_drinfo_do_load_args *a = args;
11944+
11945+ *a->drinfop = au_drinfo_do_load(a->h_ppath, a->whname, a->whnamelen,
11946+ a->info_dentry);
11947+}
11948+
11949+struct au_drinfo_load { /* working state shared by au_dr_lkup() and au_drinfo_load() */
11950+ struct path h_ppath; /* path under which the info files are looked up */
11951+ struct qstr *qname; /* name compared against the stored oldname */
11952+ unsigned char no_sio; /* when set, work runs directly instead of via au_wkq_wait() */
11953+
11954+ aufs_bindex_t ninfo; /* number of slots in drinfo[] */
11955+ struct au_drinfo **drinfo; /* per-branch results, indexed by bindex */
11956+};
11957+
11958+static int au_drinfo_load(struct au_drinfo_load *w, aufs_bindex_t bindex,
11959+ struct au_branch *br)
11960+{ /* loads the drinfo for branch @br into w->drinfo[bindex]; returns 0 (slot may be NULL) or a negative error */
11961+ int err, wkq_err, whnamelen, e;
11962+ char whname[sizeof(AUFS_WH_DR_INFO_PFX) + AUFS_DIRREN_ENV_VAL_SZ]
11963+ = AUFS_WH_DR_INFO_PFX;
11964+ struct au_drinfo *drinfo;
11965+ struct qstr oldname;
11966+ struct inode *h_dir, *delegated;
11967+ struct dentry *info_dentry;
11968+ struct path infopath;
11969+
11970+ whnamelen = sizeof(AUFS_WH_DR_INFO_PFX) - 1;
11971+ whnamelen += au_drinfo_name(br, whname + whnamelen,
11972+ sizeof(whname) - whnamelen);
11973+ if (w->no_sio)
11974+ drinfo = au_drinfo_do_load(&w->h_ppath, whname, whnamelen,
11975+ &info_dentry);
11976+ else {
11977+ struct au_drinfo_do_load_args args = {
11978+ .drinfop = &drinfo,
11979+ .h_ppath = &w->h_ppath,
11980+ .whname = whname,
11981+ .whnamelen = whnamelen,
11982+ .info_dentry = &info_dentry
11983+ };
11984+ wkq_err = au_wkq_wait(au_call_drinfo_do_load, &args);
11985+ if (unlikely(wkq_err))
11986+ drinfo = ERR_PTR(wkq_err);
11987+ }
11988+ err = PTR_ERR(drinfo); /* evaluates to 0 when drinfo is NULL */
11989+ if (IS_ERR_OR_NULL(drinfo))
11990+ goto out;
11991+
11992+ err = 0;
11993+ oldname.len = drinfo->oldnamelen;
11994+ oldname.name = drinfo->oldname;
11995+ if (au_qstreq(w->qname, &oldname)) {
11996+ /* the name is renamed back: discard the info and unlink its file */
11997+ au_kfree_rcu(drinfo);
11998+ drinfo = NULL;
11999+
12000+ infopath.dentry = info_dentry;
12001+ infopath.mnt = w->h_ppath.mnt;
12002+ h_dir = d_inode(w->h_ppath.dentry);
12003+ delegated = NULL;
12004+ inode_lock_nested(h_dir, AuLsc_I_PARENT);
12005+ e = vfsub_unlink(h_dir, &infopath, &delegated, !w->no_sio);
12006+ inode_unlock(h_dir);
12007+ if (unlikely(e))
12008+ AuIOErr("ignored %d, %pd2\n", e, infopath.dentry); /* %pd2 takes a struct dentry *, not the address of the pointer */
12009+ if (unlikely(e == -EWOULDBLOCK))
12010+ iput(delegated);
12011+ }
12012+ au_kfree_rcu(w->drinfo[bindex]); /* replace whatever was stored in this slot before */
12013+ w->drinfo[bindex] = drinfo;
12014+ dput(info_dentry); /* drop the reference taken by au_drinfo_do_load() */
12015+
12016+out:
12017+ AuTraceErr(err);
12018+ return err;
12019+}
12020+
12021+/* ---------------------------------------------------------------------- */
12022+
12023+static void au_dr_lkup_free(struct au_drinfo **drinfo, int n)
12024+{ /* frees the first @n entries of @drinfo and then the array itself */
12025+ struct au_drinfo **p = drinfo; /* keep the array start; drinfo is advanced below */
12026+
12027+ while (n-- > 0)
12028+ au_kfree_rcu(*drinfo++);
12029+ au_kfree_try_rcu(p);
12030+}
12031+
12032+int au_dr_lkup(struct au_do_lookup_args *lkup, struct dentry *dentry,
12033+ aufs_bindex_t btgt)
12034+{ /* loads the drinfo of every branch in (btgt, bbot] into lkup->dirren.drinfo[]; returns 0 or a negative error */
12035+ int err, ninfo;
12036+ struct au_drinfo_load w;
12037+ aufs_bindex_t bindex, bbot;
12038+ struct au_branch *br;
12039+ struct inode *h_dir;
12040+ struct au_dr_hino *ent;
12041+ struct super_block *sb;
12042+
12043+ AuDbg("%.*s, name %.*s, whname %.*s, b%d\n",
12044+ AuLNPair(&dentry->d_name), AuLNPair(&lkup->dirren.dr_name),
12045+ AuLNPair(&lkup->whname), btgt);
12046+
12047+ sb = dentry->d_sb;
12048+ bbot = au_sbbot(sb);
12049+ w.ninfo = bbot + 1;
12050+ if (!lkup->dirren.drinfo) {
12051+ lkup->dirren.drinfo = kcalloc(w.ninfo,
12052+ sizeof(*lkup->dirren.drinfo),
12053+ GFP_NOFS);
12054+ if (unlikely(!lkup->dirren.drinfo)) {
12055+ err = -ENOMEM;
12056+ goto out;
12057+ }
12058+ lkup->dirren.ninfo = w.ninfo;
12059+ }
12060+ w.drinfo = lkup->dirren.drinfo;
12061+ w.no_sio = !!uid_eq(current_fsuid(), GLOBAL_ROOT_UID);
12062+ w.h_ppath.dentry = au_h_dptr(dentry, btgt);
12063+ AuDebugOn(!w.h_ppath.dentry);
12064+ w.h_ppath.mnt = au_sbr_mnt(sb, btgt);
12065+ w.qname = &dentry->d_name;
12066+ err = 0; /* the loop below may not run at all (btgt >= bbot); never return err uninitialized */
12067+ ninfo = 0;
12068+ for (bindex = btgt + 1; bindex <= bbot; bindex++) {
12069+ br = au_sbr(sb, bindex);
12070+ err = au_drinfo_load(&w, bindex, br);
12071+ if (unlikely(err))
12072+ goto out_free;
12073+ if (w.drinfo[bindex])
12074+ ninfo++;
12075+ }
12076+ if (!ninfo) { /* no info left for any lower branch: drop the hash entry of the dir on @btgt */
12077+ br = au_sbr(sb, btgt);
12078+ h_dir = d_inode(w.h_ppath.dentry);
12079+ ent = au_dr_hino_find(&br->br_dirren, h_dir->i_ino);
12080+ AuDebugOn(!ent);
12081+ au_dr_hino_del(&br->br_dirren, ent);
12082+ au_kfree_rcu(ent);
12083+ }
12084+ goto out; /* success */
12085+
12086+out_free:
12087+ au_dr_lkup_free(lkup->dirren.drinfo, lkup->dirren.ninfo);
12088+ lkup->dirren.ninfo = 0;
12089+ lkup->dirren.drinfo = NULL;
12090+out:
12091+ AuTraceErr(err);
12092+ return err;
12093+}
12094+
12095+void au_dr_lkup_fin(struct au_do_lookup_args *lkup)
12096+{ /* frees lkup->dirren.drinfo[] and its entries via au_dr_lkup_free() */
12097+ au_dr_lkup_free(lkup->dirren.drinfo, lkup->dirren.ninfo);
12098+}
12099+
12100+int au_dr_lkup_name(struct au_do_lookup_args *lkup, aufs_bindex_t btgt)
12101+{ /* if a drinfo is recorded for @btgt, switches lkup->name to the stored old name and rebuilds lkup->whname from it */
12102+ int err;
12103+ struct au_drinfo *drinfo;
12104+
12105+ err = 0;
12106+ if (!lkup->dirren.drinfo)
12107+ goto out;
12108+ AuDebugOn(lkup->dirren.ninfo <= btgt);
12109+ drinfo = lkup->dirren.drinfo[btgt];
12110+ if (!drinfo)
12111+ goto out; /* nothing recorded for this branch; the lookup names stay as they are */
12112+
12113+ au_kfree_try_rcu(lkup->whname.name);
12114+ lkup->whname.name = NULL;
12115+ lkup->dirren.dr_name.len = drinfo->oldnamelen;
12116+ lkup->dirren.dr_name.name = drinfo->oldname; /* points into drinfo, no copy is made */
12117+ lkup->name = &lkup->dirren.dr_name;
12118+ err = au_wh_name_alloc(&lkup->whname, lkup->name);
12119+ if (!err)
12120+ AuDbg("name %.*s, whname %.*s, b%d\n",
12121+ AuLNPair(lkup->name), AuLNPair(&lkup->whname),
12122+ btgt);
12123+
12124+out:
12125+ AuTraceErr(err);
12126+ return err;
12127+}
12128+
12129+int au_dr_lkup_h_ino(struct au_do_lookup_args *lkup, aufs_bindex_t bindex,
12130+ ino_t h_ino)
12131+{ /* returns 1 when no drinfo is recorded for @bindex, otherwise whether drinfo->ino equals @h_ino */
12132+ int match;
12133+ struct au_drinfo *drinfo;
12134+
12135+ match = 1; /* default: treated as a match when there is nothing to compare */
12136+ if (!lkup->dirren.drinfo)
12137+ goto out;
12138+ AuDebugOn(lkup->dirren.ninfo <= bindex);
12139+ drinfo = lkup->dirren.drinfo[bindex];
12140+ if (!drinfo)
12141+ goto out;
12142+
12143+ match = (drinfo->ino == h_ino);
12144+ AuDbg("match %d\n", match);
12145+
12146+out:
12147+ return match;
12148+}
12149+
12150+/* ---------------------------------------------------------------------- */
12151+
12152+int au_dr_opt_set(struct super_block *sb)
12153+{ /* calls au_dr_hino() with each branch's br_path, stopping at the first error; returns that error or 0 */
12154+ int err;
12155+ aufs_bindex_t bindex, bbot;
12156+ struct au_branch *br;
12157+
12158+ err = 0;
12159+ bbot = au_sbbot(sb);
12160+ for (bindex = 0; !err && bindex <= bbot; bindex++) {
12161+ br = au_sbr(sb, bindex);
12162+ err = au_dr_hino(sb, bindex, /*br*/NULL, &br->br_path);
12163+ }
12164+
12165+ return err;
12166+}
12167+
12168+int au_dr_opt_flush(struct super_block *sb)
12169+{ /* calls au_dr_hino() with a NULL path for every writable branch, stopping at the first error */
12170+ int err;
12171+ aufs_bindex_t bindex, bbot;
12172+ struct au_branch *br;
12173+
12174+ err = 0;
12175+ bbot = au_sbbot(sb);
12176+ for (bindex = 0; !err && bindex <= bbot; bindex++) {
12177+ br = au_sbr(sb, bindex);
12178+ if (au_br_writable(br->br_perm)) /* non-writable branches are skipped */
12179+ err = au_dr_hino(sb, bindex, /*br*/NULL, /*path*/NULL);
12180+ }
12181+
12182+ return err;
12183+}
12184+
12185+int au_dr_opt_clr(struct super_block *sb, int no_flush)
12186+{ /* optionally flushes via au_dr_opt_flush(), then frees the dirren hash of every branch; returns 0 or the flush error */
12187+ int err;
12188+ aufs_bindex_t bindex, bbot;
12189+ struct au_branch *br;
12190+
12191+ err = 0;
12192+ if (!no_flush) {
12193+ err = au_dr_opt_flush(sb);
12194+ if (unlikely(err))
12195+ goto out; /* nothing is freed when the flush fails */
12196+ }
12197+
12198+ bbot = au_sbbot(sb);
12199+ for (bindex = 0; bindex <= bbot; bindex++) {
12200+ br = au_sbr(sb, bindex);
12201+ au_dr_hino_free(&br->br_dirren);
12202+ }
12203+
12204+out:
12205+ return err;
12206+}
12207diff -urN /usr/share/empty/fs/aufs/dirren.h linux/fs/aufs/dirren.h
12208--- /usr/share/empty/fs/aufs/dirren.h 1970-01-01 01:00:00.000000000 +0100
12209+++ linux/fs/aufs/dirren.h 2022-11-05 23:02:18.962555950 +0100
12210@@ -0,0 +1,140 @@
12211+/* SPDX-License-Identifier: GPL-2.0 */
12212+/*
12213+ * Copyright (C) 2017-2022 Junjiro R. Okajima
12214+ *
12215+ * This program is free software; you can redistribute it and/or modify
12216+ * it under the terms of the GNU General Public License as published by
12217+ * the Free Software Foundation; either version 2 of the License, or
12218+ * (at your option) any later version.
12219+ *
12220+ * This program is distributed in the hope that it will be useful,
12221+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12222+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12223+ * GNU General Public License for more details.
12224+ *
12225+ * You should have received a copy of the GNU General Public License
12226+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
12227+ */
12228+
12229+/*
12230+ * renamed dir info
12231+ */
12232+
12233+#ifndef __AUFS_DIRREN_H__
12234+#define __AUFS_DIRREN_H__
12235+
12236+#ifdef __KERNEL__
12237+
12238+#include <linux/dcache.h>
12239+#include <linux/statfs.h>
12240+#include <linux/uuid.h>
12241+#include "hbl.h"
12242+
12243+#define AuDirren_NHASH 100
12244+
12245+#ifdef CONFIG_AUFS_DIRREN
12246+enum au_brid_type { /* selects which member of the union in struct au_dr_brid is meaningful */
12247+ AuBrid_Unset,
12248+ AuBrid_UUID,
12249+ AuBrid_FSID,
12250+ AuBrid_DEV
12251+};
12252+
12253+struct au_dr_brid { /* branch identifier: a type tag plus one of three id forms */
12254+ enum au_brid_type type;
12255+ union {
12256+ uuid_t uuid; /* not implemented yet */
12257+ fsid_t fsid;
12258+ dev_t dev;
12259+ };
12260+};
12261+
12262+/* 20 is the maximum number of decimal digits of a 64-bit unsigned long */
12263+/* layout: brid-type "_" uuid "_" inum */
12264+#define AUFS_DIRREN_FNAME_SZ (1 + 1 + UUID_STRING_LEN + 20)
12265+#define AUFS_DIRREN_ENV_VAL_SZ (AUFS_DIRREN_FNAME_SZ + 1 + 20)
12266+
12267+struct au_dr_hino { /* one entry of the per-branch hash in struct au_dr_br */
12268+ struct hlist_bl_node dr_hnode; /* hash-list linkage */
12269+ ino_t dr_h_ino; /* inode number used as the key */
12270+};
12271+
12272+struct au_dr_br { /* per-branch dirren state */
12273+ struct hlist_bl_head dr_h_ino[AuDirren_NHASH]; /* AuDirren_NHASH buckets of struct au_dr_hino */
12274+ struct au_dr_brid dr_brid;
12275+};
12276+
12277+struct au_dr_lookup { /* dirren state carried through a lookup */
12278+ /* struct au_do_lookup_args.name may point at dr_name */
12279+ struct qstr dr_name; /* subset of dr_info */
12280+ aufs_bindex_t ninfo; /* number of slots in drinfo[] */
12281+ struct au_drinfo **drinfo; /* array of loaded drinfo pointers; slots may be NULL */
12282+};
12283+#else
12284+struct au_dr_hino; /* forward declaration only; still named by the stub signatures */
12285+/* empty placeholders so that users of these types still compile */
12286+struct au_dr_br { };
12287+struct au_dr_lookup { };
12288+#endif
12289+
12290+/* ---------------------------------------------------------------------- */
12291+
12292+struct au_branch;
12293+struct au_do_lookup_args;
12294+struct au_hinode;
12295+#ifdef CONFIG_AUFS_DIRREN /* real prototypes when the feature is configured */
12296+int au_dr_hino_test_add(struct au_dr_br *dr, ino_t h_ino,
12297+ struct au_dr_hino *add_ent);
12298+void au_dr_hino_free(struct au_dr_br *dr);
12299+int au_dr_br_init(struct super_block *sb, struct au_branch *br,
12300+ const struct path *path);
12301+int au_dr_br_fin(struct super_block *sb, struct au_branch *br);
12302+int au_dr_rename(struct dentry *src, aufs_bindex_t bindex,
12303+ struct qstr *dst_name, void *_rev);
12304+void au_dr_rename_fin(struct dentry *src, aufs_bindex_t btgt, void *rev);
12305+void au_dr_rename_rev(struct dentry *src, aufs_bindex_t bindex, void *rev);
12306+int au_dr_lkup(struct au_do_lookup_args *lkup, struct dentry *dentry,
12307+ aufs_bindex_t bindex);
12308+int au_dr_lkup_name(struct au_do_lookup_args *lkup, aufs_bindex_t btgt);
12309+int au_dr_lkup_h_ino(struct au_do_lookup_args *lkup, aufs_bindex_t bindex,
12310+ ino_t h_ino);
12311+void au_dr_lkup_fin(struct au_do_lookup_args *lkup);
12312+int au_dr_opt_set(struct super_block *sb);
12313+int au_dr_opt_flush(struct super_block *sb);
12314+int au_dr_opt_clr(struct super_block *sb, int no_flush);
12315+#else /* stubs with the same names; AuStubInt0/AuStubVoid are defined elsewhere -- presumably no-op inlines, TODO confirm */
12316+AuStubInt0(au_dr_hino_test_add, struct au_dr_br *dr, ino_t h_ino,
12317+ struct au_dr_hino *add_ent);
12318+AuStubVoid(au_dr_hino_free, struct au_dr_br *dr);
12319+AuStubInt0(au_dr_br_init, struct super_block *sb, struct au_branch *br,
12320+ const struct path *path);
12321+AuStubInt0(au_dr_br_fin, struct super_block *sb, struct au_branch *br);
12322+AuStubInt0(au_dr_rename, struct dentry *src, aufs_bindex_t bindex,
12323+ struct qstr *dst_name, void *_rev);
12324+AuStubVoid(au_dr_rename_fin, struct dentry *src, aufs_bindex_t btgt, void *rev);
12325+AuStubVoid(au_dr_rename_rev, struct dentry *src, aufs_bindex_t bindex,
12326+ void *rev);
12327+AuStubInt0(au_dr_lkup, struct au_do_lookup_args *lkup, struct dentry *dentry,
12328+ aufs_bindex_t bindex);
12329+AuStubInt0(au_dr_lkup_name, struct au_do_lookup_args *lkup, aufs_bindex_t btgt);
12330+AuStubInt0(au_dr_lkup_h_ino, struct au_do_lookup_args *lkup,
12331+ aufs_bindex_t bindex, ino_t h_ino);
12332+AuStubVoid(au_dr_lkup_fin, struct au_do_lookup_args *lkup);
12333+AuStubInt0(au_dr_opt_set, struct super_block *sb);
12334+AuStubInt0(au_dr_opt_flush, struct super_block *sb);
12335+AuStubInt0(au_dr_opt_clr, struct super_block *sb, int no_flush);
12336+#endif
12337+
12338+/* ---------------------------------------------------------------------- */
12339+
12340+#ifdef CONFIG_AUFS_DIRREN
12341+static inline int au_dr_ihash(ino_t h_ino)
12342+{ /* maps an inode number to a bucket index in [0, AuDirren_NHASH) */
12343+ return h_ino % AuDirren_NHASH;
12344+}
12345+#else
12346+AuStubInt0(au_dr_ihash, ino_t h_ino);
12347+#endif
12348+
12349+#endif /* __KERNEL__ */
12350+#endif /* __AUFS_DIRREN_H__ */
12351diff -urN /usr/share/empty/fs/aufs/dynop.c linux/fs/aufs/dynop.c
12352--- /usr/share/empty/fs/aufs/dynop.c 1970-01-01 01:00:00.000000000 +0100
12353+++ linux/fs/aufs/dynop.c 2022-11-05 23:02:18.962555950 +0100
12354@@ -0,0 +1,366 @@
12355+// SPDX-License-Identifier: GPL-2.0
12356+/*
12357+ * Copyright (C) 2010-2022 Junjiro R. Okajima
12358+ *
12359+ * This program is free software; you can redistribute it and/or modify
12360+ * it under the terms of the GNU General Public License as published by
12361+ * the Free Software Foundation; either version 2 of the License, or
12362+ * (at your option) any later version.
12363+ *
12364+ * This program is distributed in the hope that it will be useful,
12365+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12366+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12367+ * GNU General Public License for more details.
12368+ *
12369+ * You should have received a copy of the GNU General Public License
12370+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
12371+ */
12372+
12373+/*
12374+ * dynamically customizable operations for regular files
12375+ */
12376+
12377+#include "aufs.h"
12378+
12379+#define DyPrSym(key) AuDbgSym(key->dk_op.dy_hop) /* passes the key's dy_hop pointer to AuDbgSym() (defined elsewhere) */
12380+
12381+/*
12382+ * How large will these lists be?
12383+ * Usually just a few elements, 20-30 at most for each, I guess.
12384+ */
12385+static struct hlist_bl_head dynop[AuDyLast]; /* one list head per dynop type, indexed by dy_type */
12386+
12387+static struct au_dykey *dy_gfind_get(struct hlist_bl_head *hbl,
12388+ const void *h_op)
12389+{ /* finds the key whose dy_hop equals @h_op and takes a reference; returns NULL when absent or already at refcount zero */
12390+ struct au_dykey *key, *tmp;
12391+ struct hlist_bl_node *pos;
12392+
12393+ key = NULL;
12394+ hlist_bl_lock(hbl);
12395+ hlist_bl_for_each_entry(tmp, pos, hbl, dk_hnode)
12396+ if (tmp->dk_op.dy_hop == h_op) {
12397+ if (kref_get_unless_zero(&tmp->dk_kref))
12398+ key = tmp;
12399+ break; /* stop at the first match even when the reference could not be taken */
12400+ }
12401+ hlist_bl_unlock(hbl);
12402+
12403+ return key;
12404+}
12405+
12406+static struct au_dykey *dy_bradd(struct au_branch *br, struct au_dykey *key)
12407+{ /* records @key in br->br_dykey[] unless an entry with the same dy_hop exists; returns that entry, or NULL when @key was stored */
12408+ struct au_dykey **k, *found;
12409+ const void *h_op = key->dk_op.dy_hop;
12410+ int i;
12411+
12412+ found = NULL;
12413+ k = br->br_dykey;
12414+ for (i = 0; i < AuBrDynOp; i++) /* first pass runs without br_dykey_lock */
12415+ if (k[i]) {
12416+ if (k[i]->dk_op.dy_hop == h_op) {
12417+ found = k[i];
12418+ break;
12419+ }
12420+ } else
12421+ break; /* first empty slot: candidate position for @key */
12422+ if (!found) {
12423+ spin_lock(&br->br_dykey_lock);
12424+ for (; i < AuBrDynOp; i++) /* resume from the same index, now under the lock */
12425+ if (k[i]) {
12426+ if (k[i]->dk_op.dy_hop == h_op) {
12427+ found = k[i];
12428+ break;
12429+ }
12430+ } else {
12431+ k[i] = key;
12432+ break;
12433+ }
12434+ spin_unlock(&br->br_dykey_lock);
12435+ BUG_ON(i == AuBrDynOp); /* array is full: it would need to be expanded */
12436+ }
12437+
12438+ return found;
12439+}
12440+
12441+/* adds @key to @hbl; if a key with the same dy_hop is already there, takes a reference on it and returns it instead */
12442+static struct au_dykey *dy_gadd(struct hlist_bl_head *hbl, struct au_dykey *key)
12443+{
12444+ struct au_dykey *tmp, *found;
12445+ struct hlist_bl_node *pos;
12446+ const void *h_op = key->dk_op.dy_hop;
12447+
12448+ found = NULL;
12449+ hlist_bl_lock(hbl);
12450+ hlist_bl_for_each_entry(tmp, pos, hbl, dk_hnode)
12451+ if (tmp->dk_op.dy_hop == h_op) {
12452+ if (kref_get_unless_zero(&tmp->dk_kref))
12453+ found = tmp;
12454+ break;
12455+ }
12456+ if (!found)
12457+ hlist_bl_add_head(&key->dk_hnode, hbl); /* lookup and insertion happen under the same lock */
12458+ hlist_bl_unlock(hbl);
12459+
12460+ if (!found)
12461+ DyPrSym(key);
12462+ return found; /* NULL means @key itself was added */
12463+}
12464+
12465+static void dy_free_rcu(struct rcu_head *rcu)
12466+{ /* RCU callback registered by dy_free(): recovers the key from its dk_rcu member and frees it */
12467+ struct au_dykey *key;
12468+
12469+ key = container_of(rcu, struct au_dykey, dk_rcu);
12470+ DyPrSym(key);
12471+ kfree(key);
12472+}
12473+
12474+static void dy_free(struct kref *kref)
12475+{ /* kref release function: unlinks the key from its dynop list and frees it through call_rcu() */
12476+ struct au_dykey *key;
12477+ struct hlist_bl_head *hbl;
12478+
12479+ key = container_of(kref, struct au_dykey, dk_kref);
12480+ hbl = dynop + key->dk_op.dy_type; /* the list is selected by the key's type */
12481+ au_hbl_del(&key->dk_hnode, hbl);
12482+ call_rcu(&key->dk_rcu, dy_free_rcu);
12483+}
12484+
12485+void au_dy_put(struct au_dykey *key)
12486+{ /* drops one reference on @key; dy_free() runs when the count reaches zero */
12487+ kref_put(&key->dk_kref, dy_free);
12488+}
12489+
12490+/* ---------------------------------------------------------------------- */
12491+
12492+#define DyDbgSize(cnt, op) AuDebugOn(cnt != sizeof(op)/sizeof(void *)) /* checks that cnt equals the number of pointer-sized slots in op */
12493+
12494+#ifdef CONFIG_AUFS_DEBUG
12495+#define DyDbgDeclare(cnt) unsigned int cnt = 0
12496+#define DyDbgInc(cnt) do { cnt++; } while (0)
12497+#else /* without CONFIG_AUFS_DEBUG the counter does not exist at all */
12498+#define DyDbgDeclare(cnt) do {} while (0)
12499+#define DyDbgInc(cnt) do {} while (0)
12500+#endif
12501+
12502+#define DySet(func, dst, src, h_op, h_sb) do { /* copy src.func to dst.func only when h_op->func is set */ \
12503+ DyDbgInc(cnt); \
12504+ if (h_op->func) { \
12505+ if (src.func) \
12506+ dst.func = src.func; \
12507+ else \
12508+ AuDbg("%s %s\n", au_sbtype(h_sb), #func); \
12509+ } \
12510+} while (0)
12511+
12512+#define DySetForce(func, dst, src) do { /* copy src.func unconditionally; src.func is expected to be set */ \
12513+ AuDebugOn(!src.func); \
12514+ DyDbgInc(cnt); \
12515+ dst.func = src.func; \
12516+} while (0)
12517+
12518+#define DySetAop(func) /* relies on locals dyaop, h_aop and h_sb at the expansion site */ \
12519+ DySet(func, dyaop->da_op, aufs_aop, h_aop, h_sb)
12520+#define DySetAopForce(func) /* relies on a local dyaop at the expansion site */ \
12521+ DySetForce(func, dyaop->da_op, aufs_aop)
12522+
12523+static void dy_aop(struct au_dykey *key, const void *h_op,
12524+ struct super_block *h_sb __maybe_unused)
12525+{ /* fills dyaop->da_op: each aufs_aop handler is copied only when @h_op defines the same operation; read_folio is always copied */
12526+ struct au_dyaop *dyaop = (void *)key;
12527+ const struct address_space_operations *h_aop = h_op;
12528+ DyDbgDeclare(cnt);
12529+
12530+ AuDbg("%s\n", au_sbtype(h_sb));
12531+
12532+ DySetAop(writepage);
12533+ DySetAopForce(read_folio); /* force */
12534+ DySetAop(writepages);
12535+ DySetAop(dirty_folio);
12536+ DySetAop(invalidate_folio);
12537+ DySetAop(readahead);
12538+ DySetAop(write_begin);
12539+ DySetAop(write_end);
12540+ DySetAop(bmap);
12541+ DySetAop(release_folio);
12542+ DySetAop(free_folio);
12543+ /* this one will be changed according to an aufs mount option */
12544+ DySetAop(direct_IO);
12545+ DySetAop(migrate_folio);
12546+ DySetAop(launder_folio);
12547+ DySetAop(is_partially_uptodate);
12548+ DySetAop(is_dirty_writeback);
12549+ DySetAop(error_remove_page);
12550+ DySetAop(swap_activate);
12551+ DySetAop(swap_deactivate);
12552+ DySetAop(swap_rw);
12553+
12554+ DyDbgSize(cnt, *h_aop); /* debug check: one DySet* line per pointer-sized member of *h_aop */
12555+}
12556+
12557+/* ---------------------------------------------------------------------- */
12558+
12559+static void dy_bug(struct kref *kref)
12560+{ /* kref release function for puts that must never drop the last reference */
12561+ BUG();
12562+}
12563+
12564+static struct au_dykey *dy_get(struct au_dynop *op, struct au_branch *br)
12565+{ /* returns a referenced key for @op, creating it if needed, and records it in @br; returns ERR_PTR(-ENOMEM) on allocation failure */
12566+ struct au_dykey *key, *old;
12567+ struct hlist_bl_head *hbl;
12568+ struct op {
12569+ unsigned int sz;
12570+ void (*set)(struct au_dykey *key, const void *h_op,
12571+ struct super_block *h_sb __maybe_unused);
12572+ };
12573+ static const struct op a[] = { /* per-type allocation size and initializer, indexed by dy_type */
12574+ [AuDy_AOP] = {
12575+ .sz = sizeof(struct au_dyaop),
12576+ .set = dy_aop
12577+ }
12578+ };
12579+ const struct op *p;
12580+
12581+ hbl = dynop + op->dy_type;
12582+ key = dy_gfind_get(hbl, op->dy_hop);
12583+ if (key)
12584+ goto out_add; /* success */
12585+
12586+ p = a + op->dy_type;
12587+ key = kzalloc(p->sz, GFP_NOFS);
12588+ if (unlikely(!key)) {
12589+ key = ERR_PTR(-ENOMEM);
12590+ goto out;
12591+ }
12592+
12593+ key->dk_op.dy_hop = op->dy_hop; /* NOTE(review): dk_op.dy_type is not assigned; it stays 0 from kzalloc() -- confirm this is intended */
12594+ kref_init(&key->dk_kref);
12595+ p->set(key, op->dy_hop, au_br_sb(br));
12596+ old = dy_gadd(hbl, key);
12597+ if (old) { /* an equivalent key was added concurrently: use it and discard ours */
12598+ au_kfree_rcu(key);
12599+ key = old;
12600+ }
12601+
12602+out_add:
12603+ old = dy_bradd(br, key);
12604+ if (old)
12605+ /* the branch already holds this key, so drop the extra reference; its ref-count should never reach zero here */
12606+ kref_put(&key->dk_kref, dy_bug);
12607+out:
12608+ return key;
12609+}
12610+
12611+/* ---------------------------------------------------------------------- */
12612+/*
12613+ * Aufs prohibits O_DIRECT by default even if the branch supports it.
12614+ * This behaviour is necessary to return an error from open(O_DIRECT) instead
12615+ * of the succeeding I/O. The dio mount option enables O_DIRECT and makes
12616+ * open(O_DIRECT) always succeed, but the succeeding I/O may return an error.
12617+ * See the aufs manual for details.
12618+ */
12619+static void dy_adx(struct au_dyaop *dyaop, int do_dx)
12620+{ /* sets da_op.direct_IO to aufs_aop.direct_IO when @do_dx is non-zero, otherwise clears it */
12621+ if (!do_dx)
12622+ dyaop->da_op.direct_IO = NULL;
12623+ else
12624+ dyaop->da_op.direct_IO = aufs_aop.direct_IO;
12625+}
12626+
12627+static struct au_dyaop *dy_aget(struct au_branch *br,
12628+ const struct address_space_operations *h_aop,
12629+ int do_dx)
12630+{ /* gets the au_dyaop for @h_aop via dy_get() and applies @do_dx to it; returns an ERR_PTR on failure */
12631+ struct au_dyaop *dyaop;
12632+ struct au_dynop op;
12633+
12634+ op.dy_type = AuDy_AOP;
12635+ op.dy_haop = h_aop;
12636+ dyaop = (void *)dy_get(&op, br);
12637+ if (IS_ERR(dyaop))
12638+ goto out;
12639+ dy_adx(dyaop, do_dx);
12640+
12641+out:
12642+ return dyaop;
12643+}
12644+
12645+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
12646+ struct inode *h_inode)
12647+{ /* points inode->i_mapping->a_ops at the dynamic a_ops built from @h_inode's a_ops; returns 0 or a negative error */
12648+ int err, do_dx;
12649+ struct super_block *sb;
12650+ struct au_branch *br;
12651+ struct au_dyaop *dyaop;
12652+
12653+ AuDebugOn(!S_ISREG(h_inode->i_mode));
12654+ IiMustWriteLock(inode);
12655+
12656+ sb = inode->i_sb;
12657+ br = au_sbr(sb, bindex);
12658+ do_dx = !!au_opt_test(au_mntflags(sb), DIO); /* whether the DIO mount flag is set */
12659+ dyaop = dy_aget(br, h_inode->i_mapping->a_ops, do_dx);
12660+ err = PTR_ERR(dyaop);
12661+ if (IS_ERR(dyaop))
12662+ /* unnecessary to call dy_fput() */
12663+ goto out;
12664+
12665+ err = 0;
12666+ inode->i_mapping->a_ops = &dyaop->da_op;
12667+
12668+out:
12669+ return err;
12670+}
12671+
12672+/*
12673+ * Is it safe to replace a_ops while the inode/file is in use?
12674+ * Yes, I hope so.
12675+ */
12676+int au_dy_irefresh(struct inode *inode)
12677+{ /* for regular files, re-runs au_dy_iaop() against the inode's top branch; returns 0 for other file types */
12678+ int err;
12679+ aufs_bindex_t btop;
12680+ struct inode *h_inode;
12681+
12682+ err = 0;
12683+ if (S_ISREG(inode->i_mode)) {
12684+ btop = au_ibtop(inode);
12685+ h_inode = au_h_iptr(inode, btop);
12686+ err = au_dy_iaop(inode, btop, h_inode);
12687+ }
12688+ return err;
12689+}
12690+
12691+void au_dy_arefresh(int do_dx)
12692+{ /* applies dy_adx(@do_dx) to every key on the AuDy_AOP list while holding the list lock */
12693+ struct hlist_bl_head *hbl;
12694+ struct hlist_bl_node *pos;
12695+ struct au_dykey *key;
12696+
12697+ hbl = dynop + AuDy_AOP;
12698+ hlist_bl_lock(hbl);
12699+ hlist_bl_for_each_entry(key, pos, hbl, dk_hnode)
12700+ dy_adx((void *)key, do_dx); /* the cast is valid because da_key is the first member of struct au_dyaop */
12701+ hlist_bl_unlock(hbl);
12702+}
12703+
12704+/* ---------------------------------------------------------------------- */
12705+
12706+void __init au_dy_init(void)
12707+{ /* initializes every list head in dynop[] */
12708+ int i;
12709+
12710+ for (i = 0; i < AuDyLast; i++)
12711+ INIT_HLIST_BL_HEAD(dynop + i);
12712+}
12713+
12714+void au_dy_fin(void)
12715+{ /* warns for every dynop[] list that still has entries */
12716+ int i;
12717+
12718+ for (i = 0; i < AuDyLast; i++)
12719+ WARN_ON(!hlist_bl_empty(dynop + i));
12720+}
12721diff -urN /usr/share/empty/fs/aufs/dynop.h linux/fs/aufs/dynop.h
12722--- /usr/share/empty/fs/aufs/dynop.h 1970-01-01 01:00:00.000000000 +0100
12723+++ linux/fs/aufs/dynop.h 2022-11-05 23:02:18.962555950 +0100
12724@@ -0,0 +1,77 @@
12725+/* SPDX-License-Identifier: GPL-2.0 */
12726+/*
12727+ * Copyright (C) 2010-2022 Junjiro R. Okajima
12728+ *
12729+ * This program is free software; you can redistribute it and/or modify
12730+ * it under the terms of the GNU General Public License as published by
12731+ * the Free Software Foundation; either version 2 of the License, or
12732+ * (at your option) any later version.
12733+ *
12734+ * This program is distributed in the hope that it will be useful,
12735+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12736+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12737+ * GNU General Public License for more details.
12738+ *
12739+ * You should have received a copy of the GNU General Public License
12740+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
12741+ */
12742+
12743+/*
12744+ * dynamically customizable operations (for regular files only)
12745+ */
12746+
12747+#ifndef __AUFS_DYNOP_H__
12748+#define __AUFS_DYNOP_H__
12749+
12750+#ifdef __KERNEL__
12751+
12752+#include <linux/fs.h>
12753+#include <linux/kref.h>
12754+
12755+enum {AuDy_AOP, AuDyLast}; /* dynop list indices; AuDyLast is the list count */
12756+
12757+struct au_dynop {
12758+ int dy_type; /* presumably one of the AuDy_* values -- TODO confirm */
12759+ union {
12760+ const void *dy_hop; /* untyped view of the operations pointer */
12761+ const struct address_space_operations *dy_haop; /* typed view, same storage */
12762+ };
12763+};
12764+
12765+struct au_dykey {
12766+ union { /* list linkage and the RCU head share the same storage */
12767+ struct hlist_bl_node dk_hnode;
12768+ struct rcu_head dk_rcu;
12769+ };
12770+ struct au_dynop dk_op;
12771+
12772+ /*
12773+ * while this key is in the branch local array, a kref is held. when
12774+ * the branch is removed, the kref is put.
12775+ */
12776+ struct kref dk_kref;
12777+};
12778+
12779+/* not a union of op types, since their sizes are very different from each other */
12780+struct au_dyaop {
12781+ struct au_dykey da_key; /* must stay the first member, see the assert below */
12782+ struct address_space_operations da_op; /* not const */
12783+};
12784+/* da_key sits at offset 0, so a 'struct au_dykey *' can point at any dynop type */
12785+static_assert(!offsetof(struct au_dyaop, da_key));
12786+
12787+/* ---------------------------------------------------------------------- */
12788+
12789+/* dynop.c */
12790+struct au_branch;
12791+void au_dy_put(struct au_dykey *key);
12792+int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
12793+ struct inode *h_inode);
12794+int au_dy_irefresh(struct inode *inode); /* no-op for non-regular inodes */
12795+void au_dy_arefresh(int do_dx); /* parameter named as in the definition */
12796+
12797+void __init au_dy_init(void);
12798+void au_dy_fin(void);
12799+
12800+#endif /* __KERNEL__ */
12801+#endif /* __AUFS_DYNOP_H__ */
12802diff -urN /usr/share/empty/fs/aufs/export.c linux/fs/aufs/export.c
12803--- /usr/share/empty/fs/aufs/export.c 1970-01-01 01:00:00.000000000 +0100
12804+++ linux/fs/aufs/export.c 2022-12-17 09:21:34.796521861 +0100
12805@@ -0,0 +1,830 @@
12806+// SPDX-License-Identifier: GPL-2.0
12807+/*
12808+ * Copyright (C) 2005-2022 Junjiro R. Okajima
12809+ *
12810+ * This program is free software; you can redistribute it and/or modify
12811+ * it under the terms of the GNU General Public License as published by
12812+ * the Free Software Foundation; either version 2 of the License, or
12813+ * (at your option) any later version.
12814+ *
12815+ * This program is distributed in the hope that it will be useful,
12816+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
12817+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12818+ * GNU General Public License for more details.
12819+ *
12820+ * You should have received a copy of the GNU General Public License
12821+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
12822+ */
12823+
12824+/*
12825+ * export via nfs
12826+ */
12827+
12828+#include <linux/exportfs.h>
12829+#include <linux/fs_struct.h>
12830+#include <linux/nsproxy.h>
12831+#include <linux/random.h>
12832+#include <linux/writeback.h>
12833+#include "aufs.h"
12834+
12835+union conv { /* overlays an ino_t with the __u32 word(s) kept in a file handle */
12836+#ifdef CONFIG_AUFS_INO_T_64
12837+ __u32 a[2];
12838+#else
12839+ __u32 a[1];
12840+#endif
12841+ ino_t ino;
12842+};
12843+
12844+static ino_t decode_ino(__u32 *a)
12845+{ /* rebuild an ino_t from the __u32 word(s) starting at a[0] */
12846+ union conv u;
12847+
12848+ BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a)); /* both views must have one size */
12849+ u.a[0] = a[0];
12850+#ifdef CONFIG_AUFS_INO_T_64
12851+ u.a[1] = a[1];
12852+#endif
12853+ return u.ino;
12854+}
12855+
12856+static void encode_ino(__u32 *a, ino_t ino)
12857+{ /* store ino into the __u32 word(s) starting at a[0]; inverse of decode_ino() */
12858+ union conv u;
12859+
12860+ u.ino = ino;
12861+ a[0] = u.a[0];
12862+#ifdef CONFIG_AUFS_INO_T_64
12863+ a[1] = u.a[1];
12864+#endif
12865+}
12866+
12867+/* NFS file handle: indices of the __u32 words in the aufs part of the handle */
12868+enum {
12869+ Fh_br_id, /* br->br_id of the branch */
12870+ Fh_sigen, /* au_sigen(sb) at encode time */
12871+#ifdef CONFIG_AUFS_INO_T_64
12872+ /* support 64bit inode number */
12873+ Fh_ino1,
12874+ Fh_ino2,
12875+ Fh_dir_ino1,
12876+ Fh_dir_ino2,
12877+#else
12878+ Fh_ino1,
12879+ Fh_dir_ino1,
12880+#endif
12881+ Fh_igen, /* inode->i_generation */
12882+ Fh_h_type, /* value returned by exportfs_encode_fh() for the lower parent */
12883+ Fh_tail, /* the lower filesystem's own handle starts at this word */
12884+
12885+ Fh_ino = Fh_ino1, /* first word of the aufs inode number */
12886+ Fh_dir_ino = Fh_dir_ino1 /* first word of the parent dir inode number */
12887+};
12888+
12889+static int au_test_anon(struct dentry *dentry)
12890+{ /* returns 1 when DCACHE_DISCONNECTED is set in d_flags, otherwise 0 */
12891+ /* note: read d_flags without d_lock */
12892+ return !!(dentry->d_flags & DCACHE_DISCONNECTED);
12893+}
12894+
12895+int au_test_nfsd(void)
12896+{ /* non-zero when current is a kernel thread whose comm is exactly "nfsd" */
12897+ int ret;
12898+ struct task_struct *tsk = current;
12899+ char comm[sizeof(tsk->comm)];
12900+
12901+ ret = 0;
12902+ if (tsk->flags & PF_KTHREAD) { /* user processes are never treated as nfsd */
12903+ get_task_comm(comm, tsk);
12904+ ret = !strcmp(comm, "nfsd");
12905+ }
12906+
12907+ return ret;
12908+}
12909+
12910+/* ---------------------------------------------------------------------- */
12911+/* inode generation external table */
12912+
12913+void au_xigen_inc(struct inode *inode)
12914+{ /* write i_generation + 1 into this inode's slot of the si_xigen file */
12915+ loff_t pos;
12916+ ssize_t sz;
12917+ __u32 igen;
12918+ struct super_block *sb;
12919+ struct au_sbinfo *sbinfo;
12920+
12921+ sb = inode->i_sb;
12922+ AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
12923+
12924+ sbinfo = au_sbi(sb);
12925+ pos = inode->i_ino;
12926+ pos *= sizeof(igen); /* slot offset = i_ino * sizeof(igen) */
12927+ igen = inode->i_generation + 1; /* inode->i_generation itself is not modified */
12928+ sz = xino_fwrite(sbinfo->si_xigen, &igen, sizeof(igen), &pos);
12929+ if (sz == sizeof(igen))
12930+ return; /* success */
12931+
12932+ if (unlikely(sz >= 0)) /* only a short write is reported here, not sz < 0 */
12933+ AuIOErr("xigen error (%zd)\n", sz);
12934+}
12935+
12936+int au_xigen_new(struct inode *inode)
12937+{ /* load i_generation from the si_xigen file, or allocate and store a new one */
12938+ int err;
12939+ loff_t pos;
12940+ ssize_t sz;
12941+ struct super_block *sb;
12942+ struct au_sbinfo *sbinfo;
12943+ struct file *file;
12944+
12945+ err = 0; /* the two early exits below report success without any I/O */
12946+ /* todo: dirty, at mount time */
12947+ if (inode->i_ino == AUFS_ROOT_INO)
12948+ goto out;
12949+ sb = inode->i_sb;
12950+ SiMustAnyLock(sb);
12951+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
12952+ goto out;
12953+
12954+ err = -EFBIG; /* returned when the slot offset would exceed au_loff_max */
12955+ pos = inode->i_ino;
12956+ if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) {
12957+ AuIOErr1("too large i%lld\n", pos);
12958+ goto out;
12959+ }
12960+ pos *= sizeof(inode->i_generation); /* slot offset inside the xigen file */
12961+
12962+ err = 0;
12963+ sbinfo = au_sbi(sb);
12964+ file = sbinfo->si_xigen;
12965+ BUG_ON(!file);
12966+
12967+ if (vfsub_f_size_read(file)
12968+ < pos + sizeof(inode->i_generation)) { /* file ends before this slot */
12969+ inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next);
12970+ sz = xino_fwrite(file, &inode->i_generation,
12971+ sizeof(inode->i_generation), &pos);
12972+ } else /* the slot is inside the file: read the stored value */
12973+ sz = xino_fread(file, &inode->i_generation,
12974+ sizeof(inode->i_generation), &pos);
12975+ if (sz == sizeof(inode->i_generation))
12976+ goto out; /* success */
12977+
12978+ err = sz; /* a negative sz is returned as it is */
12979+ if (unlikely(sz >= 0)) { /* a short transfer is turned into -EIO */
12980+ err = -EIO;
12981+ AuIOErr("xigen error (%zd)\n", sz);
12982+ }
12983+
12984+out:
12985+ return err;
12986+}
12987+
12988+int au_xigen_set(struct super_block *sb, struct path *path)
12989+{ /* install the file returned by au_xino_create2() as sbinfo->si_xigen */
12990+ int err;
12991+ struct au_sbinfo *sbinfo;
12992+ struct file *file;
12993+
12994+ SiMustWriteLock(sb);
12995+
12996+ sbinfo = au_sbi(sb);
12997+ file = au_xino_create2(sb, path, sbinfo->si_xigen); /* old file is passed in */
12998+ err = PTR_ERR(file);
12999+ if (IS_ERR(file)) /* on failure the current si_xigen is left in place */
13000+ goto out;
13001+ err = 0;
13002+ if (sbinfo->si_xigen)
13003+ fput(sbinfo->si_xigen); /* drop the reference to the replaced file */
13004+ sbinfo->si_xigen = file;
13005+
13006+out:
13007+ AuTraceErr(err);
13008+ return err;
13009+}
13010+
13011+void au_xigen_clr(struct super_block *sb)
13012+{ /* release sbinfo->si_xigen, if set, and reset the pointer to NULL */
13013+ struct au_sbinfo *sbinfo;
13014+
13015+ SiMustWriteLock(sb);
13016+
13017+ sbinfo = au_sbi(sb);
13018+ if (sbinfo->si_xigen) {
13019+ fput(sbinfo->si_xigen);
13020+ sbinfo->si_xigen = NULL;
13021+ }
13022+}
13023+
13024+/* ---------------------------------------------------------------------- */
13025+
13026+static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino,
13027+ ino_t dir_ino)
13028+{ /* find a dentry for a cached inode; NULL if none, ERR_PTR(-ESTALE) if stale */
13029+ struct dentry *dentry, *d;
13030+ struct inode *inode;
13031+ unsigned int sigen;
13032+
13033+ dentry = NULL;
13034+ inode = ilookup(sb, ino);
13035+ if (!inode) /* ilookup() found nothing: NULL result, not an error */
13036+ goto out;
13037+
13038+ dentry = ERR_PTR(-ESTALE);
13039+ sigen = au_sigen(sb);
13040+ if (unlikely(au_is_bad_inode(inode)
13041+ || IS_DEADDIR(inode)
13042+ || sigen != au_iigen(inode, NULL)))
13043+ goto out_iput;
13044+
13045+ dentry = NULL;
13046+ if (!dir_ino || S_ISDIR(inode->i_mode)) /* no parent given, or a directory */
13047+ dentry = d_find_alias(inode);
13048+ else { /* pick the alias whose parent has inode number dir_ino */
13049+ spin_lock(&inode->i_lock);
13050+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
13051+ spin_lock(&d->d_lock);
13052+ if (!au_test_anon(d)
13053+ && d_inode(d->d_parent)->i_ino == dir_ino) {
13054+ dentry = dget_dlock(d); /* reference taken under d_lock */
13055+ spin_unlock(&d->d_lock);
13056+ break;
13057+ }
13058+ spin_unlock(&d->d_lock);
13059+ }
13060+ spin_unlock(&inode->i_lock);
13061+ }
13062+ if (unlikely(dentry && au_digen_test(dentry, sigen))) {
13063+ /* need to refresh */
13064+ dput(dentry);
13065+ dentry = NULL; /* reported as "not found" rather than as an error */
13066+ }
13067+
13068+out_iput:
13069+ iput(inode); /* drop the reference obtained by ilookup() */
13070+out:
13071+ AuTraceErrPtr(dentry);
13072+ return dentry;
13073+}
13074+
13075+/* ---------------------------------------------------------------------- */
13076+
13077+/* todo: dirty? */
13078+/* if exportfs_decode_fh() passed vfsmount*, we could be happy */
13079+
13080+struct au_compare_mnt_args { /* argument block for au_compare_mnt() */
13081+ /* input */
13082+ struct super_block *sb; /* super block to look for */
13083+
13084+ /* output */
13085+ struct vfsmount *mnt; /* set, with a mntget() reference, on a match */
13086+};
13087+
13088+static int au_compare_mnt(struct vfsmount *mnt, void *arg)
13089+{ /* iterate_mounts() callback: 1 and a->mnt set when mnt belongs to a->sb */
13090+ struct au_compare_mnt_args *a = arg;
13091+
13092+ if (mnt->mnt_sb != a->sb)
13093+ return 0; /* not ours */
13094+ a->mnt = mntget(mnt); /* the reference is handed to whoever reads a->mnt */
13095+ return 1;
13096+}
13097+
13098+static struct vfsmount *au_mnt_get(struct super_block *sb)
13099+{ /* search the mounts under current's fs root for one whose mnt_sb is sb */
13100+ int err;
13101+ struct path root;
13102+ struct au_compare_mnt_args args = {
13103+ .sb = sb
13104+ };
13105+
13106+ get_fs_root(current->fs, &root);
13107+ rcu_read_lock();
13108+ err = iterate_mounts(au_compare_mnt, &args, root.mnt);
13109+ rcu_read_unlock();
13110+ path_put(&root);
13111+ AuDebugOn(!err); /* a match is expected; only checked by the debug macros */
13112+ AuDebugOn(!args.mnt);
13113+ return args.mnt; /* carries the mntget() reference taken in au_compare_mnt() */
13114+}
13115+
13116+struct au_nfsd_si_lock { /* state handed to si_nfsd_read_lock() */
13117+ unsigned int sigen; /* generation taken from the file handle */
13118+ aufs_bindex_t bindex, br_id; /* bindex is filled in by si_nfsd_read_lock() */
13119+ unsigned char force_lock; /* non-zero: keep the lock held even on failure */
13120+};
13121+
13122+static int si_nfsd_read_lock(struct super_block *sb,
13123+ struct au_nfsd_si_lock *nsi_lock)
13124+{ /* read-lock sb and resolve nsi_lock->br_id; returns 0 or -ESTALE */
13125+ int err;
13126+ aufs_bindex_t bindex;
13127+
13128+ si_read_lock(sb, AuLock_FLUSH);
13129+
13130+ /* branch id may be wrapped around */
13131+ err = 0;
13132+ bindex = au_br_index(sb, nsi_lock->br_id);
13133+ if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb))
13134+ goto out; /* success */
13135+
13136+ err = -ESTALE;
13137+ bindex = -1;
13138+ if (!nsi_lock->force_lock) /* with force_lock set the lock stays held */
13139+ si_read_unlock(sb);
13140+
13141+out:
13142+ nsi_lock->bindex = bindex; /* -1 on failure */
13143+ return err;
13144+}
13145+
13146+struct find_name_by_ino { /* context of the find_name_by_ino() actor */
13147+ struct dir_context ctx; /* the actor recovers this struct via container_of() */
13148+ int called, found; /* entries seen in the current pass / match recorded */
13149+ ino_t ino; /* inode number being searched for */
13150+ char *name; /* buffer the matching name is copied into (no NUL added) */
13151+ int namelen; /* length of the name stored in 'name' */
13152+};
13153+
13154+static bool
13155+find_name_by_ino(struct dir_context *ctx, const char *name, int namelen,
13156+ loff_t offset, u64 ino, unsigned int d_type)
13157+{ /* dir actor: true for a non-matching entry, false once the match is stored */
13158+ struct find_name_by_ino *a = container_of(ctx, struct find_name_by_ino,
13159+ ctx);
13160+
13161+ a->called++; /* lets the caller detect a pass that saw no entries */
13162+ if (a->ino != ino)
13163+ return true;
13164+
13165+ memcpy(a->name, name, namelen); /* a->name must have room for namelen bytes */
13166+ a->namelen = namelen;
13167+ a->found = 1;
13168+ return false;
13169+}
13170+
13171+static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino,
13172+ struct au_nfsd_si_lock *nsi_lock)
13173+{ /* scan the directory at 'path' for an entry with inode 'ino' and look it up */
13174+ struct dentry *dentry, *parent;
13175+ struct file *file;
13176+ struct inode *dir;
13177+ struct find_name_by_ino arg = {
13178+ .ctx = {
13179+ .actor = find_name_by_ino
13180+ }
13181+ };
13182+ int err;
13183+
13184+ parent = path->dentry;
13185+ if (nsi_lock) /* the si read lock is dropped while the directory is scanned */
13186+ si_read_unlock(parent->d_sb);
13187+ file = vfsub_dentry_open(path, au_dir_roflags);
13188+ dentry = (void *)file; /* an error pointer from the open is returned as is */
13189+ if (IS_ERR(file))
13190+ goto out;
13191+
13192+ dentry = ERR_PTR(-ENOMEM);
13193+ arg.name = (void *)__get_free_page(GFP_NOFS); /* one page holds the name */
13194+ if (unlikely(!arg.name))
13195+ goto out_file;
13196+ arg.ino = ino;
13197+ arg.found = 0;
13198+ do { /* repeat until an error, a match, or a pass that saw no entry */
13199+ arg.called = 0;
13200+ /* smp_mb(); */
13201+ err = vfsub_iterate_dir(file, &arg.ctx);
13202+ } while (!err && !arg.found && arg.called);
13203+ dentry = ERR_PTR(err);
13204+ if (unlikely(err))
13205+ goto out_name;
13206+ /* instead of ENOENT */
13207+ dentry = ERR_PTR(-ESTALE);
13208+ if (!arg.found)
13209+ goto out_name;
13210+
13211+ /* do not call vfsub_lkup_one() */
13212+ dir = d_inode(parent); /* NOTE(review): 'dir' is not read after this */
13213+ dentry = vfsub_lookup_one_len_unlocked(arg.name, path, arg.namelen);
13214+ AuTraceErrPtr(dentry);
13215+ if (IS_ERR(dentry))
13216+ goto out_name;
13217+ AuDebugOn(au_test_anon(dentry));
13218+ if (unlikely(d_really_is_negative(dentry))) { /* the name vanished meanwhile */
13219+ dput(dentry);
13220+ dentry = ERR_PTR(-ENOENT);
13221+ }
13222+
13223+out_name:
13224+ free_page((unsigned long)arg.name);
13225+out_file:
13226+ fput(file);
13227+out:
13228+ if (unlikely(nsi_lock
13229+ && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0))
13230+ if (!IS_ERR(dentry)) { /* relock failed: a found dentry becomes -ESTALE */
13231+ dput(dentry);
13232+ dentry = ERR_PTR(-ESTALE);
13233+ }
13234+ AuTraceErrPtr(dentry);
13235+ return dentry;
13236+}
13237+
13238+static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino,
13239+ ino_t dir_ino,
13240+ struct au_nfsd_si_lock *nsi_lock)
13241+{ /* resolve 'ino' through its cached parent directory 'dir_ino' */
13242+ struct dentry *dentry;
13243+ struct path path;
13244+
13245+ if (dir_ino != AUFS_ROOT_INO) {
13246+ path.dentry = decode_by_ino(sb, dir_ino, 0);
13247+ dentry = path.dentry; /* NULL or an error pointer is returned unchanged */
13248+ if (!path.dentry || IS_ERR(path.dentry))
13249+ goto out;
13250+ AuDebugOn(au_test_anon(path.dentry));
13251+ } else /* the parent is the aufs root directory */
13252+ path.dentry = dget(sb->s_root);
13253+
13254+ path.mnt = au_mnt_get(sb);
13255+ dentry = au_lkup_by_ino(&path, ino, nsi_lock);
13256+ path_put(&path); /* drops both the dentry and the mount reference */
13257+
13258+out:
13259+ AuTraceErrPtr(dentry);
13260+ return dentry;
13261+}
13262+
13263+/* ---------------------------------------------------------------------- */
13264+
13265+static int h_acceptable(void *expv, struct dentry *dentry)
13266+{ /* callback given to exportfs_decode_fh(): accepts every dentry */
13267+ return 1;
13268+}
13269+
13270+static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath,
13271+ char *buf, int len, struct super_block *sb)
13272+{ /* compose a pathname in buf from three d_path() calls; error pointer on failure */
13273+ char *p;
13274+ int n;
13275+ struct path path;
13276+
13277+ p = d_path(h_rootpath, buf, len);
13278+ if (IS_ERR(p))
13279+ goto out;
13280+ n = strlen(p); /* length of the pathname of h_rootpath */
13281+
13282+ path.mnt = h_rootpath->mnt;
13283+ path.dentry = h_parent;
13284+ p = d_path(&path, buf, len);
13285+ if (IS_ERR(p))
13286+ goto out;
13287+ if (n != 1) /* skip the first n chars unless h_rootpath was one char long */
13288+ p += n;
13289+
13290+ path.mnt = au_mnt_get(sb);
13291+ path.dentry = sb->s_root;
13292+ p = d_path(&path, buf, len - strlen(p)); /* NOTE(review): order-sensitive buffer math */
13293+ mntput(path.mnt); /* release the reference returned by au_mnt_get() */
13294+ if (IS_ERR(p))
13295+ goto out;
13296+ if (n != 1) /* overwrite the terminating NUL with '/' */
13297+ p[strlen(p)] = '/';
13298+
13299+out:
13300+ AuTraceErrPtr(p);
13301+ return p;
13302+}
13303+
13304+static
13305+struct dentry *decode_by_path(struct super_block *sb, ino_t ino, __u32 *fh,
13306+ int fh_len, struct au_nfsd_si_lock *nsi_lock)
13307+{ /* decode the lower handle, build a pathname from it and look 'ino' up there */
13308+ struct dentry *dentry, *h_parent, *root;
13309+ struct super_block *h_sb;
13310+ char *pathname, *p;
13311+ struct vfsmount *h_mnt;
13312+ struct au_branch *br;
13313+ int err;
13314+ struct path path;
13315+
13316+ br = au_sbr(sb, nsi_lock->bindex);
13317+ h_mnt = au_br_mnt(br);
13318+ h_sb = h_mnt->mnt_sb;
13319+ /* todo: call lower fh_to_dentry()? fh_to_parent()? */
13320+ lockdep_off();
13321+ h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail),
13322+ fh_len - Fh_tail, fh[Fh_h_type],
13323+ h_acceptable, /*context*/NULL);
13324+ lockdep_on();
13325+ dentry = h_parent; /* NULL or an error pointer is returned unchanged */
13326+ if (unlikely(!h_parent || IS_ERR(h_parent))) {
13327+ AuWarn1("%s decode_fh failed, %ld\n",
13328+ au_sbtype(h_sb), PTR_ERR(h_parent));
13329+ goto out;
13330+ }
13331+ dentry = NULL;
13332+ if (unlikely(au_test_anon(h_parent))) { /* disconnected: give up with NULL */
13333+ AuWarn1("%s decode_fh returned a disconnected dentry\n",
13334+ au_sbtype(h_sb));
13335+ goto out_h_parent;
13336+ }
13337+
13338+ dentry = ERR_PTR(-ENOMEM);
13339+ pathname = (void *)__get_free_page(GFP_NOFS);
13340+ if (unlikely(!pathname))
13341+ goto out_h_parent;
13342+
13343+ root = sb->s_root;
13344+ path.mnt = h_mnt;
13345+ di_read_lock_parent(root, !AuLock_IR);
13346+ path.dentry = au_h_dptr(root, nsi_lock->bindex); /* lower root of the branch */
13347+ di_read_unlock(root, !AuLock_IR);
13348+ p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb);
13349+ dentry = (void *)p; /* an error pointer from au_build_path() is returned */
13350+ if (IS_ERR(p))
13351+ goto out_pathname;
13352+
13353+ si_read_unlock(sb); /* the pathname lookup below runs without the si lock */
13354+ err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
13355+ dentry = ERR_PTR(err);
13356+ if (unlikely(err))
13357+ goto out_relock;
13358+
13359+ dentry = ERR_PTR(-ENOENT);
13360+ AuDebugOn(au_test_anon(path.dentry));
13361+ if (unlikely(d_really_is_negative(path.dentry)))
13362+ goto out_path;
13363+
13364+ if (ino != d_inode(path.dentry)->i_ino) /* target is an entry of that dir */
13365+ dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL);
13366+ else /* the directory itself is the target */
13367+ dentry = dget(path.dentry);
13368+
13369+out_path:
13370+ path_put(&path);
13371+out_relock:
13372+ if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0))
13373+ if (!IS_ERR(dentry)) { /* relock failed: a found dentry becomes -ESTALE */
13374+ dput(dentry);
13375+ dentry = ERR_PTR(-ESTALE);
13376+ }
13377+out_pathname:
13378+ free_page((unsigned long)pathname);
13379+out_h_parent:
13380+ dput(h_parent);
13381+out:
13382+ AuTraceErrPtr(dentry);
13383+ return dentry;
13384+}
13385+
13386+/* ---------------------------------------------------------------------- */
13387+
13388+static struct dentry *
13389+aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
13390+ int fh_type)
13391+{ /* .fh_to_dentry: try cached inode, then cached parent dir, then lower path */
13392+ struct dentry *dentry;
13393+ __u32 *fh = fid->raw;
13394+ struct au_branch *br;
13395+ ino_t ino, dir_ino;
13396+ struct au_nfsd_si_lock nsi_lock = {
13397+ .force_lock = 0
13398+ };
13399+
13400+ dentry = ERR_PTR(-ESTALE);
13401+ /* it should never happen, but the file handle is unreliable */
13402+ if (unlikely(fh_len < Fh_tail))
13403+ goto out;
13404+ nsi_lock.sigen = fh[Fh_sigen];
13405+ nsi_lock.br_id = fh[Fh_br_id];
13406+
13407+ /* branch id may be wrapped around */
13408+ br = NULL;
13409+ if (unlikely(si_nfsd_read_lock(sb, &nsi_lock)))
13410+ goto out;
13411+ nsi_lock.force_lock = 1; /* later relock failures keep the lock for out_unlock */
13412+
13413+ /* is this inode still cached? */
13414+ ino = decode_ino(fh + Fh_ino);
13415+ /* it should never happen */
13416+ if (unlikely(ino == AUFS_ROOT_INO))
13417+ goto out_unlock;
13418+
13419+ dir_ino = decode_ino(fh + Fh_dir_ino);
13420+ dentry = decode_by_ino(sb, ino, dir_ino);
13421+ if (IS_ERR(dentry))
13422+ goto out_unlock;
13423+ if (dentry)
13424+ goto accept;
13425+
13426+ /* is the parent dir cached? */
13427+ br = au_sbr(sb, nsi_lock.bindex);
13428+ au_lcnt_inc(&br->br_nfiles); /* paired with au_lcnt_dec() at out_unlock */
13429+ dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock);
13430+ if (IS_ERR(dentry))
13431+ goto out_unlock;
13432+ if (dentry)
13433+ goto accept;
13434+
13435+ /* lookup path */
13436+ dentry = decode_by_path(sb, ino, fh, fh_len, &nsi_lock);
13437+ if (IS_ERR(dentry))
13438+ goto out_unlock;
13439+ if (unlikely(!dentry)) /* a NULL dentry is returned to the caller here */
13440+ /* todo?: make it ESTALE */
13441+ goto out_unlock;
13442+
13443+accept: /* the dentry must be current and i_generation must match the handle */
13444+ if (!au_digen_test(dentry, au_sigen(sb))
13445+ && d_inode(dentry)->i_generation == fh[Fh_igen])
13446+ goto out_unlock; /* success */
13447+
13448+ dput(dentry);
13449+ dentry = ERR_PTR(-ESTALE);
13450+out_unlock:
13451+ if (br) /* non-NULL only after the br_nfiles increment above */
13452+ au_lcnt_dec(&br->br_nfiles);
13453+ si_read_unlock(sb);
13454+out:
13455+ AuTraceErrPtr(dentry);
13456+ return dentry;
13457+}
13458+
13459+#if 0 /* reserved for future use */
13460+/* support subtreecheck option */
13461+static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid,
13462+ int fh_len, int fh_type)
13463+{ /* disabled: would resolve the parent directory named by the file handle */
13464+ struct dentry *parent;
13465+ __u32 *fh = fid->raw;
13466+ ino_t dir_ino;
13467+
13468+ dir_ino = decode_ino(fh + Fh_dir_ino);
13469+ parent = decode_by_ino(sb, dir_ino, 0);
13470+ if (IS_ERR(parent))
13471+ goto out;
13472+ if (!parent) /* NOTE(review): args below do not match decode_by_path() as defined above */
13473+ parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]),
13474+ dir_ino, fh, fh_len);
13475+
13476+out:
13477+ AuTraceErrPtr(parent);
13478+ return parent;
13479+}
13480+#endif
13481+
13482+/* ---------------------------------------------------------------------- */
13483+
13484+static int aufs_encode_fh(struct inode *inode, __u32 *fh, int *max_len,
13485+ struct inode *dir)
13486+{ /* .encode_fh: aufs words first, then the lower parent's handle from Fh_tail on */
13487+ int err;
13488+ aufs_bindex_t bindex;
13489+ struct super_block *sb, *h_sb;
13490+ struct dentry *dentry, *parent, *h_parent;
13491+ struct inode *h_dir;
13492+ struct au_branch *br;
13493+
13494+ err = -ENOSPC; /* any negative err becomes FILEID_INVALID at 'out' */
13495+ if (unlikely(*max_len <= Fh_tail)) {
13496+ AuWarn1("NFSv2 client (max_len %d)?\n", *max_len);
13497+ goto out;
13498+ }
13499+
13500+ err = FILEID_ROOT; /* returned for the root inode without touching fh[] */
13501+ if (inode->i_ino == AUFS_ROOT_INO) {
13502+ AuDebugOn(inode->i_ino != AUFS_ROOT_INO);
13503+ goto out;
13504+ }
13505+
13506+ h_parent = NULL;
13507+ sb = inode->i_sb;
13508+ err = si_read_lock(sb, AuLock_FLUSH);
13509+ if (unlikely(err))
13510+ goto out;
13511+
13512+#ifdef CONFIG_AUFS_DEBUG
13513+ if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
13514+ AuWarn1("NFS-exporting requires xino\n");
13515+#endif
13516+ err = -EIO;
13517+ parent = NULL;
13518+ ii_read_lock_child(inode);
13519+ bindex = au_ibtop(inode);
13520+ if (!dir) { /* no parent inode given: derive it from any alias of the inode */
13521+ dentry = d_find_any_alias(inode);
13522+ if (unlikely(!dentry))
13523+ goto out_unlock;
13524+ AuDebugOn(au_test_anon(dentry));
13525+ parent = dget_parent(dentry);
13526+ dput(dentry);
13527+ if (unlikely(!parent))
13528+ goto out_unlock;
13529+ if (d_really_is_positive(parent)) /* NOTE(review): dir stays NULL otherwise */
13530+ dir = d_inode(parent);
13531+ }
13532+
13533+ ii_read_lock_parent(dir);
13534+ h_dir = au_h_iptr(dir, bindex); /* lower dir on the same branch as the inode */
13535+ ii_read_unlock(dir);
13536+ if (unlikely(!h_dir))
13537+ goto out_parent;
13538+ h_parent = d_find_any_alias(h_dir);
13539+ if (unlikely(!h_parent))
13540+ goto out_hparent;
13541+
13542+ err = -EPERM;
13543+ br = au_sbr(sb, bindex);
13544+ h_sb = au_br_sb(br);
13545+ if (unlikely(!h_sb->s_export_op)) { /* the lower fs must be exportable too */
13546+ AuErr1("%s branch is not exportable\n", au_sbtype(h_sb));
13547+ goto out_hparent;
13548+ }
13549+
13550+ fh[Fh_br_id] = br->br_id;
13551+ fh[Fh_sigen] = au_sigen(sb);
13552+ encode_ino(fh + Fh_ino, inode->i_ino);
13553+ encode_ino(fh + Fh_dir_ino, dir->i_ino);
13554+ fh[Fh_igen] = inode->i_generation;
13555+
13556+ *max_len -= Fh_tail; /* space left for the lower handle */
13557+ fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail),
13558+ max_len,
13559+ /*connectable or subtreecheck*/0);
13560+ err = fh[Fh_h_type];
13561+ *max_len += Fh_tail; /* report the total length including the aufs words */
13562+ /* todo: macros? */
13563+ if (err != FILEID_INVALID)
13564+ err = 99; /* the fixed value 99 is returned as the type on success */
13565+ else
13566+ AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb));
13567+
13568+out_hparent:
13569+ dput(h_parent);
13570+out_parent:
13571+ dput(parent);
13572+out_unlock:
13573+ ii_read_unlock(inode);
13574+ si_read_unlock(sb);
13575+out:
13576+ if (unlikely(err < 0)) /* negative errno values are not returned to the caller */
13577+ err = FILEID_INVALID;
13578+ return err;
13579+}
13580+
13581+/* ---------------------------------------------------------------------- */
13582+
13583+static int aufs_commit_metadata(struct inode *inode)
13584+{ /* .commit_metadata: commit the lower inode at au_ibtop(), then copy attrs up */
13585+ int err;
13586+ aufs_bindex_t bindex;
13587+ struct super_block *sb;
13588+ struct inode *h_inode;
13589+ int (*f)(struct inode *inode);
13590+
13591+ sb = inode->i_sb;
13592+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
13593+ ii_write_lock_child(inode);
13594+ bindex = au_ibtop(inode);
13595+ AuDebugOn(bindex < 0);
13596+ h_inode = au_h_iptr(inode, bindex);
13597+
13598+ f = h_inode->i_sb->s_export_op->commit_metadata; /* NOTE(review): s_export_op is not NULL-checked */
13599+ if (f) /* prefer the lower filesystem's own method */
13600+ err = f(h_inode);
13601+ else /* otherwise fall back to sync_inode_metadata() */
13602+ err = sync_inode_metadata(h_inode, /*wait*/1);
13603+
13604+ au_cpup_attr_timesizes(inode); /* done whether or not the commit succeeded */
13605+ ii_write_unlock(inode);
13606+ si_read_unlock(sb);
13607+ return err;
13608+}
13609+
13610+/* ---------------------------------------------------------------------- */
13611+
13612+static struct export_operations aufs_export_op = { /* installed by au_export_init() */
13613+ .fh_to_dentry = aufs_fh_to_dentry,
13614+ /* .fh_to_parent = aufs_fh_to_parent, */
13615+ .encode_fh = aufs_encode_fh,
13616+ .commit_metadata = aufs_commit_metadata
13617+};
13618+
13619+void au_export_init(struct super_block *sb)
13620+{ /* hook up aufs_export_op and reset the xigen state in the sbinfo */
13621+ struct au_sbinfo *sbinfo;
13622+ __u32 u;
13623+
13624+ BUILD_BUG_ON_MSG(IS_BUILTIN(CONFIG_AUFS_FS)
13625+ && IS_MODULE(CONFIG_EXPORTFS),
13626+ AUFS_NAME ": unsupported configuration "
13627+ "CONFIG_EXPORTFS=m and CONFIG_AUFS_FS=y");
13628+
13629+ sb->s_export_op = &aufs_export_op;
13630+ sbinfo = au_sbi(sb);
13631+ sbinfo->si_xigen = NULL; /* no xigen file yet; au_xigen_set() installs one */
13632+ get_random_bytes(&u, sizeof(u)); /* random start for the generation counter */
13633+ BUILD_BUG_ON(sizeof(u) != sizeof(int));
13634+ atomic_set(&sbinfo->si_xigen_next, u);
13635+}
13636diff -urN /usr/share/empty/fs/aufs/fhsm.c linux/fs/aufs/fhsm.c
13637--- /usr/share/empty/fs/aufs/fhsm.c 1970-01-01 01:00:00.000000000 +0100
13638+++ linux/fs/aufs/fhsm.c 2022-11-05 23:02:18.962555950 +0100
13639@@ -0,0 +1,426 @@
13640+// SPDX-License-Identifier: GPL-2.0
13641+/*
13642+ * Copyright (C) 2011-2022 Junjiro R. Okajima
13643+ *
13644+ * This program is free software; you can redistribute it and/or modify
13645+ * it under the terms of the GNU General Public License as published by
13646+ * the Free Software Foundation; either version 2 of the License, or
13647+ * (at your option) any later version.
13648+ *
13649+ * This program is distributed in the hope that it will be useful,
13650+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
13651+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13652+ * GNU General Public License for more details.
13653+ *
13654+ * You should have received a copy of the GNU General Public License
13655+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
13656+ */
13657+
13658+/*
13659+ * File-based Hierarchy Storage Management
13660+ */
13661+
13662+#include <linux/anon_inodes.h>
13663+#include <linux/poll.h>
13664+#include <linux/seq_file.h>
13665+#include <linux/statfs.h>
13666+#include "aufs.h"
13667+
13668+static aufs_bindex_t au_fhsm_bottom(struct super_block *sb)
13669+{ /* return si_fhsm.fhsm_bottom of sb */
13670+ struct au_sbinfo *sbinfo;
13671+ struct au_fhsm *fhsm;
13672+
13673+ SiMustAnyLock(sb);
13674+
13675+ sbinfo = au_sbi(sb);
13676+ fhsm = &sbinfo->si_fhsm;
13677+ AuDebugOn(!fhsm);
13678+ return fhsm->fhsm_bottom;
13679+}
13680+
13681+void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex)
13682+{ /* store bindex into si_fhsm.fhsm_bottom of sb */
13683+ struct au_sbinfo *sbinfo;
13684+ struct au_fhsm *fhsm;
13685+
13686+ SiMustWriteLock(sb);
13687+
13688+ sbinfo = au_sbi(sb);
13689+ fhsm = &sbinfo->si_fhsm;
13690+ AuDebugOn(!fhsm);
13691+ fhsm->fhsm_bottom = bindex;
13692+}
13693+
13694+/* ---------------------------------------------------------------------- */
13695+
13696+static int au_fhsm_test_jiffy(struct au_sbinfo *sbinfo, struct au_branch *br)
13697+{ /* non-zero when bf_readable is clear or bf_jiffy + fhsm_expire has passed */
13698+ struct au_br_fhsm *bf;
13699+
13700+ bf = br->br_fhsm;
13701+ MtxMustLock(&bf->bf_lock);
13702+
13703+ return !bf->bf_readable
13704+ || time_after(jiffies,
13705+ bf->bf_jiffy + sbinfo->si_fhsm.fhsm_expire);
13706+}
13707+
13708+/* ---------------------------------------------------------------------- */
13709+
13710+static void au_fhsm_notify(struct super_block *sb, int val)
13711+{ /* set fhsm_readable to val and, for a non-zero val, wake fhsm_wqh waiters */
13712+ struct au_sbinfo *sbinfo;
13713+ struct au_fhsm *fhsm;
13714+
13715+ SiMustAnyLock(sb);
13716+
13717+ sbinfo = au_sbi(sb);
13718+ fhsm = &sbinfo->si_fhsm;
13719+ if (au_fhsm_pid(fhsm)
13720+ && atomic_read(&fhsm->fhsm_readable) != -1) { /* -1 is never overwritten */
13721+ atomic_set(&fhsm->fhsm_readable, val);
13722+ if (val)
13723+ wake_up(&fhsm->fhsm_wqh);
13724+ }
13725+}
13726+
13727+static int au_fhsm_stfs(struct super_block *sb, aufs_bindex_t bindex,
13728+ struct aufs_stfs *rstfs, int do_lock, int do_notify)
13729+{ /* refresh bf_stfs of branch bindex via au_br_stfs(); copy to *rstfs if given */
13730+ int err;
13731+ struct au_branch *br;
13732+ struct au_br_fhsm *bf;
13733+
13734+ br = au_sbr(sb, bindex);
13735+ AuDebugOn(au_br_rdonly(br));
13736+ bf = br->br_fhsm;
13737+ AuDebugOn(!bf);
13738+
13739+ if (do_lock) /* take bf_lock here, otherwise the caller must hold it */
13740+ mutex_lock(&bf->bf_lock);
13741+ else
13742+ MtxMustLock(&bf->bf_lock);
13743+
13744+ /* sb->s_root for NFS is unreliable */
13745+ err = au_br_stfs(br, &bf->bf_stfs);
13746+ if (unlikely(err)) { /* args follow the format: error code first, then branch */
13747+ AuErr1("FHSM failed (%d), b%d, ignored.\n", err, bindex);
13748+ goto out;
13749+ }
13750+
13751+ bf->bf_jiffy = jiffies; /* timestamp read back by au_fhsm_test_jiffy() */
13752+ bf->bf_readable = 1;
13753+ if (do_notify)
13754+ au_fhsm_notify(sb, /*val*/1);
13755+ if (rstfs)
13756+ *rstfs = bf->bf_stfs;
13757+
13758+out:
13759+ if (do_lock)
13760+ mutex_unlock(&bf->bf_lock);
13761+ au_fhsm_notify(sb, /*val*/1); /* NOTE(review): runs even when do_notify is 0 */
13762+
13763+ return err;
13764+}
13765+
13766+void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force)
13767+{ /* refresh the statfs cache of branch bindex when forced, watched or expired */
13768+ int err;
13769+ struct au_sbinfo *sbinfo;
13770+ struct au_fhsm *fhsm;
13771+ struct au_branch *br;
13772+ struct au_br_fhsm *bf;
13773+
13774+ AuDbg("b%d, force %d\n", bindex, force);
13775+ SiMustAnyLock(sb);
13776+
13777+ sbinfo = au_sbi(sb);
13778+ fhsm = &sbinfo->si_fhsm;
13779+ if (!au_ftest_si(sbinfo, FHSM)
13780+ || fhsm->fhsm_bottom == bindex) /* nothing to do for the bottom branch */
13781+ return;
13782+
13783+ br = au_sbr(sb, bindex);
13784+ bf = br->br_fhsm;
13785+ AuDebugOn(!bf);
13786+ mutex_lock(&bf->bf_lock);
13787+ if (force
13788+ || au_fhsm_pid(fhsm)
13789+ || au_fhsm_test_jiffy(sbinfo, br)) /* err is assigned but never read */
13790+ err = au_fhsm_stfs(sb, bindex, /*rstfs*/NULL, /*do_lock*/0,
13791+ /*do_notify*/1);
13792+ mutex_unlock(&bf->bf_lock);
13793+}
13794+
13795+void au_fhsm_wrote_all(struct super_block *sb, int force)
13796+{ /* call au_fhsm_wrote() for every fhsm branch above the fhsm bottom */
13797+ aufs_bindex_t bindex, bbot;
13798+ struct au_branch *br;
13799+
13800+ /* exclude the bottom */
13801+ bbot = au_fhsm_bottom(sb);
13802+ for (bindex = 0; bindex < bbot; bindex++) {
13803+ br = au_sbr(sb, bindex);
13804+ if (au_br_fhsm(br->br_perm)) /* branches without the fhsm perm are skipped */
13805+ au_fhsm_wrote(sb, bindex, force);
13806+ }
13807+}
13808+
13809+/* ---------------------------------------------------------------------- */
13810+
13811+static __poll_t au_fhsm_poll(struct file *file, struct poll_table_struct *wait)
13812+{ /* poll handler: reports EPOLLIN while fhsm_readable is non-zero */
13813+ __poll_t mask;
13814+ struct au_sbinfo *sbinfo;
13815+ struct au_fhsm *fhsm;
13816+
13817+ mask = 0;
13818+ sbinfo = file->private_data;
13819+ fhsm = &sbinfo->si_fhsm;
13820+ poll_wait(file, &fhsm->fhsm_wqh, wait);
13821+ if (atomic_read(&fhsm->fhsm_readable))
13822+ mask = EPOLLIN /* | EPOLLRDNORM */;
13823+
13824+ if (!mask) /* NOTE(review): logs only when mask is 0, so always "0x0" */
13825+ AuDbg("mask 0x%x\n", mask);
13826+ return mask;
13827+}
13828+
13829+static int au_fhsm_do_read_one(struct aufs_stbr __user *stbr,
13830+ struct aufs_stfs *stfs, __s16 brid)
13831+{ /* copy one stfs plus the branch id to user space; returns 0 or -EFAULT */
13832+ int err;
13833+
13834+ err = copy_to_user(&stbr->stfs, stfs, sizeof(*stfs));
13835+ if (!err)
13836+ err = __put_user(brid, &stbr->brid);
13837+ if (unlikely(err)) /* any failure of either copy is reported as -EFAULT */
13838+ err = -EFAULT;
13839+
13840+ return err;
13841+}
13842+
13843+static ssize_t au_fhsm_do_read(struct super_block *sb,
13844+ struct aufs_stbr __user *stbr, size_t count)
13845+{ /* copy out every readable fhsm branch; returns bytes copied or an error */
13846+ ssize_t err;
13847+ int nstbr;
13848+ aufs_bindex_t bindex, bbot;
13849+ struct au_branch *br;
13850+ struct au_br_fhsm *bf;
13851+
13852+ /* except the bottom branch */
13853+ err = 0;
13854+ nstbr = 0;
13855+ bbot = au_fhsm_bottom(sb);
13856+ for (bindex = 0; !err && bindex < bbot; bindex++) { /* stops at the first error */
13857+ br = au_sbr(sb, bindex);
13858+ if (!au_br_fhsm(br->br_perm))
13859+ continue;
13860+
13861+ bf = br->br_fhsm;
13862+ mutex_lock(&bf->bf_lock);
13863+ if (bf->bf_readable) {
13864+ err = -EFAULT; /* kept when the remaining count is too small */
13865+ if (count >= sizeof(*stbr))
13866+ err = au_fhsm_do_read_one(stbr++, &bf->bf_stfs,
13867+ br->br_id);
13868+ if (!err) { /* entry delivered: mark it consumed and account for it */
13869+ bf->bf_readable = 0;
13870+ count -= sizeof(*stbr);
13871+ nstbr++;
13872+ }
13873+ }
13874+ mutex_unlock(&bf->bf_lock);
13875+ }
13876+ if (!err)
13877+ err = sizeof(*stbr) * nstbr;
13878+
13879+ return err;
13880+}
13881+/* ->read: wait until fhsm_readable is set, then hand over the readable branch stats */
13882+static ssize_t au_fhsm_read(struct file *file, char __user *buf, size_t count,
13883+ loff_t *pos)
13884+{
13885+ ssize_t err;
13886+ int readable;
13887+ aufs_bindex_t nfhsm, bindex, bbot;
13888+ struct au_sbinfo *sbinfo;
13889+ struct au_fhsm *fhsm;
13890+ struct au_branch *br;
13891+ struct super_block *sb;
13892+
13893+ err = 0;
13894+ sbinfo = file->private_data;
13895+ fhsm = &sbinfo->si_fhsm;
13896+need_data:
13897+ spin_lock_irq(&fhsm->fhsm_wqh.lock);
13898+ if (!atomic_read(&fhsm->fhsm_readable)) {
13899+ if (vfsub_file_flags(file) & O_NONBLOCK)
13900+ err = -EAGAIN;
13901+ else
13902+ err = wait_event_interruptible_locked_irq
13903+ (fhsm->fhsm_wqh,
13904+ atomic_read(&fhsm->fhsm_readable));
13905+ }
13906+ spin_unlock_irq(&fhsm->fhsm_wqh.lock);
13907+ if (unlikely(err)) /* -EAGAIN, or the error returned by the interruptible wait */
13908+ goto out;
13909+
13910+ /* sb may already be dead */
13911+ au_rw_read_lock(&sbinfo->si_rwsem);
13912+ readable = atomic_read(&fhsm->fhsm_readable); /* re-read under si_rwsem */
13913+ if (readable > 0) {
13914+ sb = sbinfo->si_sb;
13915+ AuDebugOn(!sb);
13916+ /* exclude the bottom branch */
13917+ nfhsm = 0;
13918+ bbot = au_fhsm_bottom(sb);
13919+ for (bindex = 0; bindex < bbot; bindex++) {
13920+ br = au_sbr(sb, bindex);
13921+ if (au_br_fhsm(br->br_perm))
13922+ nfhsm++;
13923+ }
13924+ err = -EMSGSIZE; /* kept when @count cannot hold one entry per fhsm branch */
13925+ if (nfhsm * sizeof(struct aufs_stbr) <= count) {
13926+ atomic_set(&fhsm->fhsm_readable, 0);
13927+ err = au_fhsm_do_read(sbinfo->si_sb, (void __user *)buf,
13928+ count);
13929+ }
13930+ }
13931+ au_rw_read_unlock(&sbinfo->si_rwsem);
13932+ if (!readable) /* the flag was cleared meanwhile: wait again */
13933+ goto need_data;
13934+ /* NOTE(review): a negative 'readable' falls through and returns 0 -- presumably the state after au_fhsm_fin(); confirm */
13935+out:
13936+ return err;
13937+}
13938+/* ->release: clear the owner pid and drop the si_kobj reference taken in au_fhsm_fd() */
13939+static int au_fhsm_release(struct inode *inode, struct file *file)
13940+{
13941+ struct au_sbinfo *sbinfo;
13942+ struct au_fhsm *fhsm;
13943+
13944+ /* sb may already be dead */
13945+ sbinfo = file->private_data;
13946+ fhsm = &sbinfo->si_fhsm;
13947+ spin_lock(&fhsm->fhsm_spin);
13948+ fhsm->fhsm_pid = 0; /* lets a later au_fhsm_fd() pass its -EBUSY check */
13949+ spin_unlock(&fhsm->fhsm_spin);
13950+ kobject_put(&sbinfo->si_kobj);
13951+
13952+ return 0;
13953+}
13954+/* file operations of the "[aufs_fhsm]" anonymous-inode fd created by au_fhsm_fd() */
13955+static const struct file_operations au_fhsm_fops = {
13956+ .owner = THIS_MODULE,
13957+ .llseek = noop_llseek,
13958+ .read = au_fhsm_read,
13959+ .poll = au_fhsm_poll,
13960+ .release = au_fhsm_release
13961+};
13962+/* create the fhsm notification fd for @sb; returns the new fd or a negative error */
13963+int au_fhsm_fd(struct super_block *sb, int oflags)
13964+{
13965+ int err, fd;
13966+ struct au_sbinfo *sbinfo;
13967+ struct au_fhsm *fhsm;
13968+
13969+ err = -EPERM;
13970+ if (unlikely(!capable(CAP_SYS_ADMIN)))
13971+ goto out;
13972+
13973+ err = -EINVAL; /* only O_CLOEXEC and O_NONBLOCK are accepted */
13974+ if (unlikely(oflags & ~(O_CLOEXEC | O_NONBLOCK)))
13975+ goto out;
13976+
13977+ err = 0;
13978+ sbinfo = au_sbi(sb);
13979+ fhsm = &sbinfo->si_fhsm;
13980+ spin_lock(&fhsm->fhsm_spin);
13981+ if (!fhsm->fhsm_pid) /* claim ownership; while a pid is recorded others get -EBUSY */
13982+ fhsm->fhsm_pid = current->pid;
13983+ else
13984+ err = -EBUSY;
13985+ spin_unlock(&fhsm->fhsm_spin);
13986+ if (unlikely(err))
13987+ goto out;
13988+
13989+ oflags |= O_RDONLY;
13990+ /* oflags |= FMODE_NONOTIFY; */
13991+ fd = anon_inode_getfd("[aufs_fhsm]", &au_fhsm_fops, sbinfo, oflags);
13992+ err = fd;
13993+ if (unlikely(fd < 0))
13994+ goto out_pid;
13995+
13996+ /* succeed regardless 'fhsm' status */
13997+ kobject_get(&sbinfo->si_kobj); /* NOTE(review): taken after the fd exists; confirm a racing close() reaching au_fhsm_release() first is harmless */
13998+ si_noflush_read_lock(sb);
13999+ if (au_ftest_si(sbinfo, FHSM))
14000+ au_fhsm_wrote_all(sb, /*force*/0);
14001+ si_read_unlock(sb);
14002+ goto out; /* success */
14003+ /* failure after the pid was claimed: give the ownership back */
14004+out_pid:
14005+ spin_lock(&fhsm->fhsm_spin);
14006+ fhsm->fhsm_pid = 0;
14007+ spin_unlock(&fhsm->fhsm_spin);
14008+out:
14009+ AuTraceErr(err);
14010+ return err;
14011+}
14012+
14013+/* ---------------------------------------------------------------------- */
14014+/* allocate (GFP_NOFS) and initialize br->br_fhsm; returns 0 or -ENOMEM */
14015+int au_fhsm_br_alloc(struct au_branch *br)
14016+{
14017+ int err;
14018+
14019+ err = 0;
14020+ br->br_fhsm = kmalloc(sizeof(*br->br_fhsm), GFP_NOFS);
14021+ if (br->br_fhsm)
14022+ au_br_fhsm_init(br->br_fhsm);
14023+ else
14024+ err = -ENOMEM;
14025+
14026+ return err;
14027+}
14028+
14029+/* ---------------------------------------------------------------------- */
14030+/* forwards to au_fhsm_notify() with val -1 */
14031+void au_fhsm_fin(struct super_block *sb)
14032+{
14033+ au_fhsm_notify(sb, /*val*/-1);
14034+}
14035+/* set up the spinlock, the wait queue and the initial values of sbinfo->si_fhsm */
14036+void au_fhsm_init(struct au_sbinfo *sbinfo)
14037+{
14038+ struct au_fhsm *fhsm;
14039+
14040+ fhsm = &sbinfo->si_fhsm;
14041+ spin_lock_init(&fhsm->fhsm_spin);
14042+ init_waitqueue_head(&fhsm->fhsm_wqh);
14043+ atomic_set(&fhsm->fhsm_readable, 0);
14044+ fhsm->fhsm_expire /* default AUFS_FHSM_CACHE_DEF_SEC seconds, stored in jiffies */
14045+ = msecs_to_jiffies(AUFS_FHSM_CACHE_DEF_SEC * MSEC_PER_SEC);
14046+ fhsm->fhsm_bottom = -1;
14047+}
14048+/* store @sec (seconds) as fhsm_expire in jiffies; NOTE(review): sec * MSEC_PER_SEC is not range-checked here */
14049+void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec)
14050+{
14051+ sbinfo->si_fhsm.fhsm_expire
14052+ = msecs_to_jiffies(sec * MSEC_PER_SEC);
14053+}
14054+/* print ",fhsm_sec=N" to @seq when the FHSM flag is set and N differs from the default */
14055+void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo)
14056+{
14057+ unsigned int u;
14058+
14059+ if (!au_ftest_si(sbinfo, FHSM))
14060+ return;
14061+
14062+ u = jiffies_to_msecs(sbinfo->si_fhsm.fhsm_expire) / MSEC_PER_SEC; /* back to seconds */
14063+ if (u != AUFS_FHSM_CACHE_DEF_SEC)
14064+ seq_printf(seq, ",fhsm_sec=%u", u);
14065+}
14066diff -urN /usr/share/empty/fs/aufs/file.c linux/fs/aufs/file.c
14067--- /usr/share/empty/fs/aufs/file.c 1970-01-01 01:00:00.000000000 +0100
14068+++ linux/fs/aufs/file.c 2022-12-17 09:21:34.799855195 +0100
14069@@ -0,0 +1,860 @@
14070+// SPDX-License-Identifier: GPL-2.0
14071+/*
14072+ * Copyright (C) 2005-2022 Junjiro R. Okajima
14073+ *
14074+ * This program is free software; you can redistribute it and/or modify
14075+ * it under the terms of the GNU General Public License as published by
14076+ * the Free Software Foundation; either version 2 of the License, or
14077+ * (at your option) any later version.
14078+ *
14079+ * This program is distributed in the hope that it will be useful,
14080+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14081+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14082+ * GNU General Public License for more details.
14083+ *
14084+ * You should have received a copy of the GNU General Public License
14085+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
14086+ */
14087+
14088+/*
14089+ * handling of files/dirs, and address_space operations
14090+ */
14091+
14092+#ifdef CONFIG_AUFS_DEBUG
14093+#include <linux/migrate.h>
14094+#endif
14095+#include <linux/pagemap.h>
14096+#include "aufs.h"
14097+
14098+/* drop flags for writing: clear O_WRONLY/O_RDWR/O_APPEND/O_CREAT/O_TRUNC, then set O_RDONLY | O_NOATIME */
14099+unsigned int au_file_roflags(unsigned int flags)
14100+{
14101+ flags &= ~(O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC);
14102+ flags |= O_RDONLY | O_NOATIME;
14103+ return flags;
14104+}
14105+
14106+/* common functions to regular file and dir: open the lower dentry at @bindex; returns the lower file or ERR_PTR() */
14107+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
14108+ struct file *file, int force_wr)
14109+{
14110+ struct file *h_file;
14111+ struct dentry *h_dentry;
14112+ struct inode *h_inode;
14113+ struct super_block *sb;
14114+ struct au_branch *br;
14115+ struct path h_path;
14116+ int err;
14117+ /* NOTE(review): @file is not referenced anywhere in this body */
14118+ /* a race condition can happen between open and unlink/rmdir */
14119+ h_file = ERR_PTR(-ENOENT);
14120+ h_dentry = au_h_dptr(dentry, bindex);
14121+ if (au_test_nfsd() && (!h_dentry || d_is_negative(h_dentry)))
14122+ goto out;
14123+ h_inode = d_inode(h_dentry); /* NOTE(review): h_dentry is NULL-checked only on the nfsd path; presumably callers guarantee it */
14124+ spin_lock(&h_dentry->d_lock);
14125+ err = (!d_unhashed(dentry) && d_unlinked(h_dentry))
14126+ /* || !d_inode(dentry)->i_nlink */
14127+ ;
14128+ spin_unlock(&h_dentry->d_lock);
14129+ if (unlikely(err)) /* lower dentry unlinked while ours is still hashed: -ENOENT */
14130+ goto out;
14131+
14132+ sb = dentry->d_sb;
14133+ br = au_sbr(sb, bindex);
14134+ err = au_br_test_oflag(flags, br);
14135+ h_file = ERR_PTR(err);
14136+ if (unlikely(err))
14137+ goto out;
14138+
14139+ /* drop flags for writing */
14140+ if (au_test_ro(sb, bindex, d_inode(dentry))) {
14141+ if (force_wr && !(flags & O_WRONLY))
14142+ force_wr = 0;
14143+ flags = au_file_roflags(flags);
14144+ if (force_wr) {
14145+ h_file = ERR_PTR(-EROFS);
14146+ flags = au_file_roflags(flags); /* repeats the call made just above */
14147+ if (unlikely(vfsub_native_ro(h_inode)
14148+ || IS_APPEND(h_inode)))
14149+ goto out;
14150+ flags &= ~O_ACCMODE; /* forced write: open write-only */
14151+ flags |= O_WRONLY;
14152+ }
14153+ }
14154+ flags &= ~O_CREAT;
14155+ au_lcnt_inc(&br->br_nfiles); /* undone at out_br when the open fails */
14156+ h_path.dentry = h_dentry;
14157+ h_path.mnt = au_br_mnt(br);
14158+ h_file = vfsub_dentry_open(&h_path, flags);
14159+ if (IS_ERR(h_file))
14160+ goto out_br;
14161+
14162+ if (flags & __FMODE_EXEC) {
14163+ err = deny_write_access(h_file);
14164+ if (unlikely(err)) { /* close the just-opened lower file and report the error */
14165+ fput(h_file);
14166+ h_file = ERR_PTR(err);
14167+ goto out_br;
14168+ }
14169+ }
14170+ fsnotify_open(h_file);
14171+ goto out; /* success */
14172+
14173+out_br:
14174+ au_lcnt_dec(&br->br_nfiles);
14175+out:
14176+ return h_file;
14177+}
14178+/* copy-up on open (and unlink the source when AuBrWAttr_MOO is set) as the cmoo bits of the dentry's top branch request; returns 0 or a negative error */
14179+static int au_cmoo(struct dentry *dentry)
14180+{
14181+ int err, cmoo, matched;
14182+ unsigned int udba;
14183+ struct path h_path;
14184+ struct au_pin pin;
14185+ struct au_cp_generic cpg = {
14186+ .dentry = dentry,
14187+ .bdst = -1,
14188+ .bsrc = -1,
14189+ .len = -1,
14190+ .pin = &pin,
14191+ .flags = AuCpup_DTIME | AuCpup_HOPEN
14192+ };
14193+ struct inode *delegated;
14194+ struct super_block *sb;
14195+ struct au_sbinfo *sbinfo;
14196+ struct au_fhsm *fhsm;
14197+ pid_t pid;
14198+ struct au_branch *br;
14199+ struct dentry *parent;
14200+ struct au_hinode *hdir;
14201+
14202+ DiMustWriteLock(dentry);
14203+ IiMustWriteLock(d_inode(dentry));
14204+
14205+ err = 0;
14206+ if (IS_ROOT(dentry))
14207+ goto out;
14208+ cpg.bsrc = au_dbtop(dentry);
14209+ if (!cpg.bsrc) /* already on branch index 0: nothing above to copy to */
14210+ goto out;
14211+
14212+ sb = dentry->d_sb;
14213+ sbinfo = au_sbi(sb);
14214+ fhsm = &sbinfo->si_fhsm;
14215+ pid = au_fhsm_pid(fhsm);
14216+ rcu_read_lock();
14217+ matched = (pid
14218+ && (current->pid == pid
14219+ || rcu_dereference(current->real_parent)->pid == pid));
14220+ rcu_read_unlock();
14221+ if (matched) /* the process holding the fhsm pid and its direct children are exempt */
14222+ goto out;
14223+
14224+ br = au_sbr(sb, cpg.bsrc);
14225+ cmoo = au_br_cmoo(br->br_perm);
14226+ if (!cmoo)
14227+ goto out;
14228+ if (!d_is_reg(dentry)) /* non-regular files keep only the AuBrAttr_COO_ALL bits */
14229+ cmoo &= AuBrAttr_COO_ALL;
14230+ if (!cmoo)
14231+ goto out;
14232+
14233+ parent = dget_parent(dentry);
14234+ di_write_lock_parent(parent);
14235+ err = au_wbr_do_copyup_bu(dentry, cpg.bsrc - 1);
14236+ cpg.bdst = err; /* a non-negative result is used as the destination branch index */
14237+ if (unlikely(err < 0)) {
14238+ err = 0; /* there is no upper writable branch */
14239+ goto out_dgrade;
14240+ }
14241+ AuDbg("bsrc %d, bdst %d\n", cpg.bsrc, cpg.bdst);
14242+
14243+ /* do not respect the coo attrib for the target branch */
14244+ err = au_cpup_dirs(dentry, cpg.bdst);
14245+ if (unlikely(err))
14246+ goto out_dgrade;
14247+
14248+ di_downgrade_lock(parent, AuLock_IR);
14249+ udba = au_opt_udba(sb);
14250+ err = au_pin(&pin, dentry, cpg.bdst, udba,
14251+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
14252+ if (unlikely(err))
14253+ goto out_parent;
14254+
14255+ err = au_sio_cpup_simple(&cpg);
14256+ au_unpin(&pin);
14257+ if (unlikely(err))
14258+ goto out_parent;
14259+ if (!(cmoo & AuBrWAttr_MOO))
14260+ goto out_parent; /* success */
14261+ /* move-on-open: pin the source branch and unlink the source after the copy-up */
14262+ err = au_pin(&pin, dentry, cpg.bsrc, udba,
14263+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
14264+ if (unlikely(err))
14265+ goto out_parent;
14266+
14267+ h_path.mnt = au_br_mnt(br);
14268+ h_path.dentry = au_h_dptr(dentry, cpg.bsrc);
14269+ hdir = au_hi(d_inode(parent), cpg.bsrc);
14270+ delegated = NULL;
14271+ err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated, /*force*/1);
14272+ au_unpin(&pin);
14273+ /* todo: keep h_dentry or not? */
14274+ if (unlikely(err == -EWOULDBLOCK)) {
14275+ pr_warn("cannot retry for NFSv4 delegation"
14276+ " for an internal unlink\n");
14277+ iput(delegated);
14278+ }
14279+ if (unlikely(err)) { /* an unlink failure is logged and then ignored */
14280+ pr_err("unlink %pd after coo failed (%d), ignored\n",
14281+ dentry, err);
14282+ err = 0;
14283+ }
14284+ goto out_parent; /* success */
14285+
14286+out_dgrade:
14287+ di_downgrade_lock(parent, AuLock_IR);
14288+out_parent:
14289+ di_read_unlock(parent, AuLock_IR);
14290+ dput(parent);
14291+out:
14292+ AuTraceErr(err);
14293+ return err;
14294+}
14295+
14296+int au_do_open(struct file *file, struct au_do_open_args *args)
14297+{
14298+ int err, aopen = args->aopen;
14299+ struct dentry *dentry;
14300+ struct au_finfo *finfo;
14301+
14302+ if (!aopen)
14303+ err = au_finfo_init(file, args->fidir);
14304+ else {
14305+ lockdep_off();
14306+ err = au_finfo_init(file, args->fidir);
14307+ lockdep_on();
14308+ }
14309+ if (unlikely(err))
14310+ goto out;
14311+
14312+ dentry = file->f_path.dentry;
14313+ AuDebugOn(IS_ERR_OR_NULL(dentry));
14314+ di_write_lock_child(dentry);
14315+ err = au_cmoo(dentry);
14316+ if (!err) {
14317+ if (!aopen) {
14318+ err = args->open(file, vfsub_file_flags(file),
14319+ au_di(dentry)->di_htmpfile);
14320+ di_write_unlock(dentry);
14321+ } else {
14322+ di_downgrade_lock(dentry, AuLock_IR);
14323+ lockdep_off();
14324+ err = args->open(file, vfsub_file_flags(file),
14325+ args->h_file);
14326+ lockdep_on();
14327+ di_read_unlock(dentry, AuLock_IR);
14328+ }
14329+ }
14330+
14331+ finfo = au_fi(file);
14332+ if (!err) {
14333+ finfo->fi_file = file;
14334+ au_hbl_add(&finfo->fi_hlist,
14335+ &au_sbi(file->f_path.dentry->d_sb)->si_files);
14336+ }
14337+ if (!aopen)
14338+ fi_write_unlock(file);
14339+ else {
14340+ lockdep_off();
14341+ fi_write_unlock(file);
14342+ lockdep_on();
14343+ }
14344+ if (unlikely(err)) {
14345+ finfo->fi_hdir = NULL;
14346+ au_finfo_fin(file);
14347+ }
14348+
14349+out:
14350+ AuTraceErr(err);
14351+ return err;
14352+}
14353+/* (re)open the lower file on the dentry's top branch and install it as the lower file of @file; returns 0 or a negative error */
14354+int au_reopen_nondir(struct file *file)
14355+{
14356+ int err;
14357+ aufs_bindex_t btop;
14358+ struct dentry *dentry;
14359+ struct au_branch *br;
14360+ struct file *h_file, *h_file_tmp;
14361+
14362+ dentry = file->f_path.dentry;
14363+ btop = au_dbtop(dentry);
14364+ br = au_sbr(dentry->d_sb, btop);
14365+ h_file_tmp = NULL;
14366+ if (au_fbtop(file) == btop) {
14367+ h_file = au_hf_top(file);
14368+ if (file->f_mode == h_file->f_mode)
14369+ return 0; /* success */
14370+ h_file_tmp = h_file; /* keep the old lower file so that a failed open can be reverted */
14371+ get_file(h_file_tmp);
14372+ au_lcnt_inc(&br->br_nfiles);
14373+ au_set_h_fptr(file, btop, NULL);
14374+ }
14375+ AuDebugOn(au_fi(file)->fi_hdir);
14376+ /*
14377+ * it can happen
14378+ * file exists on both of rw and ro
14379+ * open --> dbtop and fbtop are both 0
14380+ * prepend a branch as rw, "rw" become ro
14381+ * remove rw/file
14382+ * delete the top branch, "rw" becomes rw again
14383+ * --> dbtop is 1, fbtop is still 0
14384+ * write --> fbtop is 0 but dbtop is 1
14385+ */
14386+ /* AuDebugOn(au_fbtop(file) < btop); */
14387+
14388+ h_file = au_h_open(dentry, btop, vfsub_file_flags(file) & ~O_TRUNC,
14389+ file, /*force_wr*/0);
14390+ err = PTR_ERR(h_file);
14391+ if (IS_ERR(h_file)) {
14392+ if (h_file_tmp) {
14393+ /* revert */
14394+ au_set_h_fptr(file, btop, h_file_tmp);
14395+ h_file_tmp = NULL;
14396+ }
14397+ goto out; /* todo: close all? */
14398+ }
14399+
14400+ err = 0;
14401+ au_set_fbtop(file, btop);
14402+ au_set_h_fptr(file, btop, h_file);
14403+ au_update_figen(file);
14404+ /* todo: necessary? */
14405+ /* file->f_ra = h_file->f_ra; */
14406+
14407+out:
14408+ if (h_file_tmp) { /* drop the extra file reference and branch count taken above */
14409+ fput(h_file_tmp);
14410+ au_lcnt_dec(&br->br_nfiles);
14411+ }
14412+ return err;
14413+}
14414+
14415+/* ---------------------------------------------------------------------- */
14416+/* run au_reopen_nondir() with di_btop and the lower dentry at @btgt temporarily replaced by @btgt and @hi_wh */
14417+static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
14418+ struct dentry *hi_wh)
14419+{
14420+ int err;
14421+ aufs_bindex_t btop;
14422+ struct au_dinfo *dinfo;
14423+ struct dentry *h_dentry;
14424+ struct au_hdentry *hdp;
14425+
14426+ dinfo = au_di(file->f_path.dentry);
14427+ AuRwMustWriteLock(&dinfo->di_rwsem);
14428+ /* save the current values, substitute, reopen, then restore them */
14429+ btop = dinfo->di_btop;
14430+ dinfo->di_btop = btgt;
14431+ hdp = au_hdentry(dinfo, btgt);
14432+ h_dentry = hdp->hd_dentry;
14433+ hdp->hd_dentry = hi_wh;
14434+ err = au_reopen_nondir(file);
14435+ hdp->hd_dentry = h_dentry;
14436+ dinfo->di_btop = btop;
14437+
14438+ return err;
14439+}
14440+/* make @file writable on @bcpup via au_sio_cpup_wh() or au_reopen_wh(), then append a pseudo-link when the inode needs one */
14441+static int au_ready_to_write_wh(struct file *file, loff_t len,
14442+ aufs_bindex_t bcpup, struct au_pin *pin)
14443+{
14444+ int err;
14445+ struct inode *inode, *h_inode;
14446+ struct dentry *h_dentry, *hi_wh;
14447+ struct au_cp_generic cpg = {
14448+ .dentry = file->f_path.dentry,
14449+ .bdst = bcpup,
14450+ .bsrc = -1,
14451+ .len = len,
14452+ .pin = pin
14453+ };
14454+
14455+ au_update_dbtop(cpg.dentry);
14456+ inode = d_inode(cpg.dentry);
14457+ h_inode = NULL; /* stays NULL unless a positive lower dentry exists at @bcpup */
14458+ if (au_dbtop(cpg.dentry) <= bcpup
14459+ && au_dbbot(cpg.dentry) >= bcpup) {
14460+ h_dentry = au_h_dptr(cpg.dentry, bcpup);
14461+ if (h_dentry && d_is_positive(h_dentry))
14462+ h_inode = d_inode(h_dentry);
14463+ }
14464+ hi_wh = au_hi_wh(inode, bcpup);
14465+ if (!hi_wh && !h_inode)
14466+ err = au_sio_cpup_wh(&cpg, file);
14467+ else
14468+ /* already copied-up after unlink */
14469+ err = au_reopen_wh(file, bcpup, hi_wh);
14470+ /* NOTE(review): the else branch is also taken with hi_wh == NULL when h_inode is set; confirm au_reopen_wh() copes with a NULL hi_wh */
14471+ if (!err
14472+ && (inode->i_nlink > 1
14473+ || (inode->i_state & I_LINKABLE))
14474+ && au_opt_test(au_mntflags(cpg.dentry->d_sb), PLINK))
14475+ au_plink_append(inode, bcpup, au_h_dptr(cpg.dentry, bcpup));
14476+
14477+ return err;
14478+}
14479+
14480+/*
14481+ * prepare the @file for writing: pin the current branch when its lower file is already writable, otherwise copy-up and/or reopen on a writable branch; returns 0 or a negative error.
14482+ */
14483+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
14484+{
14485+ int err;
14486+ aufs_bindex_t dbtop;
14487+ struct dentry *parent;
14488+ struct inode *inode;
14489+ struct super_block *sb;
14490+ struct file *h_file;
14491+ struct au_cp_generic cpg = {
14492+ .dentry = file->f_path.dentry,
14493+ .bdst = -1,
14494+ .bsrc = -1,
14495+ .len = len,
14496+ .pin = pin,
14497+ .flags = AuCpup_DTIME
14498+ };
14499+
14500+ sb = cpg.dentry->d_sb;
14501+ inode = d_inode(cpg.dentry);
14502+ cpg.bsrc = au_fbtop(file);
14503+ err = au_test_ro(sb, cpg.bsrc, inode);
14504+ if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) { /* fast path: branch not read-only and lower file opened for write */
14505+ err = au_pin(pin, cpg.dentry, cpg.bsrc, AuOpt_UDBA_NONE,
14506+ /*flags*/0);
14507+ goto out;
14508+ }
14509+
14510+ /* need to cpup or reopen */
14511+ parent = dget_parent(cpg.dentry);
14512+ di_write_lock_parent(parent);
14513+ err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
14514+ cpg.bdst = err; /* a non-negative result is used as the destination branch index */
14515+ if (unlikely(err < 0))
14516+ goto out_dgrade;
14517+ err = 0;
14518+
14519+ if (!d_unhashed(cpg.dentry) && !au_h_dptr(parent, cpg.bdst)) { /* the parent has no lower dentry on bdst yet */
14520+ err = au_cpup_dirs(cpg.dentry, cpg.bdst);
14521+ if (unlikely(err))
14522+ goto out_dgrade;
14523+ }
14524+
14525+ err = au_pin(pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
14526+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
14527+ if (unlikely(err))
14528+ goto out_dgrade;
14529+
14530+ dbtop = au_dbtop(cpg.dentry);
14531+ if (dbtop <= cpg.bdst)
14532+ cpg.bsrc = cpg.bdst;
14533+
14534+ if (dbtop <= cpg.bdst /* just reopen */
14535+ || !d_unhashed(cpg.dentry) /* copyup and reopen */
14536+ ) {
14537+ h_file = au_h_open_pre(cpg.dentry, cpg.bsrc, /*force_wr*/0);
14538+ if (IS_ERR(h_file))
14539+ err = PTR_ERR(h_file); /* NOTE(review): parent is still write-locked here, yet out_unlock below calls di_read_unlock(); confirm */
14540+ else {
14541+ di_downgrade_lock(parent, AuLock_IR);
14542+ if (dbtop > cpg.bdst)
14543+ err = au_sio_cpup_simple(&cpg);
14544+ if (!err)
14545+ err = au_reopen_nondir(file);
14546+ au_h_open_post(cpg.dentry, cpg.bsrc, h_file);
14547+ }
14548+ } else { /* copyup as wh and reopen */
14549+ /*
14550+ * since writable hfsplus branch is not supported,
14551+ * h_open_pre/post() are unnecessary.
14552+ */
14553+ err = au_ready_to_write_wh(file, len, cpg.bdst, pin);
14554+ di_downgrade_lock(parent, AuLock_IR);
14555+ }
14556+
14557+ if (!err) { /* success skips the parent unlock below and keeps the pin for the caller */
14558+ au_pin_set_parent_lflag(pin, /*lflag*/0);
14559+ goto out_dput; /* success */
14560+ }
14561+ au_unpin(pin);
14562+ goto out_unlock;
14563+
14564+out_dgrade:
14565+ di_downgrade_lock(parent, AuLock_IR);
14566+out_unlock:
14567+ di_read_unlock(parent, AuLock_IR);
14568+out_dput:
14569+ dput(parent);
14570+out:
14571+ return err;
14572+}
14573+
14574+/* ---------------------------------------------------------------------- */
14575+/* call @flush with the si, fi and ii read locks held, then au_cpup_attr_timesizes(); returns the result of @flush */
14576+int au_do_flush(struct file *file, fl_owner_t id,
14577+ int (*flush)(struct file *file, fl_owner_t id))
14578+{
14579+ int err;
14580+ struct super_block *sb;
14581+ struct inode *inode;
14582+
14583+ inode = file_inode(file);
14584+ sb = inode->i_sb;
14585+ si_noflush_read_lock(sb);
14586+ fi_read_lock(file);
14587+ ii_read_lock_child(inode);
14588+
14589+ err = flush(file, id);
14590+ au_cpup_attr_timesizes(inode); /* called whether or not @flush failed */
14591+
14592+ ii_read_unlock(inode); /* unlock in the reverse order of locking */
14593+ fi_read_unlock(file);
14594+ si_read_unlock(sb);
14595+ return err;
14596+}
14597+
14598+/* ---------------------------------------------------------------------- */
14599+/* when the inode's top branch differs from the file's: copy-up a pseudo-linked file, or reopen on hi_wh and clear *need_reopen */
14600+static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
14601+{
14602+ int err;
14603+ struct au_pin pin;
14604+ struct au_finfo *finfo;
14605+ struct dentry *parent, *hi_wh;
14606+ struct inode *inode;
14607+ struct super_block *sb;
14608+ struct au_cp_generic cpg = {
14609+ .dentry = file->f_path.dentry,
14610+ .bdst = -1,
14611+ .bsrc = -1,
14612+ .len = -1,
14613+ .pin = &pin,
14614+ .flags = AuCpup_DTIME
14615+ };
14616+
14617+ FiMustWriteLock(file);
14618+
14619+ err = 0;
14620+ finfo = au_fi(file);
14621+ sb = cpg.dentry->d_sb;
14622+ inode = d_inode(cpg.dentry);
14623+ cpg.bdst = au_ibtop(inode);
14624+ if (cpg.bdst == finfo->fi_btop || IS_ROOT(cpg.dentry)) /* same branch, or root: nothing to do */
14625+ goto out;
14626+
14627+ parent = dget_parent(cpg.dentry);
14628+ if (au_test_ro(sb, cpg.bdst, inode)) { /* read-only branch: let AuWbrCopyup() choose bdst */
14629+ di_read_lock_parent(parent, !AuLock_IR);
14630+ err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
14631+ cpg.bdst = err;
14632+ di_read_unlock(parent, !AuLock_IR);
14633+ if (unlikely(err < 0))
14634+ goto out_parent;
14635+ err = 0;
14636+ }
14637+
14638+ di_read_lock_parent(parent, AuLock_IR);
14639+ hi_wh = au_hi_wh(inode, cpg.bdst);
14640+ if (!S_ISDIR(inode->i_mode)
14641+ && au_opt_test(au_mntflags(sb), PLINK)
14642+ && au_plink_test(inode)
14643+ && !d_unhashed(cpg.dentry)
14644+ && cpg.bdst < au_dbtop(cpg.dentry)) {
14645+ err = au_test_and_cpup_dirs(cpg.dentry, cpg.bdst);
14646+ if (unlikely(err))
14647+ goto out_unlock;
14648+
14649+ /* always superio. */
14650+ err = au_pin(&pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
14651+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
14652+ if (!err) {
14653+ err = au_sio_cpup_simple(&cpg);
14654+ au_unpin(&pin);
14655+ }
14656+ } else if (hi_wh) {
14657+ /* already copied-up after unlink */
14658+ err = au_reopen_wh(file, cpg.bdst, hi_wh);
14659+ *need_reopen = 0; /* the reopen was done here (cleared even when it failed) */
14660+ }
14661+
14662+out_unlock:
14663+ di_read_unlock(parent, AuLock_IR);
14664+out_parent:
14665+ dput(parent);
14666+out:
14667+ return err;
14668+}
14669+/* move the lower files of a dir to the current index of their branches, then recompute fi_btop and fd_bbot */
14670+static void au_do_refresh_dir(struct file *file)
14671+{
14672+ aufs_bindex_t bindex, bbot, new_bindex, brid;
14673+ struct au_hfile *p, tmp, *q;
14674+ struct au_finfo *finfo;
14675+ struct super_block *sb;
14676+ struct au_fidir *fidir;
14677+
14678+ FiMustWriteLock(file);
14679+
14680+ sb = file->f_path.dentry->d_sb;
14681+ finfo = au_fi(file);
14682+ fidir = finfo->fi_hdir;
14683+ AuDebugOn(!fidir);
14684+ p = fidir->fd_hfile + finfo->fi_btop;
14685+ brid = p->hf_br->br_id; /* id of the branch at the old fi_btop, looked up again below */
14686+ bbot = fidir->fd_bbot;
14687+ for (bindex = finfo->fi_btop; bindex <= bbot; bindex++, p++) {
14688+ if (!p->hf_file)
14689+ continue;
14690+
14691+ new_bindex = au_br_index(sb, p->hf_br->br_id);
14692+ if (new_bindex == bindex)
14693+ continue;
14694+ if (new_bindex < 0) { /* no index for this branch id any more: drop the lower file */
14695+ au_set_h_fptr(file, bindex, NULL);
14696+ continue;
14697+ }
14698+
14699+ /* swap two lower inode, and loop again */
14700+ q = fidir->fd_hfile + new_bindex;
14701+ tmp = *q;
14702+ *q = *p;
14703+ *p = tmp;
14704+ if (tmp.hf_file) { /* the swapped-in entry is in use: examine this slot again */
14705+ bindex--;
14706+ p--;
14707+ }
14708+ }
14709+
14710+ p = fidir->fd_hfile;
14711+ if (!au_test_mmapped(file) && !d_unlinked(file->f_path.dentry)) {
14712+ bbot = au_sbbot(sb);
14713+ for (finfo->fi_btop = 0; finfo->fi_btop <= bbot;
14714+ finfo->fi_btop++, p++)
14715+ if (p->hf_file) {
14716+ if (file_inode(p->hf_file))
14717+ break;
14718+ au_hfput(p, /*execed*/0);
14719+ }
14720+ } else { /* mmapped or unlinked: fi_btop becomes the current index of the saved branch id */
14721+ bbot = au_br_index(sb, brid);
14722+ for (finfo->fi_btop = 0; finfo->fi_btop < bbot;
14723+ finfo->fi_btop++, p++)
14724+ if (p->hf_file)
14725+ au_hfput(p, /*execed*/0);
14726+ bbot = au_sbbot(sb);
14727+ }
14728+ /* scan downwards from the last branch to find the new fd_bbot */
14729+ p = fidir->fd_hfile + bbot;
14730+ for (fidir->fd_bbot = bbot; fidir->fd_bbot >= finfo->fi_btop;
14731+ fidir->fd_bbot--, p--)
14732+ if (p->hf_file) {
14733+ if (file_inode(p->hf_file))
14734+ break;
14735+ au_hfput(p, /*execed*/0);
14736+ }
14737+ AuDebugOn(fidir->fd_bbot < finfo->fi_btop);
14738+}
14739+
14740+/*
14741+ * after branch manipulating, refresh the file: fix the branch indices, then copy-up and/or call @reopen as needed; returns 0 or a negative error.
14742+ */
14743+static int refresh_file(struct file *file, int (*reopen)(struct file *file))
14744+{
14745+ int err, need_reopen, nbr;
14746+ aufs_bindex_t bbot, bindex;
14747+ struct dentry *dentry;
14748+ struct super_block *sb;
14749+ struct au_finfo *finfo;
14750+ struct au_hfile *hfile;
14751+
14752+ dentry = file->f_path.dentry;
14753+ sb = dentry->d_sb;
14754+ nbr = au_sbbot(sb) + 1;
14755+ finfo = au_fi(file);
14756+ if (!finfo->fi_hdir) { /* no fi_hdir: only fi_htop has to follow its branch */
14757+ hfile = &finfo->fi_htop;
14758+ AuDebugOn(!hfile->hf_file);
14759+ bindex = au_br_index(sb, hfile->hf_br->br_id);
14760+ AuDebugOn(bindex < 0);
14761+ if (bindex != finfo->fi_btop)
14762+ au_set_fbtop(file, bindex);
14763+ } else { /* fi_hdir present: resize the table to nbr entries first, then re-map it */
14764+ err = au_fidir_realloc(finfo, nbr, /*may_shrink*/0);
14765+ if (unlikely(err))
14766+ goto out;
14767+ au_do_refresh_dir(file);
14768+ }
14769+
14770+ err = 0;
14771+ need_reopen = 1;
14772+ if (!au_test_mmapped(file))
14773+ err = au_file_refresh_by_inode(file, &need_reopen);
14774+ if (finfo->fi_hdir)
14775+ /* harmless if err */
14776+ au_fidir_realloc(finfo, nbr, /*may_shrink*/1);
14777+ if (!err && need_reopen && !d_unlinked(dentry))
14778+ err = reopen(file);
14779+ if (!err) {
14780+ au_update_figen(file);
14781+ goto out; /* success */
14782+ }
14783+
14784+ /* error, close all lower files */
14785+ if (finfo->fi_hdir) {
14786+ bbot = au_fbbot_dir(file);
14787+ for (bindex = au_fbtop(file); bindex <= bbot; bindex++)
14788+ au_set_h_fptr(file, bindex, NULL);
14789+ }
14790+
14791+out:
14792+ return err;
14793+}
14794+
14795+/* common function to regular file and dir: lock finfo and dinfo, refresh the file if it is stale; on error both are unlocked */
14796+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
14797+ int wlock, unsigned int fi_lsc)
14798+{
14799+ int err;
14800+ unsigned int sigen, figen;
14801+ aufs_bindex_t btop;
14802+ unsigned char pseudo_link;
14803+ struct dentry *dentry;
14804+ struct inode *inode;
14805+
14806+ err = 0;
14807+ dentry = file->f_path.dentry;
14808+ inode = d_inode(dentry);
14809+ sigen = au_sigen(dentry->d_sb);
14810+ fi_write_lock_nested(file, fi_lsc);
14811+ figen = au_figen(file);
14812+ if (!fi_lsc) /* the dinfo lock variant follows the finfo lock subclass */
14813+ di_write_lock_child(dentry);
14814+ else
14815+ di_write_lock_child2(dentry);
14816+ btop = au_dbtop(dentry);
14817+ pseudo_link = (btop != au_ibtop(inode));
14818+ if (sigen == figen && !pseudo_link && au_fbtop(file) == btop) { /* up to date: no refresh needed */
14819+ if (!wlock) { /* the caller asked for read locks only */
14820+ di_downgrade_lock(dentry, AuLock_IR);
14821+ fi_downgrade_lock(file);
14822+ }
14823+ goto out; /* success */
14824+ }
14825+
14826+ AuDbg("sigen %d, figen %d\n", sigen, figen);
14827+ if (au_digen_test(dentry, sigen)) {
14828+ err = au_reval_dpath(dentry, sigen);
14829+ AuDebugOn(!err && au_digen_test(dentry, sigen));
14830+ }
14831+
14832+ if (!err)
14833+ err = refresh_file(file, reopen);
14834+ if (!err) {
14835+ if (!wlock) {
14836+ di_downgrade_lock(dentry, AuLock_IR);
14837+ fi_downgrade_lock(file);
14838+ }
14839+ } else { /* failure: return with both locks released */
14840+ di_write_unlock(dentry);
14841+ fi_write_unlock(file);
14842+ }
14843+
14844+out:
14845+ return err;
14846+}
14847+
14848+/* ---------------------------------------------------------------------- */
14849+
14850+/* cf. aufs_nopage() */
14851+/* for madvise(2): only unlocks the folio and returns 0 */
14852+static int aufs_read_folio(struct file *file __maybe_unused, struct folio *folio)
14853+{
14854+ folio_unlock(folio);
14855+ return 0;
14856+}
14857+
14858+/* it will never be called, but necessary to support O_DIRECT; the body is BUG() */
14859+static ssize_t aufs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
14860+{ BUG(); return 0; }
14861+
14862+/* they will never be called: CONFIG_AUFS_DEBUG-only stubs that invoke AuUnsupport() and return a fixed value */
14863+#ifdef CONFIG_AUFS_DEBUG
14864+static int aufs_write_begin(struct file *file, struct address_space *mapping,
14865+ loff_t pos, unsigned len,
14866+ struct page **pagep, void **fsdata)
14867+{ AuUnsupport(); return 0; }
14868+static int aufs_write_end(struct file *file, struct address_space *mapping,
14869+ loff_t pos, unsigned len, unsigned copied,
14870+ struct page *page, void *fsdata)
14871+{ AuUnsupport(); return 0; }
14872+static int aufs_writepage(struct page *page, struct writeback_control *wbc)
14873+{ AuUnsupport(); return 0; }
14874+
14875+static bool aufs_dirty_folio(struct address_space *mapping, struct folio *folio)
14876+{ AuUnsupport(); return true; }
14877+static void aufs_invalidate_folio(struct folio *folio, size_t offset, size_t len)
14878+{ AuUnsupport(); }
14879+static bool aufs_release_folio(struct folio *folio, gfp_t gfp)
14880+{ AuUnsupport(); return true; }
14881+#if 0 /* called by memory compaction regardless file */
14882+static int aufs_migrate_folio(struct address_space *mapping, struct folio *dst,
14883+ struct folio *src, enum migrate_mode mode)
14884+{ AuUnsupport(); return 0; }
14885+#endif
14886+static int aufs_launder_folio(struct folio *folio)
14887+{ AuUnsupport(); return 0; }
14888+static bool aufs_is_partially_uptodate(struct folio *folio, size_t from,
14889+ size_t count)
14890+{ AuUnsupport(); return true; }
14891+static void aufs_is_dirty_writeback(struct folio *folio, bool *dirty,
14892+ bool *writeback)
14893+{ AuUnsupport(); }
14894+static int aufs_error_remove_page(struct address_space *mapping,
14895+ struct page *page)
14896+{ AuUnsupport(); return 0; }
14897+static int aufs_swap_activate(struct swap_info_struct *sis, struct file *file,
14898+ sector_t *span)
14899+{ AuUnsupport(); return 0; }
14900+static void aufs_swap_deactivate(struct file *file)
14901+{ AuUnsupport(); }
14902+static int aufs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
14903+{ AuUnsupport(); return 0; }
14904+#endif /* CONFIG_AUFS_DEBUG */
14905+/* address_space operations of aufs; everything except read_folio and direct_IO is set only under CONFIG_AUFS_DEBUG */
14906+const struct address_space_operations aufs_aop = {
14907+ .read_folio = aufs_read_folio,
14908+ .direct_IO = aufs_direct_IO,
14909+#ifdef CONFIG_AUFS_DEBUG
14910+ .writepage = aufs_writepage,
14911+ /* no writepages, because of writepage */
14912+ .dirty_folio = aufs_dirty_folio,
14913+ /* no readpages, because of readpage */
14914+ .write_begin = aufs_write_begin,
14915+ .write_end = aufs_write_end,
14916+ /* no bmap, no block device */
14917+ .invalidate_folio = aufs_invalidate_folio,
14918+ .release_folio = aufs_release_folio,
14919+ /* is fallback_migrate_page ok? */
14920+ /* .migrate_folio = aufs_migrate_folio, */
14921+ .launder_folio = aufs_launder_folio,
14922+ .is_partially_uptodate = aufs_is_partially_uptodate,
14923+ .is_dirty_writeback = aufs_is_dirty_writeback,
14924+ .error_remove_page = aufs_error_remove_page,
14925+ .swap_activate = aufs_swap_activate,
14926+ .swap_deactivate = aufs_swap_deactivate,
14927+ .swap_rw = aufs_swap_rw
14928+#endif /* CONFIG_AUFS_DEBUG */
14929+};
14930diff -urN /usr/share/empty/fs/aufs/file.h linux/fs/aufs/file.h
14931--- /usr/share/empty/fs/aufs/file.h 1970-01-01 01:00:00.000000000 +0100
14932+++ linux/fs/aufs/file.h 2022-11-05 23:02:18.965889284 +0100
14933@@ -0,0 +1,342 @@
14934+/* SPDX-License-Identifier: GPL-2.0 */
14935+/*
14936+ * Copyright (C) 2005-2022 Junjiro R. Okajima
14937+ *
14938+ * This program is free software; you can redistribute it and/or modify
14939+ * it under the terms of the GNU General Public License as published by
14940+ * the Free Software Foundation; either version 2 of the License, or
14941+ * (at your option) any later version.
14942+ *
14943+ * This program is distributed in the hope that it will be useful,
14944+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
14945+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14946+ * GNU General Public License for more details.
14947+ *
14948+ * You should have received a copy of the GNU General Public License
14949+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
14950+ */
14951+
14952+/*
14953+ * file operations
14954+ */
14955+
14956+#ifndef __AUFS_FILE_H__
14957+#define __AUFS_FILE_H__
14958+
14959+#ifdef __KERNEL__
14960+
14961+#include <linux/file.h>
14962+#include <linux/fs.h>
14963+#include <linux/mm_types.h>
14964+#include <linux/poll.h>
14965+#include "rwsem.h"
14966+
14967+struct au_branch;
14968+struct au_hfile {
14969+ struct file *hf_file;
14970+ struct au_branch *hf_br;
14971+};
14972+
14973+struct au_vdir;
14974+struct au_fidir {
14975+ aufs_bindex_t fd_bbot;
14976+ aufs_bindex_t fd_nent;
14977+ struct au_vdir *fd_vdir_cache;
14978+ struct au_hfile fd_hfile[];
14979+};
14980+
14981+static inline int au_fidir_sz(int nent)
14982+{
14983+ AuDebugOn(nent < 0);
14984+ return sizeof(struct au_fidir) + sizeof(struct au_hfile) * nent;
14985+}
14986+
14987+struct au_finfo {
14988+ atomic_t fi_generation;
14989+
14990+ struct au_rwsem fi_rwsem;
14991+ aufs_bindex_t fi_btop;
14992+
14993+ /* do not union them */
14994+ struct { /* for non-dir */
14995+ struct au_hfile fi_htop;
14996+ atomic_t fi_mmapped;
14997+ };
14998+ struct au_fidir *fi_hdir; /* for dir only */
14999+
15000+ struct hlist_bl_node fi_hlist;
15001+ struct file *fi_file; /* very ugly */
15002+ struct rcu_head rcu;
15003+} ____cacheline_aligned_in_smp;
15004+
15005+/* ---------------------------------------------------------------------- */
15006+
15007+/* file.c */
15008+extern const struct address_space_operations aufs_aop;
15009+unsigned int au_file_roflags(unsigned int flags);
15010+struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
15011+ struct file *file, int force_wr);
15012+struct au_do_open_args {
15013+ int aopen;
15014+ int (*open)(struct file *file, int flags,
15015+ struct file *h_file);
15016+ struct au_fidir *fidir;
15017+ struct file *h_file;
15018+};
15019+int au_do_open(struct file *file, struct au_do_open_args *args);
15020+int au_reopen_nondir(struct file *file);
15021+struct au_pin;
15022+int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin);
15023+int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
15024+ int wlock, unsigned int fi_lsc);
15025+int au_do_flush(struct file *file, fl_owner_t id,
15026+ int (*flush)(struct file *file, fl_owner_t id));
15027+
15028+/* poll.c */
15029+#ifdef CONFIG_AUFS_POLL
15030+__poll_t aufs_poll(struct file *file, struct poll_table_struct *pt);
15031+#endif
15032+
15033+#ifdef CONFIG_AUFS_BR_HFSPLUS
15034+/* hfsplus.c */
15035+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
15036+ int force_wr);
15037+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
15038+ struct file *h_file);
15039+#else
15040+AuStub(struct file *, au_h_open_pre, return NULL, struct dentry *dentry,
15041+ aufs_bindex_t bindex, int force_wr)
15042+AuStubVoid(au_h_open_post, struct dentry *dentry, aufs_bindex_t bindex,
15043+ struct file *h_file);
15044+#endif
15045+
15046+/* f_op.c */
15047+extern const struct file_operations aufs_file_fop;
15048+int au_do_open_nondir(struct file *file, int flags, struct file *h_file);
15049+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file);
15050+struct file *au_read_pre(struct file *file, int keep_fi, unsigned int lsc);
15051+
15052+/* finfo.c */
15053+void au_hfput(struct au_hfile *hf, int execed);
15054+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex,
15055+ struct file *h_file);
15056+
15057+void au_update_figen(struct file *file);
15058+struct au_fidir *au_fidir_alloc(struct super_block *sb);
15059+int au_fidir_realloc(struct au_finfo *finfo, int nbr, int may_shrink);
15060+
15061+void au_fi_init_once(void *_fi);
15062+void au_finfo_fin(struct file *file);
15063+int au_finfo_init(struct file *file, struct au_fidir *fidir);
15064+
15065+/* ioctl.c */
15066+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg);
15067+#ifdef CONFIG_COMPAT
15068+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
15069+ unsigned long arg);
15070+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
15071+ unsigned long arg);
15072+#endif
15073+
15074+/* ---------------------------------------------------------------------- */
15075+
15076+static inline struct au_finfo *au_fi(struct file *file)
15077+{
15078+ return file->private_data;
15079+}
15080+
15081+/* ---------------------------------------------------------------------- */
15082+
15083+#define fi_read_lock(f) au_rw_read_lock(&au_fi(f)->fi_rwsem)
15084+#define fi_write_lock(f) au_rw_write_lock(&au_fi(f)->fi_rwsem)
15085+#define fi_read_trylock(f) au_rw_read_trylock(&au_fi(f)->fi_rwsem)
15086+#define fi_write_trylock(f) au_rw_write_trylock(&au_fi(f)->fi_rwsem)
15087+/*
15088+#define fi_read_trylock_nested(f) \
15089+ au_rw_read_trylock_nested(&au_fi(f)->fi_rwsem)
15090+#define fi_write_trylock_nested(f) \
15091+ au_rw_write_trylock_nested(&au_fi(f)->fi_rwsem)
15092+*/
15093+
15094+#define fi_read_unlock(f) au_rw_read_unlock(&au_fi(f)->fi_rwsem)
15095+#define fi_write_unlock(f) au_rw_write_unlock(&au_fi(f)->fi_rwsem)
15096+#define fi_downgrade_lock(f) au_rw_dgrade_lock(&au_fi(f)->fi_rwsem)
15097+
15098+/* lock subclass for finfo */
15099+enum {
15100+ AuLsc_FI_1,
15101+ AuLsc_FI_2
15102+};
15103+
15104+static inline void fi_read_lock_nested(struct file *f, unsigned int lsc)
15105+{
15106+ au_rw_read_lock_nested(&au_fi(f)->fi_rwsem, lsc);
15107+}
15108+
15109+static inline void fi_write_lock_nested(struct file *f, unsigned int lsc)
15110+{
15111+ au_rw_write_lock_nested(&au_fi(f)->fi_rwsem, lsc);
15112+}
15113+
15114+/*
15115+ * fi_read_lock_1, fi_write_lock_1,
15116+ * fi_read_lock_2, fi_write_lock_2
15117+ */
15118+#define AuReadLockFunc(name) \
15119+static inline void fi_read_lock_##name(struct file *f) \
15120+{ fi_read_lock_nested(f, AuLsc_FI_##name); }
15121+
15122+#define AuWriteLockFunc(name) \
15123+static inline void fi_write_lock_##name(struct file *f) \
15124+{ fi_write_lock_nested(f, AuLsc_FI_##name); }
15125+
15126+#define AuRWLockFuncs(name) \
15127+ AuReadLockFunc(name) \
15128+ AuWriteLockFunc(name)
15129+
15130+AuRWLockFuncs(1);
15131+AuRWLockFuncs(2);
15132+
15133+#undef AuReadLockFunc
15134+#undef AuWriteLockFunc
15135+#undef AuRWLockFuncs
15136+
15137+#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem)
15138+#define FiMustAnyLock(f) AuRwMustAnyLock(&au_fi(f)->fi_rwsem)
15139+#define FiMustWriteLock(f) AuRwMustWriteLock(&au_fi(f)->fi_rwsem)
15140+
15141+/* ---------------------------------------------------------------------- */
15142+
15143+/* todo: hard/soft set? */
15144+static inline aufs_bindex_t au_fbtop(struct file *file)
15145+{
15146+ FiMustAnyLock(file);
15147+ return au_fi(file)->fi_btop;
15148+}
15149+
15150+static inline aufs_bindex_t au_fbbot_dir(struct file *file)
15151+{
15152+ FiMustAnyLock(file);
15153+ AuDebugOn(!au_fi(file)->fi_hdir);
15154+ return au_fi(file)->fi_hdir->fd_bbot;
15155+}
15156+
15157+static inline struct au_vdir *au_fvdir_cache(struct file *file)
15158+{
15159+ FiMustAnyLock(file);
15160+ AuDebugOn(!au_fi(file)->fi_hdir);
15161+ return au_fi(file)->fi_hdir->fd_vdir_cache;
15162+}
15163+
15164+static inline void au_set_fbtop(struct file *file, aufs_bindex_t bindex)
15165+{
15166+ FiMustWriteLock(file);
15167+ au_fi(file)->fi_btop = bindex;
15168+}
15169+
15170+static inline void au_set_fbbot_dir(struct file *file, aufs_bindex_t bindex)
15171+{
15172+ FiMustWriteLock(file);
15173+ AuDebugOn(!au_fi(file)->fi_hdir);
15174+ au_fi(file)->fi_hdir->fd_bbot = bindex;
15175+}
15176+
15177+static inline void au_set_fvdir_cache(struct file *file,
15178+ struct au_vdir *vdir_cache)
15179+{
15180+ FiMustWriteLock(file);
15181+ AuDebugOn(!au_fi(file)->fi_hdir);
15182+ au_fi(file)->fi_hdir->fd_vdir_cache = vdir_cache;
15183+}
15184+
15185+static inline struct file *au_hf_top(struct file *file)
15186+{
15187+ FiMustAnyLock(file);
15188+ AuDebugOn(au_fi(file)->fi_hdir);
15189+ return au_fi(file)->fi_htop.hf_file;
15190+}
15191+
15192+static inline struct file *au_hf_dir(struct file *file, aufs_bindex_t bindex)
15193+{
15194+ FiMustAnyLock(file);
15195+ AuDebugOn(!au_fi(file)->fi_hdir);
15196+ return au_fi(file)->fi_hdir->fd_hfile[0 + bindex].hf_file;
15197+}
15198+
15199+/* todo: memory barrier? */
15200+static inline unsigned int au_figen(struct file *f)
15201+{
15202+ return atomic_read(&au_fi(f)->fi_generation);
15203+}
15204+
15205+static inline void au_set_mmapped(struct file *f)
15206+{
15207+ if (atomic_inc_return(&au_fi(f)->fi_mmapped))
15208+ return;
15209+ pr_warn("fi_mmapped wrapped around\n");
15210+ while (!atomic_inc_return(&au_fi(f)->fi_mmapped))
15211+ ;
15212+}
15213+
15214+static inline void au_unset_mmapped(struct file *f)
15215+{
15216+ atomic_dec(&au_fi(f)->fi_mmapped);
15217+}
15218+
15219+static inline int au_test_mmapped(struct file *f)
15220+{
15221+ return atomic_read(&au_fi(f)->fi_mmapped);
15222+}
15223+
15224+/* customize vma->vm_file */
15225+
15226+static inline void au_do_vm_file_reset(struct vm_area_struct *vma,
15227+ struct file *file)
15228+{
15229+ struct file *f;
15230+
15231+ f = vma->vm_file;
15232+ get_file(file);
15233+ vma->vm_file = file;
15234+ fput(f);
15235+}
15236+
15237+#ifdef CONFIG_MMU
15238+#define AuDbgVmRegion(file, vma) do {} while (0)
15239+
15240+static inline void au_vm_file_reset(struct vm_area_struct *vma,
15241+ struct file *file)
15242+{
15243+ au_do_vm_file_reset(vma, file);
15244+}
15245+#else
15246+#define AuDbgVmRegion(file, vma) \
15247+ AuDebugOn((vma)->vm_region && (vma)->vm_region->vm_file != (file))
15248+
15249+static inline void au_vm_file_reset(struct vm_area_struct *vma,
15250+ struct file *file)
15251+{
15252+ struct file *f;
15253+
15254+ au_do_vm_file_reset(vma, file);
15255+ f = vma->vm_region->vm_file;
15256+ get_file(file);
15257+ vma->vm_region->vm_file = file;
15258+ fput(f);
15259+}
15260+#endif /* CONFIG_MMU */
15261+
15262+/* handle vma->vm_prfile */
15263+static inline void au_vm_prfile_set(struct vm_area_struct *vma,
15264+ struct file *file)
15265+{
15266+ get_file(file);
15267+ vma->vm_prfile = file;
15268+#ifndef CONFIG_MMU
15269+ get_file(file);
15270+ vma->vm_region->vm_prfile = file;
15271+#endif
15272+}
15273+
15274+#endif /* __KERNEL__ */
15275+#endif /* __AUFS_FILE_H__ */
15276diff -urN /usr/share/empty/fs/aufs/finfo.c linux/fs/aufs/finfo.c
15277--- /usr/share/empty/fs/aufs/finfo.c 1970-01-01 01:00:00.000000000 +0100
15278+++ linux/fs/aufs/finfo.c 2022-11-05 23:02:18.965889284 +0100
15279@@ -0,0 +1,149 @@
15280+// SPDX-License-Identifier: GPL-2.0
15281+/*
15282+ * Copyright (C) 2005-2022 Junjiro R. Okajima
15283+ *
15284+ * This program is free software; you can redistribute it and/or modify
15285+ * it under the terms of the GNU General Public License as published by
15286+ * the Free Software Foundation; either version 2 of the License, or
15287+ * (at your option) any later version.
15288+ *
15289+ * This program is distributed in the hope that it will be useful,
15290+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15291+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15292+ * GNU General Public License for more details.
15293+ *
15294+ * You should have received a copy of the GNU General Public License
15295+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
15296+ */
15297+
15298+/*
15299+ * file private data
15300+ */
15301+
15302+#include "aufs.h"
15303+
15304+void au_hfput(struct au_hfile *hf, int execed)
15305+{
15306+ if (execed)
15307+ allow_write_access(hf->hf_file);
15308+ fput(hf->hf_file);
15309+ hf->hf_file = NULL;
15310+ au_lcnt_dec(&hf->hf_br->br_nfiles);
15311+ hf->hf_br = NULL;
15312+}
15313+
15314+void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val)
15315+{
15316+ struct au_finfo *finfo = au_fi(file);
15317+ struct au_hfile *hf;
15318+ struct au_fidir *fidir;
15319+
15320+ fidir = finfo->fi_hdir;
15321+ if (!fidir) {
15322+ AuDebugOn(finfo->fi_btop != bindex);
15323+ hf = &finfo->fi_htop;
15324+ } else
15325+ hf = fidir->fd_hfile + bindex;
15326+
15327+ if (hf && hf->hf_file)
15328+ au_hfput(hf, vfsub_file_execed(file));
15329+ if (val) {
15330+ FiMustWriteLock(file);
15331+ AuDebugOn(IS_ERR_OR_NULL(file->f_path.dentry));
15332+ hf->hf_file = val;
15333+ hf->hf_br = au_sbr(file->f_path.dentry->d_sb, bindex);
15334+ }
15335+}
15336+
15337+void au_update_figen(struct file *file)
15338+{
15339+ atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_path.dentry));
15340+ /* smp_mb(); */ /* atomic_set */
15341+}
15342+
15343+/* ---------------------------------------------------------------------- */
15344+
15345+struct au_fidir *au_fidir_alloc(struct super_block *sb)
15346+{
15347+ struct au_fidir *fidir;
15348+ int nbr;
15349+
15350+ nbr = au_sbbot(sb) + 1;
15351+ if (nbr < 2)
15352+ nbr = 2; /* initial allocate for 2 branches */
15353+ fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS);
15354+ if (fidir) {
15355+ fidir->fd_bbot = -1;
15356+ fidir->fd_nent = nbr;
15357+ }
15358+
15359+ return fidir;
15360+}
15361+
15362+int au_fidir_realloc(struct au_finfo *finfo, int nbr, int may_shrink)
15363+{
15364+ int err;
15365+ struct au_fidir *fidir, *p;
15366+
15367+ AuRwMustWriteLock(&finfo->fi_rwsem);
15368+ fidir = finfo->fi_hdir;
15369+ AuDebugOn(!fidir);
15370+
15371+ err = -ENOMEM;
15372+ p = au_kzrealloc(fidir, au_fidir_sz(fidir->fd_nent), au_fidir_sz(nbr),
15373+ GFP_NOFS, may_shrink);
15374+ if (p) {
15375+ p->fd_nent = nbr;
15376+ finfo->fi_hdir = p;
15377+ err = 0;
15378+ }
15379+
15380+ return err;
15381+}
15382+
15383+/* ---------------------------------------------------------------------- */
15384+
15385+void au_finfo_fin(struct file *file)
15386+{
15387+ struct au_finfo *finfo;
15388+
15389+ au_lcnt_dec(&au_sbi(file->f_path.dentry->d_sb)->si_nfiles);
15390+
15391+ finfo = au_fi(file);
15392+ AuDebugOn(finfo->fi_hdir);
15393+ AuRwDestroy(&finfo->fi_rwsem);
15394+ au_cache_free_finfo(finfo);
15395+}
15396+
15397+void au_fi_init_once(void *_finfo)
15398+{
15399+ struct au_finfo *finfo = _finfo;
15400+
15401+ au_rw_init(&finfo->fi_rwsem);
15402+}
15403+
15404+int au_finfo_init(struct file *file, struct au_fidir *fidir)
15405+{
15406+ int err;
15407+ struct au_finfo *finfo;
15408+ struct dentry *dentry;
15409+
15410+ err = -ENOMEM;
15411+ dentry = file->f_path.dentry;
15412+ finfo = au_cache_alloc_finfo();
15413+ if (unlikely(!finfo))
15414+ goto out;
15415+
15416+ err = 0;
15417+ au_lcnt_inc(&au_sbi(dentry->d_sb)->si_nfiles);
15418+ au_rw_write_lock(&finfo->fi_rwsem);
15419+ finfo->fi_btop = -1;
15420+ finfo->fi_hdir = fidir;
15421+ atomic_set(&finfo->fi_generation, au_digen(dentry));
15422+ /* smp_mb(); */ /* atomic_set */
15423+
15424+ file->private_data = finfo;
15425+
15426+out:
15427+ return err;
15428+}
15429diff -urN /usr/share/empty/fs/aufs/f_op.c linux/fs/aufs/f_op.c
15430--- /usr/share/empty/fs/aufs/f_op.c 1970-01-01 01:00:00.000000000 +0100
15431+++ linux/fs/aufs/f_op.c 2023-02-20 21:05:51.959693785 +0100
15432@@ -0,0 +1,780 @@
15433+// SPDX-License-Identifier: GPL-2.0
15434+/*
15435+ * Copyright (C) 2005-2022 Junjiro R. Okajima
15436+ *
15437+ * This program is free software; you can redistribute it and/or modify
15438+ * it under the terms of the GNU General Public License as published by
15439+ * the Free Software Foundation; either version 2 of the License, or
15440+ * (at your option) any later version.
15441+ *
15442+ * This program is distributed in the hope that it will be useful,
15443+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
15444+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15445+ * GNU General Public License for more details.
15446+ *
15447+ * You should have received a copy of the GNU General Public License
15448+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
15449+ */
15450+
15451+/*
15452+ * file and vm operations
15453+ */
15454+
15455+#include <linux/aio.h>
15456+#include <linux/fs_stack.h>
15457+#include <linux/mman.h>
15458+#include <linux/security.h>
15459+#include "aufs.h"
15460+
15461+int au_do_open_nondir(struct file *file, int flags, struct file *h_file)
15462+{
15463+ int err;
15464+ aufs_bindex_t bindex;
15465+ struct dentry *dentry, *h_dentry;
15466+ struct au_finfo *finfo;
15467+ struct inode *h_inode;
15468+
15469+ FiMustWriteLock(file);
15470+
15471+ err = 0;
15472+ dentry = file->f_path.dentry;
15473+ AuDebugOn(IS_ERR_OR_NULL(dentry));
15474+ finfo = au_fi(file);
15475+ memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop));
15476+ atomic_set(&finfo->fi_mmapped, 0);
15477+ bindex = au_dbtop(dentry);
15478+ if (!h_file) {
15479+ h_dentry = au_h_dptr(dentry, bindex);
15480+ err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb);
15481+ if (unlikely(err))
15482+ goto out;
15483+ h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
15484+ if (IS_ERR(h_file)) {
15485+ err = PTR_ERR(h_file);
15486+ goto out;
15487+ }
15488+ } else {
15489+ h_dentry = h_file->f_path.dentry;
15490+ err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb);
15491+ if (unlikely(err))
15492+ goto out;
15493+ /* br ref is already inc-ed */
15494+ }
15495+
15496+ if (flags & __O_TMPFILE) {
15497+ AuDebugOn(!h_file);
15498+ AuDebugOn(h_file != au_di(dentry)->di_htmpfile);
15499+ au_di(dentry)->di_htmpfile = NULL;
15500+
15501+ if (!(flags & O_EXCL)) {
15502+ h_inode = file_inode(h_file);
15503+ spin_lock(&h_inode->i_lock);
15504+ h_inode->i_state |= I_LINKABLE;
15505+ spin_unlock(&h_inode->i_lock);
15506+ }
15507+ }
15508+ au_set_fbtop(file, bindex);
15509+ au_set_h_fptr(file, bindex, h_file);
15510+ au_update_figen(file);
15511+ /* todo: necessary? */
15512+ /* file->f_ra = h_file->f_ra; */
15513+
15514+out:
15515+ return err;
15516+}
15517+
15518+static int aufs_open_nondir(struct inode *inode __maybe_unused,
15519+ struct file *file)
15520+{
15521+ int err;
15522+ struct super_block *sb;
15523+ struct au_do_open_args args = {
15524+ .open = au_do_open_nondir
15525+ };
15526+
15527+ AuDbg("%pD, f_flags 0x%x, f_mode 0x%x\n",
15528+ file, vfsub_file_flags(file), file->f_mode);
15529+
15530+ sb = file->f_path.dentry->d_sb;
15531+ si_read_lock(sb, AuLock_FLUSH);
15532+ err = au_do_open(file, &args);
15533+ si_read_unlock(sb);
15534+ return err;
15535+}
15536+
15537+int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file)
15538+{
15539+ struct au_finfo *finfo;
15540+ aufs_bindex_t bindex;
15541+
15542+ finfo = au_fi(file);
15543+ au_hbl_del(&finfo->fi_hlist,
15544+ &au_sbi(file->f_path.dentry->d_sb)->si_files);
15545+ bindex = finfo->fi_btop;
15546+ if (bindex >= 0)
15547+ au_set_h_fptr(file, bindex, NULL);
15548+
15549+ au_finfo_fin(file);
15550+ return 0;
15551+}
15552+
15553+/* ---------------------------------------------------------------------- */
15554+
15555+static int au_do_flush_nondir(struct file *file, fl_owner_t id)
15556+{
15557+ int err;
15558+ struct file *h_file;
15559+
15560+ err = 0;
15561+ h_file = au_hf_top(file);
15562+ if (h_file)
15563+ err = vfsub_flush(h_file, id);
15564+ return err;
15565+}
15566+
15567+static int aufs_flush_nondir(struct file *file, fl_owner_t id)
15568+{
15569+ return au_do_flush(file, id, au_do_flush_nondir);
15570+}
15571+
15572+/* ---------------------------------------------------------------------- */
15573+/*
15574+ * read and write functions acquire [fdi]_rwsem once, but release before
15575+ * mmap_sem. This is done to prevent a race condition with mmap(2).
15576+ * Releasing these aufs-rwsem should be safe, no branch-management (by keeping
15577+ * si_rwsem), no harmful copy-up should happen. Actually copy-up may happen in
15578+ * read functions after [fdi]_rwsem are released, but it should be harmless.
15579+ */
15580+
15581+/* Callers should call au_read_post() or fput() in the end */
15582+struct file *au_read_pre(struct file *file, int keep_fi, unsigned int lsc)
15583+{
15584+ struct file *h_file;
15585+ int err;
15586+
15587+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0, lsc);
15588+ if (!err) {
15589+ di_read_unlock(file->f_path.dentry, AuLock_IR);
15590+ h_file = au_hf_top(file);
15591+ get_file(h_file);
15592+ if (!keep_fi)
15593+ fi_read_unlock(file);
15594+ } else
15595+ h_file = ERR_PTR(err);
15596+
15597+ return h_file;
15598+}
15599+
15600+static void au_read_post(struct inode *inode, struct file *h_file)
15601+{
15602+ /* update without lock, I don't think it is a problem */
15603+ fsstack_copy_attr_atime(inode, file_inode(h_file));
15604+ fput(h_file);
15605+}
15606+
15607+struct au_write_pre {
15608+ /* input */
15609+ unsigned int lsc;
15610+
15611+ /* output */
15612+ blkcnt_t blks;
15613+ aufs_bindex_t btop;
15614+};
15615+
15616+/*
15617+ * returns with iinfo write-locked
15618+ * callers should call au_write_post() or iinfo_write_unlock() + fput() in the
15619+ * end
15620+ */
15621+static struct file *au_write_pre(struct file *file, int do_ready,
15622+ struct au_write_pre *wpre)
15623+{
15624+ struct file *h_file;
15625+ struct dentry *dentry;
15626+ int err;
15627+ unsigned int lsc;
15628+ struct au_pin pin;
15629+
15630+ lsc = 0;
15631+ if (wpre)
15632+ lsc = wpre->lsc;
15633+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1, lsc);
15634+ h_file = ERR_PTR(err);
15635+ if (unlikely(err))
15636+ goto out;
15637+
15638+ dentry = file->f_path.dentry;
15639+ if (do_ready) {
15640+ err = au_ready_to_write(file, -1, &pin);
15641+ if (unlikely(err)) {
15642+ h_file = ERR_PTR(err);
15643+ di_write_unlock(dentry);
15644+ goto out_fi;
15645+ }
15646+ }
15647+
15648+ di_downgrade_lock(dentry, /*flags*/0);
15649+ if (wpre)
15650+ wpre->btop = au_fbtop(file);
15651+ h_file = au_hf_top(file);
15652+ get_file(h_file);
15653+ if (wpre)
15654+ wpre->blks = file_inode(h_file)->i_blocks;
15655+ if (do_ready)
15656+ au_unpin(&pin);
15657+ di_read_unlock(dentry, /*flags*/0);
15658+
15659+out_fi:
15660+ fi_write_unlock(file);
15661+out:
15662+ return h_file;
15663+}
15664+
15665+static void au_write_post(struct inode *inode, struct file *h_file,
15666+ struct au_write_pre *wpre, ssize_t written)
15667+{
15668+ struct inode *h_inode;
15669+
15670+ au_cpup_attr_timesizes(inode);
15671+ AuDebugOn(au_ibtop(inode) != wpre->btop);
15672+ h_inode = file_inode(h_file);
15673+ inode->i_mode = h_inode->i_mode;
15674+ ii_write_unlock(inode);
15675+ /* AuDbg("blks %llu, %llu\n", (u64)blks, (u64)h_inode->i_blocks); */
15676+ if (written > 0)
15677+ au_fhsm_wrote(inode->i_sb, wpre->btop,
15678+ /*force*/h_inode->i_blocks > wpre->blks);
15679+ fput(h_file);
15680+}
15681+
15682+/*
15683+ * todo: very ugly
15684+ * it safely acquires both i_mutex and si_rwsem (for read).
15685+ * if the plink maintenance mode continues forever (that is the problem),
15686+ * this may loop forever.
15687+ */
15688+static void au_mtx_and_read_lock(struct inode *inode)
15689+{
15690+ int err;
15691+ struct super_block *sb = inode->i_sb;
15692+
15693+ while (1) {
15694+ inode_lock(inode);
15695+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
15696+ if (!err)
15697+ break;
15698+ inode_unlock(inode);
15699+ si_read_lock(sb, AuLock_NOPLMW);
15700+ si_read_unlock(sb);
15701+ }
15702+}
15703+
15704+static ssize_t au_do_iter(struct file *h_file, int rw, struct kiocb *kio,
15705+ struct iov_iter *iov_iter)
15706+{
15707+ ssize_t err;
15708+ struct file *file;
15709+ ssize_t (*iter)(struct kiocb *, struct iov_iter *);
15710+
15711+ err = security_file_permission(h_file, rw);
15712+ if (unlikely(err))
15713+ goto out;
15714+
15715+ err = -ENOSYS; /* the branch doesn't have its ->(read|write)_iter() */
15716+ iter = NULL;
15717+ if (rw == MAY_READ)
15718+ iter = h_file->f_op->read_iter;
15719+ else if (rw == MAY_WRITE)
15720+ iter = h_file->f_op->write_iter;
15721+
15722+ file = kio->ki_filp;
15723+ kio->ki_filp = h_file;
15724+ if (iter) {
15725+ lockdep_off();
15726+ err = iter(kio, iov_iter);
15727+ lockdep_on();
15728+ } else
15729+ /* currently there is no such fs */
15730+ WARN_ON_ONCE(1);
15731+ kio->ki_filp = file;
15732+
15733+out:
15734+ return err;
15735+}
15736+
15737+static ssize_t aufs_read_iter(struct kiocb *kio, struct iov_iter *iov_iter)
15738+{
15739+ ssize_t err;
15740+ struct file *file, *h_file;
15741+ struct inode *inode;
15742+ struct super_block *sb;
15743+
15744+ file = kio->ki_filp;
15745+ inode = file_inode(file);
15746+ sb = inode->i_sb;
15747+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
15748+
15749+ h_file = au_read_pre(file, /*keep_fi*/1, /*lsc*/0);
15750+ err = PTR_ERR(h_file);
15751+ if (IS_ERR(h_file))
15752+ goto out;
15753+
15754+ if (au_test_loopback_kthread()) {
15755+ au_warn_loopback(h_file->f_path.dentry->d_sb);
15756+ if (file->f_mapping != h_file->f_mapping) {
15757+ file->f_mapping = h_file->f_mapping;
15758+ smp_mb(); /* unnecessary? */
15759+ }
15760+ }
15761+ fi_read_unlock(file);
15762+
15763+ err = au_do_iter(h_file, MAY_READ, kio, iov_iter);
15764+ /* todo: necessary? */
15765+ /* file->f_ra = h_file->f_ra; */
15766+ au_read_post(inode, h_file);
15767+
15768+out:
15769+ si_read_unlock(sb);
15770+ return err;
15771+}
15772+
15773+static ssize_t aufs_write_iter(struct kiocb *kio, struct iov_iter *iov_iter)
15774+{
15775+ ssize_t err;
15776+ struct au_write_pre wpre;
15777+ struct inode *inode;
15778+ struct file *file, *h_file;
15779+
15780+ file = kio->ki_filp;
15781+ inode = file_inode(file);
15782+ au_mtx_and_read_lock(inode);
15783+
15784+ wpre.lsc = 0;
15785+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
15786+ err = PTR_ERR(h_file);
15787+ if (IS_ERR(h_file))
15788+ goto out;
15789+
15790+ err = au_do_iter(h_file, MAY_WRITE, kio, iov_iter);
15791+ au_write_post(inode, h_file, &wpre, err);
15792+
15793+out:
15794+ si_read_unlock(inode->i_sb);
15795+ inode_unlock(inode);
15796+ return err;
15797+}
15798+
15799+/*
15800+ * We may be able to remove aufs_splice_{read,write}() since almost all FSes
15801+ * don't have their own .splice_{read,write} implementations, and they use
15802+ * generic_file_splice_read() and iter_file_splice_write() which can act like
15803+ * simple converters to f_op->read_iter() and ->write_iter().
15804+ * But we keep our own implementations because some non-mainlined FSes may have
15805+ * their own .splice_{read,write} implementations and aufs doesn't want to take
15806+ * away an opportunity to co-work with aufs from them.
15807+ */
15808+static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
15809+ struct pipe_inode_info *pipe, size_t len,
15810+ unsigned int flags)
15811+{
15812+ ssize_t err;
15813+ struct file *h_file;
15814+ struct inode *inode;
15815+ struct super_block *sb;
15816+
15817+ inode = file_inode(file);
15818+ sb = inode->i_sb;
15819+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
15820+
15821+ h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
15822+ err = PTR_ERR(h_file);
15823+ if (IS_ERR(h_file))
15824+ goto out;
15825+
15826+ err = vfsub_splice_to(h_file, ppos, pipe, len, flags);
15827+ /* todo: necessary? */
15828+ /* file->f_ra = h_file->f_ra; */
15829+ au_read_post(inode, h_file);
15830+
15831+out:
15832+ si_read_unlock(sb);
15833+ return err;
15834+}
15835+
15836+static ssize_t
15837+aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
15838+ size_t len, unsigned int flags)
15839+{
15840+ ssize_t err;
15841+ struct au_write_pre wpre;
15842+ struct inode *inode;
15843+ struct file *h_file;
15844+
15845+ inode = file_inode(file);
15846+ au_mtx_and_read_lock(inode);
15847+
15848+ wpre.lsc = 0;
15849+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
15850+ err = PTR_ERR(h_file);
15851+ if (IS_ERR(h_file))
15852+ goto out;
15853+
15854+ err = vfsub_splice_from(pipe, h_file, ppos, len, flags);
15855+ au_write_post(inode, h_file, &wpre, err);
15856+
15857+out:
15858+ si_read_unlock(inode->i_sb);
15859+ inode_unlock(inode);
15860+ return err;
15861+}
15862+
15863+static long aufs_fallocate(struct file *file, int mode, loff_t offset,
15864+ loff_t len)
15865+{
15866+ long err;
15867+ struct au_write_pre wpre;
15868+ struct inode *inode;
15869+ struct file *h_file;
15870+
15871+ inode = file_inode(file);
15872+ au_mtx_and_read_lock(inode);
15873+
15874+ wpre.lsc = 0;
15875+ h_file = au_write_pre(file, /*do_ready*/1, &wpre);
15876+ err = PTR_ERR(h_file);
15877+ if (IS_ERR(h_file))
15878+ goto out;
15879+
15880+ lockdep_off();
15881+ err = vfs_fallocate(h_file, mode, offset, len);
15882+ lockdep_on();
15883+ /*
15884+ * we don't need to call file_modified() here since au_write_post()
15885+ * is equivalent and copies-up all timestamps and permission bits.
15886+ */
15887+ au_write_post(inode, h_file, &wpre, /*written*/1);
15888+
15889+out:
15890+ si_read_unlock(inode->i_sb);
15891+ inode_unlock(inode);
15892+ return err;
15893+}
15894+
15895+static ssize_t aufs_copy_file_range(struct file *src, loff_t src_pos,
15896+ struct file *dst, loff_t dst_pos,
15897+ size_t len, unsigned int flags)
15898+{
15899+ ssize_t err;
15900+ struct au_write_pre wpre;
15901+ enum { SRC, DST };
15902+ struct {
15903+ struct inode *inode;
15904+ struct file *h_file;
15905+ struct super_block *h_sb;
15906+ } a[2];
15907+#define a_src a[SRC]
15908+#define a_dst a[DST]
15909+
15910+ err = -EINVAL;
15911+ a_src.inode = file_inode(src);
15912+ if (unlikely(!S_ISREG(a_src.inode->i_mode)))
15913+ goto out;
15914+ a_dst.inode = file_inode(dst);
15915+ if (unlikely(!S_ISREG(a_dst.inode->i_mode)))
15916+ goto out;
15917+
15918+ au_mtx_and_read_lock(a_dst.inode);
15919+ /*
15920+ * in order to match the order in di_write_lock2_{child,parent}(),
15921+ * use f_path.dentry for this comparison.
15922+ */
15923+ if (src->f_path.dentry < dst->f_path.dentry) {
15924+ a_src.h_file = au_read_pre(src, /*keep_fi*/1, AuLsc_FI_1);
15925+ err = PTR_ERR(a_src.h_file);
15926+ if (IS_ERR(a_src.h_file))
15927+ goto out_si;
15928+
15929+ wpre.lsc = AuLsc_FI_2;
15930+ a_dst.h_file = au_write_pre(dst, /*do_ready*/1, &wpre);
15931+ err = PTR_ERR(a_dst.h_file);
15932+ if (IS_ERR(a_dst.h_file)) {
15933+ au_read_post(a_src.inode, a_src.h_file);
15934+ goto out_si;
15935+ }
15936+ } else {
15937+ wpre.lsc = AuLsc_FI_1;
15938+ a_dst.h_file = au_write_pre(dst, /*do_ready*/1, &wpre);
15939+ err = PTR_ERR(a_dst.h_file);
15940+ if (IS_ERR(a_dst.h_file))
15941+ goto out_si;
15942+
15943+ a_src.h_file = au_read_pre(src, /*keep_fi*/1, AuLsc_FI_2);
15944+ err = PTR_ERR(a_src.h_file);
15945+ if (IS_ERR(a_src.h_file)) {
15946+ au_write_post(a_dst.inode, a_dst.h_file, &wpre,
15947+ /*written*/0);
15948+ goto out_si;
15949+ }
15950+ }
15951+
15952+ err = -EXDEV;
15953+ a_src.h_sb = file_inode(a_src.h_file)->i_sb;
15954+ a_dst.h_sb = file_inode(a_dst.h_file)->i_sb;
15955+ if (unlikely(a_src.h_sb != a_dst.h_sb)) {
15956+ AuDbgFile(src);
15957+ AuDbgFile(dst);
15958+ goto out_file;
15959+ }
15960+
15961+ err = vfsub_copy_file_range(a_src.h_file, src_pos, a_dst.h_file,
15962+ dst_pos, len, flags);
15963+
15964+out_file:
15965+ au_write_post(a_dst.inode, a_dst.h_file, &wpre, err);
15966+ fi_read_unlock(src);
15967+ au_read_post(a_src.inode, a_src.h_file);
15968+out_si:
15969+ si_read_unlock(a_dst.inode->i_sb);
15970+ inode_unlock(a_dst.inode);
15971+out:
15972+ return err;
15973+#undef a_src
15974+#undef a_dst
15975+}
15976+
15977+/* ---------------------------------------------------------------------- */
15978+
15979+/*
15980+ * The locking order around current->mmap_sem.
15981+ * - in most and regular cases
15982+ * file I/O syscall -- aufs_read() or something
15983+ * -- si_rwsem for read -- mmap_sem
15984+ * (Note that [fdi]i_rwsem are released before mmap_sem).
15985+ * - in mmap case
15986+ * mmap(2) -- mmap_sem -- aufs_mmap() -- si_rwsem for read -- [fdi]i_rwsem
15987+ * This AB-BA order is definitely bad, but is not a problem since "si_rwsem for
15988+ * read" allows multiple processes to acquire it and [fdi]i_rwsem are not held
15989+ * in file I/O. Aufs needs to stop lockdep in aufs_mmap() though.
15990+ * It means that when aufs acquires si_rwsem for write, the process should never
15991+ * acquire mmap_sem.
15992+ *
15993+ * Actually aufs_iterate() holds [fdi]i_rwsem before mmap_sem, but this is not a
15994+ * problem either, since a directory can never be mmap-ed.
15995+ * A similar scenario applies to aufs_readlink() too.
15996+ */
15997+
15998+#if 0 /* stop calling security_file_mmap() */
15999+/* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */
16000+#define AuConv_VM_PROT(f, b) _calc_vm_trans(f, VM_##b, PROT_##b)
16001+
16002+static unsigned long au_arch_prot_conv(unsigned long flags)
16003+{
16004+ /* currently ppc64 only */
16005+#ifdef CONFIG_PPC64
16006+ /* cf. linux/arch/powerpc/include/asm/mman.h */
16007+ AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO);
16008+ return AuConv_VM_PROT(flags, SAO);
16009+#else
16010+ AuDebugOn(arch_calc_vm_prot_bits(-1));
16011+ return 0;
16012+#endif
16013+}
16014+
16015+static unsigned long au_prot_conv(unsigned long flags)
16016+{
16017+ return AuConv_VM_PROT(flags, READ)
16018+ | AuConv_VM_PROT(flags, WRITE)
16019+ | AuConv_VM_PROT(flags, EXEC)
16020+ | au_arch_prot_conv(flags);
16021+}
16022+
16023+/* cf. linux/include/linux/mman.h: calc_vm_flag_bits() */
16024+#define AuConv_VM_MAP(f, b) _calc_vm_trans(f, VM_##b, MAP_##b)
16025+
16026+static unsigned long au_flag_conv(unsigned long flags)
16027+{
16028+ return AuConv_VM_MAP(flags, GROWSDOWN)
16029+ | AuConv_VM_MAP(flags, DENYWRITE)
16030+ | AuConv_VM_MAP(flags, LOCKED);
16031+}
16032+#endif
16033+
16034+static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
16035+{
16036+ int err;
16037+ const unsigned char wlock /* writable + shared mapping; passed to au_write_pre() */
16038+ = (file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
16039+ struct super_block *sb;
16040+ struct file *h_file;
16041+ struct inode *inode;
16042+
16043+ AuDbgVmRegion(file, vma);
16044+ /* lockdep is switched off around the aufs locks; see the locking note above */
16045+ inode = file_inode(file);
16046+ sb = inode->i_sb;
16047+ lockdep_off();
16048+ si_read_lock(sb, AuLock_NOPLMW);
16049+
16050+ h_file = au_write_pre(file, wlock, /*wpre*/NULL);
16051+ lockdep_on();
16052+ err = PTR_ERR(h_file);
16053+ if (IS_ERR(h_file))
16054+ goto out;
16055+
16056+ err = 0;
16057+ au_set_mmapped(file);
16058+ au_vm_file_reset(vma, h_file);
16059+ /*
16060+ * we cannot call security_mmap_file() here since it may acquire
16061+ * mmap_sem or i_mutex.
16062+ *
16063+ * err = security_mmap_file(h_file, au_prot_conv(vma->vm_flags),
16064+ * au_flag_conv(vma->vm_flags));
16065+ */
16066+ if (!err) /* always true while the call above stays commented out */
16067+ err = call_mmap(h_file, vma);
16068+ if (!err) {
16069+ au_vm_prfile_set(vma, file);
16070+ fsstack_copy_attr_atime(inode, file_inode(h_file));
16071+ goto out_fput; /* success */
16072+ }
16073+ au_unset_mmapped(file); /* failure: undo the two calls made before call_mmap() */
16074+ au_vm_file_reset(vma, file);
16075+
16076+out_fput: /* reached on success and on call_mmap() failure alike */
16077+ lockdep_off();
16078+ ii_write_unlock(inode);
16079+ lockdep_on();
16080+ fput(h_file);
16081+out:
16082+ lockdep_off();
16083+ si_read_unlock(sb);
16084+ lockdep_on();
16085+ AuTraceErr(err);
16086+ return err;
16087+}
16088+
16089+/* ---------------------------------------------------------------------- */
16090+
16091+static int aufs_fsync_nondir(struct file *file, loff_t start, loff_t end,
16092+ int datasync)
16093+{
16094+ int ret;
16095+ struct file *lower;
16096+ struct inode *aufs_inode;
16097+ struct au_write_pre pre;
16098+
16099+ /* not opened for write: report success, not -EBADF (posix?) */
16100+ if (unlikely(!(file->f_mode & FMODE_WRITE)))
16101+ return 0;
16102+
16103+ aufs_inode = file_inode(file);
16104+ au_mtx_and_read_lock(aufs_inode);
16105+
16106+ pre.lsc = 0;
16107+ lower = au_write_pre(file, /*do_ready*/1, &pre);
16108+ if (IS_ERR(lower)) {
16109+ ret = PTR_ERR(lower);
16110+ } else {
16111+ /* start/end are not forwarded to vfsub_fsync() */
16112+ ret = vfsub_fsync(lower, &lower->f_path, datasync);
16113+ au_write_post(aufs_inode, lower, &pre, /*written*/0);
16114+ }
16115+
16116+ si_read_unlock(aufs_inode->i_sb);
16117+ inode_unlock(aufs_inode);
16118+
16119+ return ret;
16120+}
16121+
16122+static int aufs_fasync(int fd, struct file *file, int flag)
16123+{
16124+ int err;
16125+ struct file *h_file;
16126+ struct super_block *sb;
16127+
16128+ sb = file->f_path.dentry->d_sb;
16129+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
16130+ /* relay ->fasync() to the file returned by au_read_pre(), if it has one */
16131+ h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
16132+ err = PTR_ERR(h_file);
16133+ if (IS_ERR(h_file))
16134+ goto out;
16135+
16136+ err = 0; /* h_file is valid: drop the pointer bits PTR_ERR() left in err */
16137+ if (h_file->f_op->fasync)
16138+ err = h_file->f_op->fasync(fd, h_file, flag);
16139+ fput(h_file); /* instead of au_read_post() */
16140+out:
16141+ si_read_unlock(sb);
16142+ return err;
16143+}
16144+
16145+static int aufs_setfl(struct file *file, unsigned long arg)
16146+{
16147+ int ret;
16148+ struct file *lower;
16149+ struct super_block *aufs_sb;
16150+
16151+ aufs_sb = file->f_path.dentry->d_sb;
16152+ si_read_lock(aufs_sb, AuLock_FLUSH | AuLock_NOPLMW);
16153+
16154+ lower = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
16155+ if (IS_ERR(lower)) {
16156+ ret = PTR_ERR(lower);
16157+ } else {
16158+ /* carry over the FASYNC bit of the aufs file: */
16159+ /* stop calling h_file->fasync */
16160+ arg |= vfsub_file_flags(file) & FASYNC;
16161+ ret = setfl(/*unused fd*/-1, lower, arg);
16162+ fput(lower); /* instead of au_read_post() */
16163+ }
16164+
16165+ si_read_unlock(aufs_sb);
16166+ return ret;
16167+}
16168+
16169+/* ---------------------------------------------------------------------- */
16170+
16171+/* no one supports this operation, currently */
16172+#if 0 /* reserved for future use */
16173+static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
16174+ size_t len, loff_t *pos, int more)
16175+{
16176+}
16177+#endif
16178+
16179+/* ---------------------------------------------------------------------- */
16180+
16181+const struct file_operations aufs_file_fop = {
16182+ .owner = THIS_MODULE,
16183+
16184+ .llseek = default_llseek,
16185+
16186+ .read_iter = aufs_read_iter,
16187+ .write_iter = aufs_write_iter,
16188+
16189+#ifdef CONFIG_AUFS_POLL
16190+ .poll = aufs_poll,
16191+#endif
16192+ .unlocked_ioctl = aufs_ioctl_nondir,
16193+#ifdef CONFIG_COMPAT
16194+ .compat_ioctl = aufs_compat_ioctl_nondir,
16195+#endif
16196+ .mmap = aufs_mmap,
16197+ .open = aufs_open_nondir,
16198+ .flush = aufs_flush_nondir,
16199+ .release = aufs_release_nondir,
16200+ .fsync = aufs_fsync_nondir,
16201+ .fasync = aufs_fasync,
16202+ /* .sendpage = aufs_sendpage, */
16203+ .setfl = aufs_setfl,
16204+ .splice_write = aufs_splice_write,
16205+ .splice_read = aufs_splice_read,
16206+#if 0 /* reserved for future use */
16207+ .aio_splice_write = aufs_aio_splice_write,
16208+ .aio_splice_read = aufs_aio_splice_read,
16209+#endif
16210+ .fallocate = aufs_fallocate,
16211+ .copy_file_range = aufs_copy_file_range
16212+};
16213diff -urN /usr/share/empty/fs/aufs/fsctx.c linux/fs/aufs/fsctx.c
16214--- /usr/share/empty/fs/aufs/fsctx.c 1970-01-01 01:00:00.000000000 +0100
16215+++ linux/fs/aufs/fsctx.c 2022-11-05 23:02:18.965889284 +0100
16216@@ -0,0 +1,1242 @@
16217+// SPDX-License-Identifier: GPL-2.0
16218+/*
16219+ * Copyright (C) 2022 Junjiro R. Okajima
16220+ *
16221+ * This program is free software; you can redistribute it and/or modify
16222+ * it under the terms of the GNU General Public License as published by
16223+ * the Free Software Foundation; either version 2 of the License, or
16224+ * (at your option) any later version.
16225+ *
16226+ * This program is distributed in the hope that it will be useful,
16227+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
16228+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16229+ * GNU General Public License for more details.
16230+ *
16231+ * You should have received a copy of the GNU General Public License
16232+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
16233+ */
16234+
16235+/*
16236+ * fs context, aka new mount api
16237+ */
16238+
16239+#include <linux/fs_context.h>
16240+#include "aufs.h"
16241+
16242+struct au_fsctx_opts { /* parser state, reached through fc->fs_private */
16243+ aufs_bindex_t bindex; /* index for the next branch added by au_fsctx_parse_br() */
16244+ unsigned char skipped; /* cleared per parameter, set by au_fsctx_parse_br() */
16245+ struct au_opt *opt, *opt_tail; /* current slot; moving past opt_tail is -E2BIG */
16246+ struct super_block *sb; /* may be NULL; see the !a->sb checks in the parser */
16247+ struct au_sbinfo *sbinfo; /* becomes sb->s_fs_info in au_fsctx_fill_super() */
16248+ struct au_opts opts; /* handed to au_opts_mount() / au_opts_remount() */
16249+};
16250+
16251+/* fold the errnos that mount(8) would over-interpret into a plain -EINVAL */
16252+static int cvt_err(int err)
16253+{
16254+ AuTraceErr(err);
16255+
16256+ if (err == -ENOENT
16257+ || err == -ENOTDIR
16258+ || err == -EEXIST
16259+ || err == -EIO)
16260+ return -EINVAL;
16261+
16262+ /* anything else is passed through untouched */
16263+ return err;
16264+}
16265+
16266+static int au_fsctx_reconfigure(struct fs_context *fc)
16267+{
16268+ int err, do_dx;
16269+ unsigned int mntflags;
16270+ struct dentry *root;
16271+ struct super_block *sb;
16272+ struct inode *inode;
16273+ struct au_fsctx_opts *a = fc->fs_private;
16274+
16275+ AuDbg("fc %p\n", fc);
16276+ /* first pass: au_opts_verify() only, without the root inode lock */
16277+ root = fc->root;
16278+ sb = root->d_sb;
16279+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
16280+ if (!err) {
16281+ di_write_lock_child(root);
16282+ err = au_opts_verify(sb, fc->sb_flags, /*pending*/0);
16283+ aufs_write_unlock(root);
16284+ }
16285+ /* NOTE(review): err from the pass above is overwritten below -- confirm intended */
16286+ inode = d_inode(root);
16287+ inode_lock(inode);
16288+ err = si_write_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
16289+ if (unlikely(err))
16290+ goto out;
16291+ di_write_lock_child(root);
16292+
16293+ /* au_opts_remount() may return an error; the refreshes below run regardless */
16294+ err = au_opts_remount(sb, &a->opts);
16295+
16296+ if (au_ftest_opts(a->opts.flags, REFRESH))
16297+ au_remount_refresh(sb, au_ftest_opts(a->opts.flags,
16298+ REFRESH_IDOP));
16299+
16300+ if (au_ftest_opts(a->opts.flags, REFRESH_DYAOP)) {
16301+ mntflags = au_mntflags(sb);
16302+ do_dx = !!au_opt_test(mntflags, DIO);
16303+ au_dy_arefresh(do_dx);
16304+ }
16305+
16306+ au_fhsm_wrote_all(sb, /*force*/1); /* ?? */
16307+ aufs_write_unlock(root);
16308+
16309+out:
16310+ inode_unlock(inode);
16311+ err = cvt_err(err); /* the converted value is what the caller sees */
16312+ AuTraceErr(err);
16313+
16314+ return err;
16315+}
16316+
16317+/* ---------------------------------------------------------------------- */
16318+
16319+static int au_fsctx_fill_super(struct super_block *sb, struct fs_context *fc)
16320+{
16321+ int err;
16322+ struct au_fsctx_opts *a = fc->fs_private;
16323+ struct au_sbinfo *sbinfo = a->sbinfo;
16324+ struct dentry *root;
16325+ struct inode *inode;
16326+ /* bind the sbinfo carried in the fs context to this superblock */
16327+ sbinfo->si_sb = sb;
16328+ sb->s_fs_info = sbinfo;
16329+ kobject_get(&sbinfo->si_kobj); /* put again at out: when err is set */
16330+
16331+ __si_write_lock(sb);
16332+ si_pid_set(sb);
16333+ au_sbilist_add(sb);
16334+
16335+ /* all timestamps always follow the ones on the branch */
16336+ sb->s_flags |= SB_NOATIME | SB_NODIRATIME;
16337+ sb->s_flags |= SB_I_VERSION; /* do we really need this? */
16338+ sb->s_op = &aufs_sop;
16339+ sb->s_d_op = &aufs_dop;
16340+ sb->s_magic = AUFS_SUPER_MAGIC;
16341+ sb->s_maxbytes = 0;
16342+ sb->s_stack_depth = 1;
16343+ au_export_init(sb);
16344+ au_xattr_init(sb);
16345+
16346+ err = au_alloc_root(sb);
16347+ if (unlikely(err)) {
16348+ si_write_unlock(sb);
16349+ goto out;
16350+ }
16351+ root = sb->s_root;
16352+ inode = d_inode(root);
16353+ ii_write_lock_parent(inode);
16354+ aufs_write_unlock(root);
16355+
16356+ /* lock vfs_inode first, then aufs. */
16357+ inode_lock(inode);
16358+ aufs_write_lock(root);
16359+ err = au_opts_mount(sb, &a->opts);
16360+ AuTraceErr(err);
16361+ if (!err && au_ftest_si(sbinfo, NO_DREVAL)) {
16362+ sb->s_d_op = &aufs_dop_noreval;
16363+ /* infofc(fc, "%ps", sb->s_d_op); */
16364+ pr_info("%ps\n", sb->s_d_op);
16365+ au_refresh_dop(root, /*force_reval*/0);
16366+ sbinfo->si_iop_array = aufs_iop_nogetattr;
16367+ au_refresh_iop(inode, /*force_getattr*/0);
16368+ }
16369+ aufs_write_unlock(root);
16370+ inode_unlock(inode);
16371+ if (!err)
16372+ goto out; /* success */
16373+ /* au_opts_mount() failed: release the root dentry and detach it from sb */
16374+ dput(root);
16375+ sb->s_root = NULL;
16376+
16377+out:
16378+ if (unlikely(err))
16379+ kobject_put(&sbinfo->si_kobj);
16380+ AuTraceErr(err);
16381+ err = cvt_err(err); /* traced both before and after the conversion */
16382+ AuTraceErr(err);
16383+ return err;
16384+}
16385+
16386+static int au_fsctx_get_tree(struct fs_context *fc)
16387+{
16388+ int err;
16389+ /* thin wrapper: get_tree_nodev() with au_fsctx_fill_super() as the fill callback */
16390+ AuDbg("fc %p\n", fc);
16391+ err = get_tree_nodev(fc, au_fsctx_fill_super);
16392+
16393+ AuTraceErr(err);
16394+ return err;
16395+}
16396+
16397+/* ---------------------------------------------------------------------- */
16398+
16399+static void au_fsctx_dump(struct au_opts *opts)
16400+{
16401+#ifdef CONFIG_AUFS_DEBUG
16402+ /* reduce stack space */
16403+ union {
16404+ struct au_opt_add *add;
16405+ struct au_opt_del *del;
16406+ struct au_opt_mod *mod;
16407+ struct au_opt_xino *xino;
16408+ struct au_opt_xino_itrunc *xino_itrunc;
16409+ struct au_opt_wbr_create *create;
16410+ } u;
16411+ struct au_opt *opt;
16412+
16413+ opt = opts->opt;
16414+ while (opt->type != Opt_tail) {
16415+ switch (opt->type) {
16416+ case Opt_add:
16417+ u.add = &opt->add;
16418+ AuDbg("add {b%d, %s, 0x%x, %p}\n",
16419+ u.add->bindex, u.add->pathname, u.add->perm,
16420+ u.add->path.dentry);
16421+ break;
16422+ case Opt_del:
16423+ fallthrough;
16424+ case Opt_idel:
16425+ u.del = &opt->del;
16426+ AuDbg("del {%s, %p}\n",
16427+ u.del->pathname, u.del->h_path.dentry);
16428+ break;
16429+ case Opt_mod:
16430+ fallthrough;
16431+ case Opt_imod:
16432+ u.mod = &opt->mod;
16433+ AuDbg("mod {%s, 0x%x, %p}\n",
16434+ u.mod->path, u.mod->perm, u.mod->h_root);
16435+ break;
16436+ case Opt_append:
16437+ u.add = &opt->add;
16438+ AuDbg("append {b%d, %s, 0x%x, %p}\n",
16439+ u.add->bindex, u.add->pathname, u.add->perm,
16440+ u.add->path.dentry);
16441+ break;
16442+ case Opt_prepend:
16443+ u.add = &opt->add;
16444+ AuDbg("prepend {b%d, %s, 0x%x, %p}\n",
16445+ u.add->bindex, u.add->pathname, u.add->perm,
16446+ u.add->path.dentry);
16447+ break;
16448+
16449+ case Opt_dirwh:
16450+ AuDbg("dirwh %d\n", opt->dirwh);
16451+ break;
16452+ case Opt_rdcache:
16453+ AuDbg("rdcache %d\n", opt->rdcache);
16454+ break;
16455+ case Opt_rdblk:
16456+ AuDbg("rdblk %d\n", opt->rdblk);
16457+ break;
16458+ case Opt_rdhash:
16459+ AuDbg("rdhash %u\n", opt->rdhash);
16460+ break;
16461+
16462+ case Opt_xino:
16463+ u.xino = &opt->xino;
16464+ AuDbg("xino {%s %pD}\n", u.xino->path, u.xino->file);
16465+ break;
16466+
16467+#define au_fsctx_TF(name) \
16468+ case Opt_##name: \
16469+ if (opt->tf) \
16470+ AuLabel(name); \
16471+ else \
16472+ AuLabel(no##name); \
16473+ break;
16474+
16475+ /* simple true/false flag */
16476+ au_fsctx_TF(trunc_xino);
16477+ au_fsctx_TF(trunc_xib);
16478+ au_fsctx_TF(dirperm1);
16479+ au_fsctx_TF(plink);
16480+ au_fsctx_TF(shwh);
16481+ au_fsctx_TF(dio);
16482+ au_fsctx_TF(warn_perm);
16483+ au_fsctx_TF(verbose);
16484+ au_fsctx_TF(sum);
16485+ au_fsctx_TF(dirren);
16486+ au_fsctx_TF(acl);
16487+#undef au_fsctx_TF
16488+
16489+ case Opt_trunc_xino_path:
16490+ fallthrough;
16491+ case Opt_itrunc_xino:
16492+ u.xino_itrunc = &opt->xino_itrunc;
16493+ AuDbg("trunc_xino %d\n", u.xino_itrunc->bindex);
16494+ break;
16495+ case Opt_noxino:
16496+ AuLabel(noxino);
16497+ break;
16498+
16499+ case Opt_list_plink:
16500+ AuLabel(list_plink);
16501+ break;
16502+ case Opt_udba:
16503+ AuDbg("udba %d, %s\n",
16504+ opt->udba, au_optstr_udba(opt->udba));
16505+ break;
16506+ case Opt_diropq_a:
16507+ AuLabel(diropq_a);
16508+ break;
16509+ case Opt_diropq_w:
16510+ AuLabel(diropq_w);
16511+ break;
16512+ case Opt_wsum:
16513+ AuLabel(wsum);
16514+ break;
16515+ case Opt_wbr_create:
16516+ u.create = &opt->wbr_create;
16517+ AuDbg("create %d, %s\n", u.create->wbr_create,
16518+ au_optstr_wbr_create(u.create->wbr_create));
16519+ switch (u.create->wbr_create) {
16520+ case AuWbrCreate_MFSV:
16521+ fallthrough;
16522+ case AuWbrCreate_PMFSV:
16523+ AuDbg("%d sec\n", u.create->mfs_second);
16524+ break;
16525+ case AuWbrCreate_MFSRR:
16526+ fallthrough;
16527+ case AuWbrCreate_TDMFS:
16528+ AuDbg("%llu watermark\n",
16529+ u.create->mfsrr_watermark);
16530+ break;
16531+ case AuWbrCreate_MFSRRV:
16532+ fallthrough;
16533+ case AuWbrCreate_TDMFSV:
16534+ fallthrough;
16535+ case AuWbrCreate_PMFSRRV:
16536+ AuDbg("%llu watermark, %d sec\n",
16537+ u.create->mfsrr_watermark,
16538+ u.create->mfs_second);
16539+ break;
16540+ }
16541+ break;
16542+ case Opt_wbr_copyup:
16543+ AuDbg("copyup %d, %s\n", opt->wbr_copyup,
16544+ au_optstr_wbr_copyup(opt->wbr_copyup));
16545+ break;
16546+ case Opt_fhsm_sec:
16547+ AuDbg("fhsm_sec %u\n", opt->fhsm_second);
16548+ break;
16549+
16550+ default:
16551+ AuDbg("type %d\n", opt->type);
16552+ BUG();
16553+ }
16554+ opt++;
16555+ }
16556+#endif
16557+}
16558+
16559+/* ---------------------------------------------------------------------- */
16560+
16561+/*
16562+ * For conditionally compiled mount options.
16563+ * Instead of fsparam_flag_no(), use this macro to distinguish ignore_silent.
16564+ */
16565+#define au_ignore_flag(name, action) \
16566+ fsparam_flag(name, action), \
16567+ fsparam_flag("no" name, Opt_ignore_silent)
16568+
16569+const struct fs_parameter_spec aufs_fsctx_paramspec[] = {
16570+ fsparam_string("br", Opt_br),
16571+
16572+ /* "add=%d:%s" or "ins=%d:%s" */
16573+ fsparam_string("add", Opt_add),
16574+ fsparam_string("ins", Opt_add),
16575+ fsparam_path("append", Opt_append),
16576+ fsparam_path("prepend", Opt_prepend),
16577+
16578+ fsparam_path("del", Opt_del),
16579+ /* fsparam_s32("idel", Opt_idel), */
16580+ fsparam_path("mod", Opt_mod),
16581+ /* fsparam_string("imod", Opt_imod), */
16582+
16583+ fsparam_s32("dirwh", Opt_dirwh),
16584+
16585+ fsparam_path("xino", Opt_xino),
16586+ fsparam_flag("noxino", Opt_noxino),
16587+ fsparam_flag_no("trunc_xino", Opt_trunc_xino),
16588+ /* "trunc_xino_v=%d:%d" */
16589+ /* fsparam_string("trunc_xino_v", Opt_trunc_xino_v), */
16590+ fsparam_path("trunc_xino", Opt_trunc_xino_path),
16591+ fsparam_s32("itrunc_xino", Opt_itrunc_xino),
16592+ /* fsparam_path("zxino", Opt_zxino), */
16593+ fsparam_flag_no("trunc_xib", Opt_trunc_xib),
16594+
16595+#ifdef CONFIG_PROC_FS
16596+ fsparam_flag_no("plink", Opt_plink),
16597+#else
16598+ au_ignore_flag("plink", Opt_ignore),
16599+#endif
16600+
16601+#ifdef CONFIG_AUFS_DEBUG
16602+ fsparam_flag("list_plink", Opt_list_plink),
16603+#endif
16604+
16605+ fsparam_string("udba", Opt_udba),
16606+
16607+ fsparam_flag_no("dio", Opt_dio),
16608+
16609+#ifdef CONFIG_AUFS_DIRREN
16610+ fsparam_flag_no("dirren", Opt_dirren),
16611+#else
16612+ au_ignore_flag("dirren", Opt_ignore),
16613+#endif
16614+
16615+#ifdef CONFIG_AUFS_FHSM
16616+ fsparam_s32("fhsm_sec", Opt_fhsm_sec),
16617+#else
16618+ fsparam_s32("fhsm_sec", Opt_ignore),
16619+#endif
16620+
16621+ /* always | a | whiteouted | w */
16622+ fsparam_string("diropq", Opt_diropq),
16623+
16624+ fsparam_flag_no("warn_perm", Opt_warn_perm),
16625+
16626+#ifdef CONFIG_AUFS_SHWH
16627+ fsparam_flag_no("shwh", Opt_shwh),
16628+#else
16629+ au_ignore_flag("shwh", Opt_err),
16630+#endif
16631+
16632+ fsparam_flag_no("dirperm1", Opt_dirperm1),
16633+
16634+ fsparam_flag_no("verbose", Opt_verbose),
16635+ fsparam_flag("v", Opt_verbose),
16636+ fsparam_flag("quiet", Opt_noverbose),
16637+ fsparam_flag("q", Opt_noverbose),
16638+ /* user-space may handle this */
16639+ fsparam_flag("silent", Opt_noverbose),
16640+
16641+ fsparam_flag_no("sum", Opt_sum),
16642+ fsparam_flag("wsum", Opt_wsum),
16643+
16644+ fsparam_s32("rdcache", Opt_rdcache),
16645+ /* "def" or s32 */
16646+ fsparam_string("rdblk", Opt_rdblk),
16647+ /* "def" or s32 */
16648+ fsparam_string("rdhash", Opt_rdhash),
16649+
16650+ fsparam_string("create", Opt_wbr_create),
16651+ fsparam_string("create_policy", Opt_wbr_create),
16652+ fsparam_string("cpup", Opt_wbr_copyup),
16653+ fsparam_string("copyup", Opt_wbr_copyup),
16654+ fsparam_string("copyup_policy", Opt_wbr_copyup),
16655+
16656+ /* generic VFS flag */
16657+#ifdef CONFIG_FS_POSIX_ACL
16658+ fsparam_flag_no("acl", Opt_acl),
16659+#else
16660+ au_ignore_flag("acl", Opt_ignore),
16661+#endif
16662+
16663+ /* internal use for the scripts */
16664+ fsparam_string("si", Opt_ignore_silent),
16665+
16666+ /* obsoleted, keep them temporary */
16667+ fsparam_flag("nodlgt", Opt_ignore_silent),
16668+ fsparam_flag("clean_plink", Opt_ignore),
16669+ fsparam_string("dirs", Opt_br),
16670+ fsparam_u32("debug", Opt_ignore),
16671+ /* "whiteout" or "all" */
16672+ fsparam_string("delete", Opt_ignore),
16673+ fsparam_string("imap", Opt_ignore),
16674+
16675+ /* temporary workaround, due to old mount(8)? */
16676+ fsparam_flag("relatime", Opt_ignore_silent),
16677+
16678+ {}
16679+};
16680+
16681+static int au_fsctx_parse_do_add(struct fs_context *fc, struct au_opt *opt,
16682+ char *brspec, size_t speclen,
16683+ aufs_bindex_t bindex)
16684+{
16685+ int err;
16686+ char *spec_copy;
16687+
16688+ AuDbg("brspec %s\n", brspec);
16689+
16690+ /* a zero length means "brspec is NUL-terminated, measure it here" */
16691+ if (!speclen)
16692+ speclen = strlen(brspec);
16693+
16694+ /* private copy of the spec; will be freed by au_fsctx_free() */
16695+ spec_copy = kmemdup_nul(brspec, speclen, GFP_NOFS);
16696+ if (unlikely(!spec_copy)) {
16697+ err = -ENOMEM;
16698+ errorfc(fc, "failed in %s", brspec);
16699+ } else {
16700+ err = au_opt_add(opt, spec_copy, fc->sb_flags, bindex);
16701+ }
16702+ AuTraceErr(err);
16703+ return err;
16704+}
16705+
16706+static int au_fsctx_parse_br(struct fs_context *fc, char *brspec)
16707+{
16708+ int err;
16709+ char *p;
16710+ struct au_fsctx_opts *a = fc->fs_private;
16711+ struct au_opt *opt = a->opt;
16712+ aufs_bindex_t bindex = a->bindex;
16713+
16714+ AuDbg("brspec %s\n", brspec);
16715+ /* brspec is a ':'-separated branch list; strsep() cuts it up in place */
16716+ err = -EINVAL; /* result when the list has no (non-empty) first element */
16717+ while ((p = strsep(&brspec, ":")) && *p) {
16718+ err = au_fsctx_parse_do_add(fc, opt, p, /*len*/0, bindex);
16719+ AuTraceErr(err);
16720+ if (unlikely(err))
16721+ break;
16722+ bindex++;
16723+ opt++;
16724+ if (unlikely(opt > a->opt_tail)) {
16725+ err = -E2BIG; /* ran past the last option slot */
16726+ bindex--; /* undo the two increments above */
16727+ opt--;
16728+ break;
16729+ }
16730+ opt->type = Opt_tail; /* the slot after the new entry ends the list */
16731+ a->skipped = 1; /* NOTE(review): presumably "cursor already advanced" -- confirm */
16732+ }
16733+ a->bindex = bindex; /* write the cursors back for the next parameter */
16734+ a->opt = opt;
16735+
16736+ AuTraceErr(err);
16737+ return err;
16738+}
16739+
16740+static int au_fsctx_parse_add(struct fs_context *fc, char *addspec)
16741+{
16742+ int err, index;
16743+ char *colon, *path;
16744+ struct au_fsctx_opts *ctx = fc->fs_private;
16745+
16746+ /* addspec is "<index>:<branch spec>" */
16747+ colon = strchr(addspec, ':');
16748+ if (unlikely(!colon)) {
16749+ err = -EINVAL;
16750+ errorfc(fc, "bad arg in %s", addspec);
16751+ goto out;
16752+ }
16753+ *colon = '\0'; /* split in place: number before, spec after */
16754+ path = colon + 1;
16755+ err = kstrtoint(addspec, 0, &index);
16756+ if (unlikely(err)) {
16757+ errorfc(fc, "bad integer in %s", addspec);
16758+ goto out;
16759+ }
16760+ AuDbg("n %d\n", index);
16761+ err = au_fsctx_parse_do_add(fc, ctx->opt, path, /*len*/0, index);
16762+
16763+out:
16764+ AuTraceErr(err);
16765+ return err;
16766+}
16766+
16767+static int au_fsctx_parse_del(struct fs_context *fc, struct au_opt_del *del,
16768+ struct fs_parameter *param)
16769+{
16770+ int err = -ENOMEM;
16771+ char *name;
16772+
16773+ /* private copy of the path; will be freed by au_fsctx_free() */
16774+ name = kmemdup_nul(param->string, param->size, GFP_NOFS);
16775+ del->pathname = name;
16776+ if (unlikely(!name))
16777+ goto out;
16778+ AuDbg("del %s\n", name);
16779+ err = vfsub_kern_path(name, AuOpt_LkupDirFlags, &del->h_path);
16780+ if (unlikely(err))
16781+ errorfc(fc, "lookup failed %s (%d)", name, err);
16782+out:
16783+ AuTraceErr(err);
16784+ return err;
16785+}
16786+
16787+#if 0 /* reserved for future use */
16788+static int au_fsctx_parse_idel(struct fs_context *fc, struct au_opt_del *del,
16789+ aufs_bindex_t bindex)
16790+{
16791+ int err;
16792+ struct super_block *sb;
16793+ struct dentry *root;
16794+ struct au_fsctx_opts *a = fc->fs_private;
16795+
16796+ sb = a->sb;
16797+ AuDebugOn(!sb);
16798+
16799+ err = -EINVAL;
16800+ root = sb->s_root;
16801+ aufs_read_lock(root, AuLock_FLUSH);
16802+ if (bindex < 0 || au_sbbot(sb) < bindex) {
16803+ errorfc(fc, "out of bounds, %d", bindex);
16804+ goto out;
16805+ }
16806+
16807+ err = 0;
16808+ del->h_path.dentry = dget(au_h_dptr(root, bindex));
16809+ del->h_path.mnt = mntget(au_sbr_mnt(sb, bindex));
16810+
16811+out:
16812+ aufs_read_unlock(root, !AuLock_IR);
16813+ AuTraceErr(err);
16814+ return err;
16815+}
16816+#endif
16817+
16818+static int au_fsctx_parse_mod(struct fs_context *fc, struct au_opt_mod *mod,
16819+ struct fs_parameter *param)
16820+{
16821+ int err;
16822+ struct path path;
16823+ char *p;
16824+ /* param->string is "<path>=<perm>"; fills mod->path, mod->perm, mod->h_root */
16825+ err = -ENOMEM;
16826+ /* will be freed by au_fsctx_free() */
16827+ mod->path = kmemdup_nul(param->string, param->size, GFP_NOFS);
16828+ if (unlikely(!mod->path))
16829+ goto out;
16830+
16831+ err = -EINVAL;
16832+ p = strchr(mod->path, '=');
16833+ if (unlikely(!p)) {
16834+ errorfc(fc, "no permission %s", mod->path);
16835+ goto out;
16836+ }
16837+ /* cut at '=': mod->path keeps the path part, p points at the perm text */
16838+ *p++ = 0;
16839+ err = vfsub_kern_path(mod->path, AuOpt_LkupDirFlags, &path);
16840+ if (unlikely(err)) {
16841+ errorfc(fc, "lookup failed %s (%d)", mod->path, err);
16842+ goto out;
16843+ }
16844+
16845+ mod->perm = au_br_perm_val(p);
16846+ AuDbg("mod path %s, perm 0x%x, %s\n", mod->path, mod->perm, p);
16847+ mod->h_root = dget(path.dentry); /* own reference, kept after path_put() */
16848+ path_put(&path);
16849+
16850+out:
16851+ AuTraceErr(err);
16852+ return err;
16853+}
16854+
16855+#if 0 /* reserved for future use */
16856+static int au_fsctx_parse_imod(struct fs_context *fc, struct au_opt_mod *mod,
16857+ char *ibrspec)
16858+{
16859+ int err, n;
16860+ char *p;
16861+ struct super_block *sb;
16862+ struct dentry *root;
16863+ struct au_fsctx_opts *a = fc->fs_private;
16864+ /* ibrspec is "<branch index>:<perm>"; the index is checked against a->sb */
16865+ sb = a->sb;
16866+ AuDebugOn(!sb);
16867+
16868+ err = -EINVAL;
16869+ p = strchr(ibrspec, ':');
16870+ if (unlikely(!p)) {
16871+ errorfc(fc, "no index, %s", ibrspec);
16872+ goto out;
16873+ }
16874+ *p++ = '\0';
16875+ err = kstrtoint(ibrspec, 0, &n);
16876+ if (unlikely(err)) {
16877+ errorfc(fc, "bad integer in %s", ibrspec);
16878+ goto out;
16879+ }
16880+ AuDbg("n %d\n", n);
16881+
16882+ root = sb->s_root;
16883+ aufs_read_lock(root, AuLock_FLUSH);
16884+ if (n < 0 || au_sbbot(sb) < n) {
16885+ err = -EINVAL; /* kstrtoint() has left 0 in err */
16886+ errorfc(fc, "out of bounds, %d", n);
16887+ goto out_root;
16888+ }
16889+
16890+ mod->perm = au_br_perm_val(p); /* err is still 0 from kstrtoint() */
16891+ AuDbg("mod path %s, perm 0x%x, %s\n",
16892+ mod->path, mod->perm, p);
16893+ mod->h_root = dget(au_h_dptr(root, n));
16894+
16895+out_root:
16896+ aufs_read_unlock(root, !AuLock_IR);
16897+out:
16898+ AuTraceErr(err);
16899+ return err;
16900+}
16901+#endif
16902+
16903+static int au_fsctx_parse_xino(struct fs_context *fc,
16904+ struct au_opt_xino *xino,
16905+ struct fs_parameter *param)
16906+{
16907+ int err;
16908+ struct au_fsctx_opts *a = fc->fs_private;
16909+ /* copy the path out of param, then pass it to au_xino_create() */
16910+ err = -ENOMEM;
16911+ /* will be freed by au_opts_free() */
16912+ xino->path = kmemdup_nul(param->string, param->size, GFP_NOFS);
16913+ if (unlikely(!xino->path))
16914+ goto out;
16915+ AuDbg("path %s\n", xino->path);
16916+
16917+ xino->file = au_xino_create(a->sb, xino->path, /*silent*/0,
16918+ /*wbrtop*/0);
16919+ err = PTR_ERR(xino->file);
16920+ if (IS_ERR(xino->file)) {
16921+ xino->file = NULL; /* never leave an ERR_PTR in the option */
16922+ goto out;
16923+ }
16924+ /* reject a file that sits on this very superblock (only checked when a->sb is set) */
16925+ err = 0;
16926+ if (unlikely(a->sb && xino->file->f_path.dentry->d_sb == a->sb)) {
16927+ err = -EINVAL; /* NOTE(review): xino->file stays set; confirm it is released later */
16928+ errorfc(fc, "%s must be outside", xino->path);
16929+ }
16930+
16931+out:
16932+ AuTraceErr(err);
16933+ return err;
16934+}
16935+
16936+static
16937+int au_fsctx_parse_xino_itrunc_path(struct fs_context *fc,
16938+ struct au_opt_xino_itrunc *xino_itrunc,
16939+ char *pathname)
16940+{
16941+ int err;
16942+ aufs_bindex_t bbot, bindex;
16943+ struct path path;
16944+ struct dentry *root;
16945+ struct au_fsctx_opts *a = fc->fs_private;
16946+ /* translate a branch path into its index, stored in xino_itrunc->bindex */
16947+ AuDebugOn(!a->sb); /* au_fsctx_parse_param() rejects the option when a->sb is NULL */
16948+
16949+ err = vfsub_kern_path(pathname, AuOpt_LkupDirFlags, &path);
16950+ if (unlikely(err)) {
16951+ errorfc(fc, "lookup failed %s (%d)", pathname, err);
16952+ goto out;
16953+ }
16954+
16955+ xino_itrunc->bindex = -1; /* "not found" until the loop says otherwise */
16956+ root = a->sb->s_root;
16957+ aufs_read_lock(root, AuLock_FLUSH);
16958+ bbot = au_sbbot(a->sb);
16959+ for (bindex = 0; bindex <= bbot; bindex++) {
16960+ if (au_h_dptr(root, bindex) == path.dentry) { /* dentry identity, no string compare */
16961+ xino_itrunc->bindex = bindex;
16962+ break;
16963+ }
16964+ }
16965+ aufs_read_unlock(root, !AuLock_IR);
16966+ path_put(&path);
16967+
16968+ if (unlikely(xino_itrunc->bindex < 0)) {
16969+ err = -EINVAL;
16970+ errorfc(fc, "no such branch %s", pathname);
16971+ }
16972+
16973+out:
16974+ AuTraceErr(err);
16975+ return err;
16976+}
16977+
16978+static int au_fsctx_parse_xino_itrunc(struct fs_context *fc,
16979+ struct au_opt_xino_itrunc *xino_itrunc,
16980+ unsigned int bindex)
16981+{
16982+ int err;
16983+ aufs_bindex_t bbot;
16984+ struct super_block *sb;
16985+ struct au_fsctx_opts *a = fc->fs_private;
16986+ /* accept bindex only when it does not exceed au_sbbot(sb); otherwise -EINVAL */
16987+ sb = a->sb;
16988+ AuDebugOn(!sb);
16989+ /* NOTE(review): unsigned bindex vs. bbot -- confirm bbot cannot be negative here */
16990+ err = 0;
16991+ si_noflush_read_lock(sb);
16992+ bbot = au_sbbot(sb);
16993+ si_read_unlock(sb); /* bbot is compared after the lock is dropped */
16994+ if (bindex <= bbot)
16995+ xino_itrunc->bindex = bindex;
16996+ else {
16997+ err = -EINVAL;
16998+ errorfc(fc, "out of bounds, %u", bindex);
16999+ }
17000+
17001+ AuTraceErr(err);
17002+ return err;
17003+}
17004+
17005+static int au_fsctx_parse_param(struct fs_context *fc, struct fs_parameter *param)
17006+{
17007+ int err, token;
17008+ struct fs_parse_result result;
17009+ struct au_fsctx_opts *a = fc->fs_private;
17010+ struct au_opt *opt = a->opt;
17011+
17012+ AuDbg("fc %p, param {key %s, string %s}\n",
17013+ fc, param->key, param->string);
17014+ err = fs_parse(fc, aufs_fsctx_paramspec, param, &result);
17015+ if (unlikely(err < 0))
17016+ goto out;
17017+ token = err;
17018+ AuDbg("token %d, res{negated %d, uint64 %llu}\n",
17019+ token, result.negated, result.uint_64);
17020+
17021+ err = -EINVAL;
17022+ a->skipped = 0;
17023+ switch (token) {
17024+ case Opt_br:
17025+ err = au_fsctx_parse_br(fc, param->string);
17026+ break;
17027+ case Opt_add:
17028+ err = au_fsctx_parse_add(fc, param->string);
17029+ break;
17030+ case Opt_append:
17031+ err = au_fsctx_parse_do_add(fc, opt, param->string, param->size,
17032+ /*dummy bindex*/1);
17033+ break;
17034+ case Opt_prepend:
17035+ err = au_fsctx_parse_do_add(fc, opt, param->string, param->size,
17036+ /*bindex*/0);
17037+ break;
17038+
17039+ case Opt_del:
17040+ err = au_fsctx_parse_del(fc, &opt->del, param);
17041+ break;
17042+#if 0 /* reserved for future use */
17043+ case Opt_idel:
17044+ if (!a->sb) {
17045+ err = 0;
17046+ a->skipped = 1;
17047+ break;
17048+ }
17049+ del->pathname = "(indexed)";
17050+ err = au_opts_parse_idel(fc, &opt->del, result.uint_32);
17051+ break;
17052+#endif
17053+
17054+ case Opt_mod:
17055+ err = au_fsctx_parse_mod(fc, &opt->mod, param);
17056+ break;
17057+#ifdef IMOD /* reserved for future use */
17058+ case Opt_imod:
17059+ if (!a->sb) {
17060+ err = 0;
17061+ a->skipped = 1;
17062+ break;
17063+ }
17064+ u.mod->path = "(indexed)";
17065+ err = au_opts_parse_imod(fc, &opt->mod, param->string);
17066+ break;
17067+#endif
17068+
17069+ case Opt_xino:
17070+ err = au_fsctx_parse_xino(fc, &opt->xino, param);
17071+ break;
17072+ case Opt_trunc_xino_path:
17073+ if (!a->sb) {
17074+ errorfc(fc, "no such branch %s", param->string);
17075+ break;
17076+ }
17077+ err = au_fsctx_parse_xino_itrunc_path(fc, &opt->xino_itrunc,
17078+ param->string);
17079+ break;
17080+#if 0
17081+ case Opt_trunc_xino_v:
17082+ if (!a->sb) {
17083+ err = 0;
17084+ a->skipped = 1;
17085+ break;
17086+ }
17087+ err = au_fsctx_parse_xino_itrunc_path(fc, &opt->xino_itrunc,
17088+ param->string);
17089+ break;
17090+#endif
17091+ case Opt_itrunc_xino:
17092+ if (!a->sb) {
17093+ errorfc(fc, "out of bounds %s", param->string);
17094+ break;
17095+ }
17096+ err = au_fsctx_parse_xino_itrunc(fc, &opt->xino_itrunc,
17097+ result.int_32);
17098+ break;
17099+
17100+ case Opt_dirwh:
17101+ err = 0;
17102+ opt->dirwh = result.int_32;
17103+ break;
17104+
17105+ case Opt_rdcache:
17106+ if (unlikely(result.int_32 > AUFS_RDCACHE_MAX)) {
17107+ errorfc(fc, "rdcache must be smaller than %d",
17108+ AUFS_RDCACHE_MAX);
17109+ break;
17110+ }
17111+ err = 0;
17112+ opt->rdcache = result.int_32;
17113+ break;
17114+
17115+ case Opt_rdblk:
17116+ err = 0;
17117+ opt->rdblk = AUFS_RDBLK_DEF;
17118+ if (!strcmp(param->string, "def"))
17119+ break;
17120+
17121+ err = kstrtoint(param->string, 0, &result.int_32);
17122+ if (unlikely(err)) {
17123+ errorfc(fc, "bad value in %s", param->key);
17124+ break;
17125+ }
17126+ err = -EINVAL;
17127+ if (unlikely(result.int_32 < 0
17128+ || result.int_32 > KMALLOC_MAX_SIZE)) {
17129+ errorfc(fc, "bad value in %s", param->key);
17130+ break;
17131+ }
17132+ if (unlikely(result.int_32 && result.int_32 < NAME_MAX)) {
17133+ errorfc(fc, "rdblk must be larger than %d", NAME_MAX);
17134+ break;
17135+ }
17136+ err = 0;
17137+ opt->rdblk = result.int_32;
17138+ break;
17139+
17140+ case Opt_rdhash:
17141+ err = 0;
17142+ opt->rdhash = AUFS_RDHASH_DEF;
17143+ if (!strcmp(param->string, "def"))
17144+ break;
17145+
17146+ err = kstrtoint(param->string, 0, &result.int_32);
17147+ if (unlikely(err)) {
17148+ errorfc(fc, "bad value in %s", param->key);
17149+ break;
17150+ }
17151+ /* how about zero? */
17152+ if (result.int_32 < 0
17153+ || result.int_32 * sizeof(struct hlist_head)
17154+ > KMALLOC_MAX_SIZE) {
17155+ err = -EINVAL;
17156+ errorfc(fc, "bad integer in %s", param->key);
17157+ break;
17158+ }
17159+ opt->rdhash = result.int_32;
17160+ break;
17161+
17162+ case Opt_diropq:
17163+ /*
17164+ * As other options, fs/aufs/opts.c can handle these strings by
17165+ * match_token(). But "diropq=" is deprecated now and will
17166+ * never have other value. So simple strcmp() is enough here.
17167+ */
17168+ if (!strcmp(param->string, "a") ||
17169+ !strcmp(param->string, "always")) {
17170+ err = 0;
17171+ opt->type = Opt_diropq_a;
17172+ } else if (!strcmp(param->string, "w") ||
17173+ !strcmp(param->string, "whiteouted")) {
17174+ err = 0;
17175+ opt->type = Opt_diropq_w;
17176+ } else
17177+ errorfc(fc, "unknown value %s", param->string);
17178+ break;
17179+
17180+ case Opt_udba:
17181+ opt->udba = au_udba_val(param->string);
17182+ if (opt->udba >= 0)
17183+ err = 0;
17184+ else
17185+ errorf(fc, "wrong value, %s", param->string);
17186+ break;
17187+
17188+ case Opt_wbr_create:
17189+ opt->wbr_create.wbr_create
17190+ = au_wbr_create_val(param->string, &opt->wbr_create);
17191+ if (opt->wbr_create.wbr_create >= 0)
17192+ err = 0;
17193+ else
17194+ errorf(fc, "wrong value, %s", param->key);
17195+ break;
17196+
17197+ case Opt_wbr_copyup:
17198+ opt->wbr_copyup = au_wbr_copyup_val(param->string);
17199+ if (opt->wbr_copyup >= 0)
17200+ err = 0;
17201+ else
17202+ errorfc(fc, "wrong value, %s", param->key);
17203+ break;
17204+
17205+ case Opt_fhsm_sec:
17206+ if (unlikely(result.int_32 < 0)) {
17207+ errorfc(fc, "bad integer in %s\n", param->key);
17208+ break;
17209+ }
17210+ err = 0;
17211+ if (sysaufs_brs)
17212+ opt->fhsm_second = result.int_32;
17213+ else
17214+ warnfc(fc, "ignored %s %s", param->key, param->string);
17215+ break;
17216+
17217+ /* simple true/false flag */
17218+#define au_fsctx_TF(name) \
17219+ case Opt_##name: \
17220+ err = 0; \
17221+ opt->tf = !result.negated; \
17222+ break
17223+ au_fsctx_TF(trunc_xino);
17224+ au_fsctx_TF(trunc_xib);
17225+ au_fsctx_TF(dirperm1);
17226+ au_fsctx_TF(plink);
17227+ au_fsctx_TF(shwh);
17228+ au_fsctx_TF(dio);
17229+ au_fsctx_TF(warn_perm);
17230+ au_fsctx_TF(verbose);
17231+ au_fsctx_TF(sum);
17232+ au_fsctx_TF(dirren);
17233+ au_fsctx_TF(acl);
17234+#undef au_fsctx_TF
17235+
17236+ case Opt_noverbose:
17237+ err = 0;
17238+ opt->type = Opt_verbose;
17239+ opt->tf = false;
17240+ break;
17241+
17242+ case Opt_noxino:
17243+ fallthrough;
17244+ case Opt_list_plink:
17245+ fallthrough;
17246+ case Opt_wsum:
17247+ err = 0;
17248+ break;
17249+
17250+ case Opt_ignore:
17251+ warnfc(fc, "ignored %s", param->key);
17252+ fallthrough;
17253+ case Opt_ignore_silent:
17254+ a->skipped = 1;
17255+ err = 0;
17256+ break;
17257+ default:
17258+ a->skipped = 1;
17259+ err = -ENOPARAM;
17260+ break;
17261+ }
17262+ if (unlikely(err))
17263+ goto out;
17264+ if (a->skipped)
17265+ goto out;
17266+
17267+ switch (token) {
17268+ case Opt_br:
17269+ fallthrough;
17270+ case Opt_noverbose:
17271+ fallthrough;
17272+ case Opt_diropq:
17273+ break;
17274+ default:
17275+ opt->type = token;
17276+ break;
17277+ }
17278+ opt++;
17279+ if (unlikely(opt > a->opt_tail)) {
17280+ err = -E2BIG;
17281+ opt--;
17282+ }
17283+ opt->type = Opt_tail;
17284+ a->opt = opt;
17285+
17286+out:
17287+ return err;
17288+}
17289+
17290+/*
17291+ * these options accept both the 'name=val' and the 'name:val' form, and
17292+ * some of them accept an optional '=' in the value too,
17293+ * eg. br:/br1=rw:/br2=ro and br=/br1=rw:/br2=ro
17294+ */
17295+static inline unsigned int is_colonopt(char *str) /* offset of ':' in @str, or 0 */
17296+{
17297+#define do_test(name) \
17298+ if (!strncmp(str, name ":", sizeof(name))) \
17299+ return sizeof(name) - 1
17300+ do_test("br"); /* sizeof("br") is 3, so exactly "br:" is compared */
17301+ do_test("add");
17302+ do_test("ins");
17303+ do_test("append");
17304+ do_test("prepend");
17305+ do_test("del");
17306+ /* do_test("idel"); */
17307+ do_test("mod");
17308+ /* do_test("imod"); */
17309+#undef do_test
17310+
17311+ return 0; /* @str does not begin with one of the names followed by ':' */
17312+}
17313+
17314+static int au_fsctx_parse_monolithic(struct fs_context *fc, void *data) /* ->parse_monolithic */
17315+{
17316+ int err;
17317+ unsigned int u;
17318+ char *str;
17319+ struct au_fsctx_opts *a = fc->fs_private;
17320+
17321+ str = data;
17322+ AuDbg("str %s\n", str);
17323+ while (str) { /* visit the head of every ','-separated piece of @data */
17324+ u = is_colonopt(str);
17325+ if (u)
17326+ str[u] = '='; /* rewrite 'name:val' to 'name=val' in place */
17327+ str = strchr(str, ',');
17328+ if (!str)
17329+ break;
17330+ str++; /* step over the ',' */
17331+ }
17332+ str = data;
17333+ AuDbg("str %s\n", str);
17334+
17335+ err = generic_parse_monolithic(fc, str); /* the rewritten string goes to the generic parser */
17336+ AuTraceErr(err);
17337+ au_fsctx_dump(&a->opts);
17338+
17339+ return err;
17340+}
17341+
17342+/* ---------------------------------------------------------------------- */
17343+
17344+static void au_fsctx_opts_free(struct au_opts *opts) /* drop what each parsed option holds, the array itself is not freed here */
17345+{
17346+ struct au_opt *opt;
17347+
17348+ opt = opts->opt;
17349+ while (opt->type != Opt_tail) { /* Opt_tail terminates the array */
17350+ switch (opt->type) {
17351+ case Opt_add:
17352+ fallthrough;
17353+ case Opt_append:
17354+ fallthrough;
17355+ case Opt_prepend:
17356+ kfree(opt->add.pathname);
17357+ path_put(&opt->add.path);
17358+ break;
17359+ case Opt_del:
17360+ kfree(opt->del.pathname);
17361+ fallthrough;
17362+ case Opt_idel: /* only the path reference is dropped for the indexed form */
17363+ path_put(&opt->del.h_path);
17364+ break;
17365+ case Opt_mod:
17366+ kfree(opt->mod.path);
17367+ fallthrough;
17368+ case Opt_imod: /* only the dentry reference is dropped for the indexed form */
17369+ dput(opt->mod.h_root);
17370+ break;
17371+ case Opt_xino:
17372+ kfree(opt->xino.path);
17373+ fput(opt->xino.file);
17374+ break;
17375+ } /* every other option type holds nothing to release here */
17376+ opt++;
17377+ }
17378+}
17379+
17380+static void au_fsctx_free(struct fs_context *fc) /* ->free of au_fsctx_ops, undoes aufs_fsctx_init() */
17381+{
17382+ struct au_fsctx_opts *a = fc->fs_private;
17383+
17384+ /* fs_type=%p, root=%pD */
17385+ AuDbg("fc %p{sb_flags 0x%x, sb_flags_mask 0x%x, purpose %u\n",
17386+ fc, fc->sb_flags, fc->sb_flags_mask, fc->purpose);
17387+
17388+ kobject_put(&a->sbinfo->si_kobj); /* pairs with kobject_get(), or presumably with au_si_alloc(), in aufs_fsctx_init() */
17389+ au_fsctx_opts_free(&a->opts);
17390+ free_page((unsigned long)a->opts.opt); /* the page taken by __get_free_page() in aufs_fsctx_init() */
17391+ au_kfree_rcu(a);
17392+}
17393+
17394+static const struct fs_context_operations au_fsctx_ops = { /* set to fc->ops by aufs_fsctx_init() */
17395+ .free = au_fsctx_free,
17396+ .parse_param = au_fsctx_parse_param,
17397+ .parse_monolithic = au_fsctx_parse_monolithic,
17398+ .get_tree = au_fsctx_get_tree,
17399+ .reconfigure = au_fsctx_reconfigure
17400+ /*
17401+ * does nfs4 require ->dup()? No.
17402+ * I don't know what this ->dup() is for.
17403+ */
17404+};
17405+
17406+int aufs_fsctx_init(struct fs_context *fc) /* set up fc->fs_private and fc->ops; returns 0 or a negative errno */
17407+{
17408+ int err;
17409+ struct au_fsctx_opts *a;
17410+
17411+ /* fs_type=%p, root=%pD */
17412+ AuDbg("fc %p{sb_flags 0x%x, sb_flags_mask 0x%x, purpose %u\n",
17413+ fc, fc->sb_flags, fc->sb_flags_mask, fc->purpose);
17414+
17415+ /* they will be freed by au_fsctx_free() */
17416+ err = -ENOMEM;
17417+ a = kzalloc(sizeof(*a), GFP_NOFS);
17418+ if (unlikely(!a))
17419+ goto out;
17420+ a->bindex = 0;
17421+ a->opts.opt = (void *)__get_free_page(GFP_NOFS); /* one page holds the array of parsed options */
17422+ if (unlikely(!a->opts.opt))
17423+ goto out_a;
17424+ a->opt = a->opts.opt;
17425+ a->opt->type = Opt_tail; /* start with an empty, terminated array */
17426+ a->opts.max_opt = PAGE_SIZE / sizeof(*a->opts.opt); /* as many entries as fit in the page */
17427+ a->opt_tail = a->opt + a->opts.max_opt - 1; /* the last entry of the array */
17428+ a->opts.sb_flags = fc->sb_flags;
17429+
17430+ a->sb = NULL;
17431+ if (fc->root) { /* reconfigure: take the sbinfo of the existing superblock */
17432+ AuDebugOn(fc->purpose != FS_CONTEXT_FOR_RECONFIGURE);
17433+ a->opts.flags = AuOpts_REMOUNT;
17434+ a->sb = fc->root->d_sb;
17435+ a->sbinfo = au_sbi(a->sb);
17436+ kobject_get(&a->sbinfo->si_kobj); /* put by au_fsctx_free() */
17437+ } else { /* no root yet: allocate a new sbinfo, a->sb is still NULL */
17438+ a->sbinfo = au_si_alloc(a->sb);
17439+ AuDebugOn(!a->sbinfo);
17440+ err = PTR_ERR(a->sbinfo);
17441+ if (IS_ERR(a->sbinfo))
17442+ goto out_opt;
17443+ au_rw_write_unlock(&a->sbinfo->si_rwsem); /* NOTE(review): au_si_alloc() presumably returns write-locked, confirm */
17444+ }
17445+
17446+ err = 0;
17447+ fc->fs_private = a;
17448+ fc->ops = &au_fsctx_ops;
17449+ goto out; /* success */
17450+
17451+out_opt:
17452+ free_page((unsigned long)a->opts.opt);
17453+out_a:
17454+ au_kfree_rcu(a);
17455+out:
17456+ AuTraceErr(err);
17457+ return err;
17458+}
17459diff -urN /usr/share/empty/fs/aufs/fstype.h linux/fs/aufs/fstype.h
17460--- /usr/share/empty/fs/aufs/fstype.h 1970-01-01 01:00:00.000000000 +0100
17461+++ linux/fs/aufs/fstype.h 2022-11-05 23:02:18.965889284 +0100
17462@@ -0,0 +1,401 @@
17463+/* SPDX-License-Identifier: GPL-2.0 */
17464+/*
17465+ * Copyright (C) 2005-2022 Junjiro R. Okajima
17466+ *
17467+ * This program is free software; you can redistribute it and/or modify
17468+ * it under the terms of the GNU General Public License as published by
17469+ * the Free Software Foundation; either version 2 of the License, or
17470+ * (at your option) any later version.
17471+ *
17472+ * This program is distributed in the hope that it will be useful,
17473+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17474+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17475+ * GNU General Public License for more details.
17476+ *
17477+ * You should have received a copy of the GNU General Public License
17478+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
17479+ */
17480+
17481+/*
17482+ * judging filesystem type
17483+ */
17484+
17485+#ifndef __AUFS_FSTYPE_H__
17486+#define __AUFS_FSTYPE_H__
17487+
17488+#ifdef __KERNEL__
17489+
17490+#include <linux/fs.h>
17491+#include <linux/magic.h>
17492+#include <linux/nfs_fs.h>
17493+#include <linux/romfs_fs.h>
17494+
17495+static inline int au_test_aufs(struct super_block *sb)
17496+{
17497+ return sb->s_magic == AUFS_SUPER_MAGIC;
17498+}
17499+
17500+static inline const char *au_sbtype(struct super_block *sb)
17501+{
17502+ return sb->s_type->name;
17503+}
17504+
17505+static inline int au_test_iso9660(struct super_block *sb __maybe_unused)
17506+{
17507+#if IS_ENABLED(CONFIG_ISO9660_FS)
17508+ return sb->s_magic == ISOFS_SUPER_MAGIC;
17509+#else
17510+ return 0;
17511+#endif
17512+}
17513+
17514+static inline int au_test_romfs(struct super_block *sb __maybe_unused)
17515+{
17516+#if IS_ENABLED(CONFIG_ROMFS_FS)
17517+ return sb->s_magic == ROMFS_MAGIC;
17518+#else
17519+ return 0;
17520+#endif
17521+}
17522+
17523+static inline int au_test_cramfs(struct super_block *sb __maybe_unused)
17524+{
17525+#if IS_ENABLED(CONFIG_CRAMFS)
17526+ return sb->s_magic == CRAMFS_MAGIC;
17527+#endif
17528+ return 0;
17529+}
17530+
17531+static inline int au_test_nfs(struct super_block *sb __maybe_unused)
17532+{
17533+#if IS_ENABLED(CONFIG_NFS_FS)
17534+ return sb->s_magic == NFS_SUPER_MAGIC;
17535+#else
17536+ return 0;
17537+#endif
17538+}
17539+
17540+static inline int au_test_fuse(struct super_block *sb __maybe_unused)
17541+{
17542+#if IS_ENABLED(CONFIG_FUSE_FS)
17543+ return sb->s_magic == FUSE_SUPER_MAGIC;
17544+#else
17545+ return 0;
17546+#endif
17547+}
17548+
17549+static inline int au_test_xfs(struct super_block *sb __maybe_unused)
17550+{
17551+#if IS_ENABLED(CONFIG_XFS_FS)
17552+ return sb->s_magic == XFS_SB_MAGIC;
17553+#else
17554+ return 0;
17555+#endif
17556+}
17557+
17558+static inline int au_test_tmpfs(struct super_block *sb __maybe_unused)
17559+{
17560+#ifdef CONFIG_TMPFS
17561+ return sb->s_magic == TMPFS_MAGIC;
17562+#else
17563+ return 0;
17564+#endif
17565+}
17566+
17567+static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused)
17568+{
17569+#if IS_ENABLED(CONFIG_ECRYPT_FS)
17570+ return !strcmp(au_sbtype(sb), "ecryptfs");
17571+#else
17572+ return 0;
17573+#endif
17574+}
17575+
17576+static inline int au_test_ramfs(struct super_block *sb)
17577+{
17578+ return sb->s_magic == RAMFS_MAGIC;
17579+}
17580+
17581+static inline int au_test_ubifs(struct super_block *sb __maybe_unused)
17582+{
17583+#if IS_ENABLED(CONFIG_UBIFS_FS)
17584+ return sb->s_magic == UBIFS_SUPER_MAGIC;
17585+#else
17586+ return 0;
17587+#endif
17588+}
17589+
17590+static inline int au_test_procfs(struct super_block *sb __maybe_unused)
17591+{
17592+#ifdef CONFIG_PROC_FS
17593+ return sb->s_magic == PROC_SUPER_MAGIC;
17594+#else
17595+ return 0;
17596+#endif
17597+}
17598+
17599+static inline int au_test_sysfs(struct super_block *sb __maybe_unused)
17600+{
17601+#ifdef CONFIG_SYSFS
17602+ return sb->s_magic == SYSFS_MAGIC;
17603+#else
17604+ return 0;
17605+#endif
17606+}
17607+
17608+static inline int au_test_configfs(struct super_block *sb __maybe_unused)
17609+{
17610+#if IS_ENABLED(CONFIG_CONFIGFS_FS)
17611+ return sb->s_magic == CONFIGFS_MAGIC;
17612+#else
17613+ return 0;
17614+#endif
17615+}
17616+
17617+static inline int au_test_minix(struct super_block *sb __maybe_unused)
17618+{
17619+#if IS_ENABLED(CONFIG_MINIX_FS)
17620+ return sb->s_magic == MINIX3_SUPER_MAGIC
17621+ || sb->s_magic == MINIX2_SUPER_MAGIC
17622+ || sb->s_magic == MINIX2_SUPER_MAGIC2
17623+ || sb->s_magic == MINIX_SUPER_MAGIC
17624+ || sb->s_magic == MINIX_SUPER_MAGIC2;
17625+#else
17626+ return 0;
17627+#endif
17628+}
17629+
17630+static inline int au_test_fat(struct super_block *sb __maybe_unused)
17631+{
17632+#if IS_ENABLED(CONFIG_FAT_FS)
17633+ return sb->s_magic == MSDOS_SUPER_MAGIC;
17634+#else
17635+ return 0;
17636+#endif
17637+}
17638+
17639+static inline int au_test_msdos(struct super_block *sb)
17640+{
17641+ return au_test_fat(sb);
17642+}
17643+
17644+static inline int au_test_vfat(struct super_block *sb)
17645+{
17646+ return au_test_fat(sb);
17647+}
17648+
17649+static inline int au_test_securityfs(struct super_block *sb __maybe_unused)
17650+{
17651+#ifdef CONFIG_SECURITYFS
17652+ return sb->s_magic == SECURITYFS_MAGIC;
17653+#else
17654+ return 0;
17655+#endif
17656+}
17657+
17658+static inline int au_test_squashfs(struct super_block *sb __maybe_unused)
17659+{
17660+#if IS_ENABLED(CONFIG_SQUASHFS)
17661+ return sb->s_magic == SQUASHFS_MAGIC;
17662+#else
17663+ return 0;
17664+#endif
17665+}
17666+
17667+static inline int au_test_btrfs(struct super_block *sb __maybe_unused)
17668+{
17669+#if IS_ENABLED(CONFIG_BTRFS_FS)
17670+ return sb->s_magic == BTRFS_SUPER_MAGIC;
17671+#else
17672+ return 0;
17673+#endif
17674+}
17675+
17676+static inline int au_test_xenfs(struct super_block *sb __maybe_unused)
17677+{
17678+#if IS_ENABLED(CONFIG_XENFS)
17679+ return sb->s_magic == XENFS_SUPER_MAGIC;
17680+#else
17681+ return 0;
17682+#endif
17683+}
17684+
17685+static inline int au_test_debugfs(struct super_block *sb __maybe_unused)
17686+{
17687+#ifdef CONFIG_DEBUG_FS
17688+ return sb->s_magic == DEBUGFS_MAGIC;
17689+#else
17690+ return 0;
17691+#endif
17692+}
17693+
17694+static inline int au_test_nilfs(struct super_block *sb __maybe_unused) /* non-zero when @sb has the nilfs magic number */
17695+{
17696+#if IS_ENABLED(CONFIG_NILFS2_FS) /* the Kconfig symbol of nilfs2 is NILFS2_FS, CONFIG_NILFS is never defined */
17697+ return sb->s_magic == NILFS_SUPER_MAGIC;
17698+#else
17699+ return 0; /* nilfs2 is not configured, nothing can match */
17700+#endif
17701+}
17702+
17703+static inline int au_test_hfsplus(struct super_block *sb __maybe_unused)
17704+{
17705+#if IS_ENABLED(CONFIG_HFSPLUS_FS)
17706+ return sb->s_magic == HFSPLUS_SUPER_MAGIC;
17707+#else
17708+ return 0;
17709+#endif
17710+}
17711+
17712+/* ---------------------------------------------------------------------- */
17713+/*
17714+ * the filesystems which can't be an aufs branch. ramfs is one of them unless CONFIG_AUFS_BR_RAMFS is set.
17715+ */
17716+static inline int au_test_fs_unsuppoted(struct super_block *sb) /* NOTE(review): 'unsuppoted' is misspelled, but renaming would presumably break callers */
17717+{
17718+ return
17719+#ifndef CONFIG_AUFS_BR_RAMFS
17720+ au_test_ramfs(sb) ||
17721+#endif
17722+ au_test_procfs(sb)
17723+ || au_test_sysfs(sb)
17724+ || au_test_configfs(sb)
17725+ || au_test_debugfs(sb)
17726+ || au_test_securityfs(sb)
17727+ || au_test_xenfs(sb)
17728+ || au_test_ecryptfs(sb)
17729+ /* || !strcmp(au_sbtype(sb), "unionfs") */
17730+ || au_test_aufs(sb); /* will be supported in next version */
17731+}
17732+
17733+static inline int au_test_fs_remote(struct super_block *sb)
17734+{
17735+ return !au_test_tmpfs(sb)
17736+#ifdef CONFIG_AUFS_BR_RAMFS
17737+ && !au_test_ramfs(sb)
17738+#endif
17739+ && !(sb->s_type->fs_flags & FS_REQUIRES_DEV);
17740+}
17741+
17742+/* ---------------------------------------------------------------------- */
17743+
17744+/*
17745+ * Note: these functions (below) are created after reading ->getattr() in all
17746+ * filesystems under linux/fs. it means we have to do so in every update...
17747+ */
17748+
17749+/*
17750+ * some filesystems require getattr to refresh the inode attributes before
17751+ * referencing.
17752+ * in most cases, we can rely on the inode attribute in NFS (or every remote fs)
17753+ * and leave the work for d_revalidate()
17754+ */
17755+static inline int au_test_fs_refresh_iattr(struct super_block *sb)
17756+{
17757+ return au_test_nfs(sb)
17758+ || au_test_fuse(sb)
17759+ /* || au_test_btrfs(sb) */ /* untested */
17760+ ;
17761+}
17762+
17763+/*
17764+ * filesystems which don't maintain i_size or i_blocks.
17765+ */
17766+static inline int au_test_fs_bad_iattr_size(struct super_block *sb)
17767+{
17768+ return au_test_xfs(sb)
17769+ || au_test_btrfs(sb)
17770+ || au_test_ubifs(sb)
17771+ || au_test_hfsplus(sb) /* maintained, but incorrect */
17772+ /* || au_test_minix(sb) */ /* untested */
17773+ ;
17774+}
17775+
17776+/*
17777+ * filesystems which don't store the correct value in some of their inode
17778+ * attributes.
17779+ */
17780+static inline int au_test_fs_bad_iattr(struct super_block *sb)
17781+{
17782+ return au_test_fs_bad_iattr_size(sb)
17783+ || au_test_fat(sb)
17784+ || au_test_msdos(sb)
17785+ || au_test_vfat(sb);
17786+}
17787+
17788+/* they don't check i_nlink in link(2) */
17789+static inline int au_test_fs_no_limit_nlink(struct super_block *sb)
17790+{
17791+ return au_test_tmpfs(sb)
17792+#ifdef CONFIG_AUFS_BR_RAMFS
17793+ || au_test_ramfs(sb)
17794+#endif
17795+ || au_test_ubifs(sb)
17796+ || au_test_hfsplus(sb);
17797+}
17798+
17799+/*
17800+ * filesystems which set S_NOATIME and S_NOCMTIME.
17801+ */
17802+static inline int au_test_fs_notime(struct super_block *sb)
17803+{
17804+ return au_test_nfs(sb)
17805+ || au_test_fuse(sb)
17806+ || au_test_ubifs(sb)
17807+ ;
17808+}
17809+
17810+/* temporary support for i#1 in cramfs */
17811+static inline int au_test_fs_unique_ino(struct inode *inode)
17812+{
17813+ if (au_test_cramfs(inode->i_sb))
17814+ return inode->i_ino != 1;
17815+ return 1;
17816+}
17817+
17818+/* ---------------------------------------------------------------------- */
17819+
17820+/*
17821+ * the filesystem where the xino files are placed must support i/o after unlink
17822+ * and maintain i_size and i_blocks. returns non-zero when @sb doesn't qualify.
17823+ */
17824+static inline int au_test_fs_bad_xino(struct super_block *sb)
17825+{
17826+ return au_test_fs_remote(sb)
17827+ || au_test_fs_bad_iattr_size(sb)
17828+ /* don't want unnecessary work for xino */
17829+ || au_test_aufs(sb)
17830+ || au_test_ecryptfs(sb)
17831+ || au_test_nilfs(sb);
17832+}
17833+
17834+static inline int au_test_fs_trunc_xino(struct super_block *sb)
17835+{
17836+ return au_test_tmpfs(sb)
17837+ || au_test_ramfs(sb);
17838+}
17839+
17840+/*
17841+ * test if the @sb is real-readonly.
17842+ */
17843+static inline int au_test_fs_rr(struct super_block *sb)
17844+{
17845+ return au_test_squashfs(sb)
17846+ || au_test_iso9660(sb)
17847+ || au_test_cramfs(sb)
17848+ || au_test_romfs(sb);
17849+}
17850+
17851+/*
17852+ * test if the @inode is nfs with 'noacl' option
17853+ * NFS always sets SB_POSIXACL regardless its mount option 'noacl.'
17854+ */
17855+static inline int au_test_nfs_noacl(struct inode *inode)
17856+{
17857+ return au_test_nfs(inode->i_sb)
17858+ /* && IS_POSIXACL(inode) */
17859+ && !nfs_server_capable(inode, NFS_CAP_ACLS);
17860+}
17861+
17862+#endif /* __KERNEL__ */
17863+#endif /* __AUFS_FSTYPE_H__ */
17864diff -urN /usr/share/empty/fs/aufs/hbl.h linux/fs/aufs/hbl.h
17865--- /usr/share/empty/fs/aufs/hbl.h 1970-01-01 01:00:00.000000000 +0100
17866+++ linux/fs/aufs/hbl.h 2022-11-05 23:02:18.965889284 +0100
17867@@ -0,0 +1,65 @@
17868+/* SPDX-License-Identifier: GPL-2.0 */
17869+/*
17870+ * Copyright (C) 2017-2022 Junjiro R. Okajima
17871+ *
17872+ * This program is free software; you can redistribute it and/or modify
17873+ * it under the terms of the GNU General Public License as published by
17874+ * the Free Software Foundation; either version 2 of the License, or
17875+ * (at your option) any later version.
17876+ *
17877+ * This program is distributed in the hope that it will be useful,
17878+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17879+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17880+ * GNU General Public License for more details.
17881+ *
17882+ * You should have received a copy of the GNU General Public License
17883+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
17884+ */
17885+
17886+/*
17887+ * helpers for hlist_bl.h
17888+ */
17889+
17890+#ifndef __AUFS_HBL_H__
17891+#define __AUFS_HBL_H__
17892+
17893+#ifdef __KERNEL__
17894+
17895+#include <linux/list_bl.h>
17896+
17897+static inline void au_hbl_add(struct hlist_bl_node *node,
17898+ struct hlist_bl_head *hbl)
17899+{
17900+ hlist_bl_lock(hbl);
17901+ hlist_bl_add_head(node, hbl);
17902+ hlist_bl_unlock(hbl);
17903+}
17904+
17905+static inline void au_hbl_del(struct hlist_bl_node *node,
17906+ struct hlist_bl_head *hbl)
17907+{
17908+ hlist_bl_lock(hbl);
17909+ hlist_bl_del(node);
17910+ hlist_bl_unlock(hbl);
17911+}
17912+
17913+#define au_hbl_for_each(pos, head) \
17914+ for (pos = hlist_bl_first(head); \
17915+ pos; \
17916+ pos = pos->next)
17917+
17918+static inline unsigned long au_hbl_count(struct hlist_bl_head *hbl) /* the number of nodes on @hbl */
17919+{
17920+ unsigned long cnt;
17921+ struct hlist_bl_node *pos;
17922+
17923+ cnt = 0;
17924+ hlist_bl_lock(hbl); /* the list is walked with its lock held */
17925+ au_hbl_for_each(pos, hbl)
17926+ cnt++;
17927+ hlist_bl_unlock(hbl);
17928+ return cnt;
17929+}
17930+
17931+#endif /* __KERNEL__ */
17932+#endif /* __AUFS_HBL_H__ */
17933diff -urN /usr/share/empty/fs/aufs/hfsnotify.c linux/fs/aufs/hfsnotify.c
17934--- /usr/share/empty/fs/aufs/hfsnotify.c 1970-01-01 01:00:00.000000000 +0100
17935+++ linux/fs/aufs/hfsnotify.c 2022-11-05 23:02:18.965889284 +0100
17936@@ -0,0 +1,290 @@
17937+// SPDX-License-Identifier: GPL-2.0
17938+/*
17939+ * Copyright (C) 2005-2022 Junjiro R. Okajima
17940+ *
17941+ * This program is free software; you can redistribute it and/or modify
17942+ * it under the terms of the GNU General Public License as published by
17943+ * the Free Software Foundation; either version 2 of the License, or
17944+ * (at your option) any later version.
17945+ *
17946+ * This program is distributed in the hope that it will be useful,
17947+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
17948+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17949+ * GNU General Public License for more details.
17950+ *
17951+ * You should have received a copy of the GNU General Public License
17952+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
17953+ */
17954+
17955+/*
17956+ * fsnotify for the lower directories
17957+ */
17958+
17959+#include "aufs.h"
17960+
17961+/* FS_IN_IGNORED is unnecessary */
17962+static const __u32 AuHfsnMask = (FS_MOVED_TO | FS_MOVED_FROM | FS_DELETE
17963+ | FS_CREATE | FS_EVENT_ON_CHILD);
17964+static DECLARE_WAIT_QUEUE_HEAD(au_hfsn_wq);
17965+static __cacheline_aligned_in_smp atomic64_t au_hfsn_ifree = ATOMIC64_INIT(0);
17966+
17967+static void au_hfsn_free_mark(struct fsnotify_mark *mark) /* ->free_mark of au_hfsn_ops */
17968+{
17969+ struct au_hnotify *hn = container_of(mark, struct au_hnotify,
17970+ hn_mark);
17971+ /* AuDbg("here\n"); */
17972+ au_cache_free_hnotify(hn); /* the mark is embedded in @hn, so this frees the mark too */
17973+ smp_mb__before_atomic(); /* for atomic64_dec */
17974+ if (atomic64_dec_and_test(&au_hfsn_ifree)) /* incremented by au_hfsn_free() */
17975+ wake_up(&au_hfsn_wq); /* au_hfsn_fin() waits here for the counter to reach zero */
17976+}
17977+
17978+static int au_hfsn_alloc(struct au_hinode *hinode) /* ->alloc of au_hnotify_op: attach the mark in hinode->hi_notify to hinode->hi_inode */
17979+{
17980+ int err;
17981+ struct au_hnotify *hn;
17982+ struct super_block *sb;
17983+ struct au_branch *br;
17984+ struct fsnotify_mark *mark;
17985+ aufs_bindex_t bindex;
17986+
17987+ hn = hinode->hi_notify;
17988+ sb = hn->hn_aufs_inode->i_sb;
17989+ bindex = au_br_index(sb, hinode->hi_id);
17990+ br = au_sbr(sb, bindex);
17991+ AuDebugOn(!br->br_hfsn);
17992+
17993+ mark = &hn->hn_mark;
17994+ fsnotify_init_mark(mark, br->br_hfsn->hfsn_group); /* the group made by au_hfsn_init_br() */
17995+ mark->mask = AuHfsnMask;
17996+ /*
17997+ * by udba rename or rmdir, aufs assigns a new inode to the known
17998+ * h_inode, so specify 1 to allow dups.
17999+ */
18000+ lockdep_off();
18001+ err = fsnotify_add_inode_mark(mark, hinode->hi_inode, /*allow_dups*/1);
18002+ lockdep_on();
18003+
18004+ return err; /* whatever fsnotify_add_inode_mark() returned */
18005+}
18006+
18007+static int au_hfsn_free(struct au_hinode *hinode, struct au_hnotify *hn) /* ->free of au_hnotify_op, always returns 0 */
18008+{
18009+ struct fsnotify_mark *mark;
18010+ unsigned long long ull;
18011+ struct fsnotify_group *group;
18012+
18013+ ull = atomic64_inc_return(&au_hfsn_ifree); /* one more free is pending, au_hfsn_free_mark() decrements */
18014+ BUG_ON(!ull); /* the counter wrapped around to zero */
18015+
18016+ mark = &hn->hn_mark;
18017+ spin_lock(&mark->lock);
18018+ group = mark->group;
18019+ fsnotify_get_group(group); /* hold the group until the put below */
18020+ spin_unlock(&mark->lock);
18021+ lockdep_off();
18022+ fsnotify_destroy_mark(mark, group);
18023+ fsnotify_put_mark(mark);
18024+ fsnotify_put_group(group);
18025+ lockdep_on();
18026+
18027+ /* @hn is freed by au_hfsn_free_mark(); presumably 0 tells the caller not to free it */
18028+ return 0;
18029+}
18030+
18031+/* ---------------------------------------------------------------------- */
18032+
18033+static void au_hfsn_ctl(struct au_hinode *hinode, int do_set)
18034+{
18035+ struct fsnotify_mark *mark;
18036+
18037+ mark = &hinode->hi_notify->hn_mark;
18038+ spin_lock(&mark->lock);
18039+ if (do_set) {
18040+ AuDebugOn(mark->mask & AuHfsnMask);
18041+ mark->mask |= AuHfsnMask;
18042+ } else {
18043+ AuDebugOn(!(mark->mask & AuHfsnMask));
18044+ mark->mask &= ~AuHfsnMask;
18045+ }
18046+ spin_unlock(&mark->lock);
18047+ /* fsnotify_recalc_inode_mask(hinode->hi_inode); */
18048+}
18049+
18050+/* ---------------------------------------------------------------------- */
18051+
18052+/* #define AuDbgHnotify */
18053+#ifdef AuDbgHnotify
18054+static char *au_hfsn_name(u32 mask)
18055+{
18056+#ifdef CONFIG_AUFS_DEBUG
18057+#define test_ret(flag) \
18058+ do { \
18059+ if (mask & flag) \
18060+ return #flag; \
18061+ } while (0)
18062+ test_ret(FS_ACCESS);
18063+ test_ret(FS_MODIFY);
18064+ test_ret(FS_ATTRIB);
18065+ test_ret(FS_CLOSE_WRITE);
18066+ test_ret(FS_CLOSE_NOWRITE);
18067+ test_ret(FS_OPEN);
18068+ test_ret(FS_MOVED_FROM);
18069+ test_ret(FS_MOVED_TO);
18070+ test_ret(FS_CREATE);
18071+ test_ret(FS_DELETE);
18072+ test_ret(FS_DELETE_SELF);
18073+ test_ret(FS_MOVE_SELF);
18074+ test_ret(FS_UNMOUNT);
18075+ test_ret(FS_Q_OVERFLOW);
18076+ test_ret(FS_IN_IGNORED);
18077+ test_ret(FS_ISDIR);
18078+ test_ret(FS_IN_ONESHOT);
18079+ test_ret(FS_EVENT_ON_CHILD);
18080+ return "";
18081+#undef test_ret
18082+#else
18083+ return "??";
18084+#endif
18085+}
18086+#endif
18087+
18088+/* ---------------------------------------------------------------------- */
18089+
18090+static void au_hfsn_free_group(struct fsnotify_group *group)
18091+{
18092+ struct au_br_hfsnotify *hfsn = group->private;
18093+
18094+ /* AuDbg("here\n"); */
18095+ au_kfree_try_rcu(hfsn);
18096+}
18097+
18098+static int au_hfsn_handle_event(struct fsnotify_group *group, /* ->handle_event of au_hfsn_ops */
18099+ u32 mask, const void *data, int data_type,
18100+ struct inode *dir,
18101+ const struct qstr *file_name, u32 cookie,
18102+ struct fsnotify_iter_info *iter_info)
18103+{
18104+ int err;
18105+ struct au_hnotify *hnotify;
18106+ struct inode *h_dir, *h_inode;
18107+ struct fsnotify_mark *inode_mark;
18108+
18109+ AuDebugOn(!(data_type == FSNOTIFY_EVENT_INODE
18110+ || data_type == FSNOTIFY_EVENT_DENTRY));
18111+
18112+ err = 0;
18113+ /* if FS_UNMOUNT happens, there must be another bug */
18114+ AuDebugOn(mask & FS_UNMOUNT);
18115+ if (mask & (FS_IN_IGNORED | FS_UNMOUNT))
18116+ goto out; /* these events are dropped, returning 0 */
18117+
18118+ h_dir = dir;
18119+ h_inode = NULL; /* no child inode is handed to au_hnotify() */
18120+#ifdef AuDbgHnotify
18121+ au_debug_on();
18122+ if (1 || file_name->len != sizeof(AUFS_XINO_FNAME) - 1 /* @file_name is a pointer, so '->' is required to compile */
18123+ || strncmp(file_name->name, AUFS_XINO_FNAME, file_name->len)) {
18124+ AuDbg("i%lu, mask 0x%x %s, hcname %.*s, hi%lu\n",
18125+ h_dir->i_ino, mask, au_hfsn_name(mask),
18126+ AuLNPair(file_name), h_inode ? h_inode->i_ino : 0);
18127+ /* WARN_ON(1); */
18128+ }
18129+ au_debug_off();
18130+#endif
18131+
18132+ inode_mark = fsnotify_iter_inode_mark(iter_info);
18133+ AuDebugOn(!inode_mark);
18134+ hnotify = container_of(inode_mark, struct au_hnotify, hn_mark); /* the mark is embedded in au_hnotify */
18135+ err = au_hnotify(h_dir, hnotify, mask, file_name, h_inode);
18136+
18137+out:
18138+ return err; /* 0 for a dropped event, otherwise the result of au_hnotify() */
18139+}
18140+
18141+static struct fsnotify_ops au_hfsn_ops = {
18142+ .handle_event = au_hfsn_handle_event,
18143+ .free_group_priv = au_hfsn_free_group,
18144+ .free_mark = au_hfsn_free_mark
18145+};
18146+
18147+/* ---------------------------------------------------------------------- */
18148+
18149+static void au_hfsn_fin_br(struct au_branch *br)
18150+{
18151+ struct au_br_hfsnotify *hfsn;
18152+
18153+ hfsn = br->br_hfsn;
18154+ if (hfsn) {
18155+ lockdep_off();
18156+ fsnotify_put_group(hfsn->hfsn_group);
18157+ lockdep_on();
18158+ }
18159+}
18160+
18161+static int au_hfsn_init_br(struct au_branch *br, int perm) /* ->init_br of au_hnotify_op; returns 0 or a negative errno */
18162+{
18163+ int err;
18164+ struct fsnotify_group *group;
18165+ struct au_br_hfsnotify *hfsn;
18166+
18167+ err = 0;
18168+ br->br_hfsn = NULL;
18169+ if (!au_br_hnotifyable(perm))
18170+ goto out; /* succeed, leaving br_hfsn NULL */
18171+
18172+ err = -ENOMEM;
18173+ hfsn = kmalloc(sizeof(*hfsn), GFP_NOFS);
18174+ if (unlikely(!hfsn))
18175+ goto out;
18176+
18177+ err = 0;
18178+ group = fsnotify_alloc_group(&au_hfsn_ops,
18179+ /*flags - not for userspace*/0);
18180+ if (IS_ERR(group)) {
18181+ err = PTR_ERR(group);
18182+ pr_err("fsnotify_alloc_group() failed, %d\n", err);
18183+ goto out_hfsn;
18184+ }
18185+
18186+ group->private = hfsn; /* freed by au_hfsn_free_group(), the ->free_group_priv of au_hfsn_ops */
18187+ hfsn->hfsn_group = group;
18188+ br->br_hfsn = hfsn;
18189+ goto out; /* success */
18190+
18191+out_hfsn:
18192+ au_kfree_try_rcu(hfsn);
18193+out:
18194+ return err;
18195+}
18196+
18197+static int au_hfsn_reset_br(unsigned int udba, struct au_branch *br, int perm)
18198+{
18199+ int err;
18200+
18201+ err = 0;
18202+ if (!br->br_hfsn)
18203+ err = au_hfsn_init_br(br, perm);
18204+
18205+ return err;
18206+}
18207+
18208+/* ---------------------------------------------------------------------- */
18209+
18210+static void au_hfsn_fin(void)
18211+{
18212+ AuDbg("au_hfsn_ifree %lld\n", (long long)atomic64_read(&au_hfsn_ifree));
18213+ wait_event(au_hfsn_wq, !atomic64_read(&au_hfsn_ifree));
18214+}
18215+
18216+const struct au_hnotify_op au_hnotify_op = {
18217+ .ctl = au_hfsn_ctl,
18218+ .alloc = au_hfsn_alloc,
18219+ .free = au_hfsn_free,
18220+
18221+ .fin = au_hfsn_fin,
18222+
18223+ .reset_br = au_hfsn_reset_br,
18224+ .fin_br = au_hfsn_fin_br,
18225+ .init_br = au_hfsn_init_br
18226+};
18227diff -urN /usr/share/empty/fs/aufs/hfsplus.c linux/fs/aufs/hfsplus.c
18228--- /usr/share/empty/fs/aufs/hfsplus.c 1970-01-01 01:00:00.000000000 +0100
18229+++ linux/fs/aufs/hfsplus.c 2022-11-05 23:02:18.965889284 +0100
18230@@ -0,0 +1,60 @@
18231+// SPDX-License-Identifier: GPL-2.0
18232+/*
18233+ * Copyright (C) 2010-2022 Junjiro R. Okajima
18234+ *
18235+ * This program is free software; you can redistribute it and/or modify
18236+ * it under the terms of the GNU General Public License as published by
18237+ * the Free Software Foundation; either version 2 of the License, or
18238+ * (at your option) any later version.
18239+ *
18240+ * This program is distributed in the hope that it will be useful,
18241+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18242+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18243+ * GNU General Public License for more details.
18244+ *
18245+ * You should have received a copy of the GNU General Public License
18246+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
18247+ */
18248+
18249+/*
18250+ * special support for filesystems which acquire an inode mutex
18251+ * at the final close of a file, e.g. hfsplus.
18252+ *
18253+ * This trick is very simple and stupid: open the file before the really
18254+ * necessary open, to tell hfsplus that this is not the final close.
18255+ * The caller should call au_h_open_pre() after acquiring the inode mutex,
18256+ * and au_h_open_post() after releasing it.
18257+ */
18258+
18259+#include "aufs.h"
18260+
18261+struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
18262+ int force_wr)
18263+{
18264+ struct file *h_file;
18265+ struct dentry *h_dentry;
18266+
18267+ h_dentry = au_h_dptr(dentry, bindex);
18268+ AuDebugOn(!h_dentry);
18269+ AuDebugOn(d_is_negative(h_dentry));
18270+
18271+ h_file = NULL;
18272+ if (au_test_hfsplus(h_dentry->d_sb)
18273+ && d_is_reg(h_dentry))
18274+ h_file = au_h_open(dentry, bindex,
18275+ O_RDONLY | O_NOATIME | O_LARGEFILE,
18276+ /*file*/NULL, force_wr);
18277+ return h_file;
18278+}
18279+
18280+void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
18281+ struct file *h_file)
18282+{
18283+ struct au_branch *br;
18284+
18285+ if (h_file) {
18286+ fput(h_file);
18287+ br = au_sbr(dentry->d_sb, bindex);
18288+ au_lcnt_dec(&br->br_nfiles);
18289+ }
18290+}
18291diff -urN /usr/share/empty/fs/aufs/hnotify.c linux/fs/aufs/hnotify.c
18292--- /usr/share/empty/fs/aufs/hnotify.c 1970-01-01 01:00:00.000000000 +0100
18293+++ linux/fs/aufs/hnotify.c 2022-11-05 23:02:18.965889284 +0100
18294@@ -0,0 +1,715 @@
18295+// SPDX-License-Identifier: GPL-2.0
18296+/*
18297+ * Copyright (C) 2005-2022 Junjiro R. Okajima
18298+ *
18299+ * This program is free software; you can redistribute it and/or modify
18300+ * it under the terms of the GNU General Public License as published by
18301+ * the Free Software Foundation; either version 2 of the License, or
18302+ * (at your option) any later version.
18303+ *
18304+ * This program is distributed in the hope that it will be useful,
18305+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
18306+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18307+ * GNU General Public License for more details.
18308+ *
18309+ * You should have received a copy of the GNU General Public License
18310+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
18311+ */
18312+
18313+/*
18314+ * abstraction to notify the direct changes on lower directories
18315+ */
18316+
18317+/* #include <linux/iversion.h> */
18318+#include "aufs.h"
18319+
18320+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode)
18321+{
18322+ int err;
18323+ struct au_hnotify *hn;
18324+
18325+ err = -ENOMEM;
18326+ hn = au_cache_alloc_hnotify();
18327+ if (hn) {
18328+ hn->hn_aufs_inode = inode;
18329+ hinode->hi_notify = hn;
18330+ err = au_hnotify_op.alloc(hinode);
18331+ AuTraceErr(err);
18332+ if (unlikely(err)) {
18333+ hinode->hi_notify = NULL;
18334+ au_cache_free_hnotify(hn);
18335+ /*
18336+ * The upper dir was removed by udba, but the same named
18337+ * dir left. In this case, aufs assigns a new inode
18338+ * number and set the monitor again.
18339+ * For the lower dir, the old monitor is still left.
18340+ */
18341+ if (err == -EEXIST)
18342+ err = 0;
18343+ }
18344+ }
18345+
18346+ AuTraceErr(err);
18347+ return err;
18348+}
18349+
18350+void au_hn_free(struct au_hinode *hinode)
18351+{
18352+ struct au_hnotify *hn;
18353+
18354+ hn = hinode->hi_notify;
18355+ if (hn) {
18356+ hinode->hi_notify = NULL;
18357+ if (au_hnotify_op.free(hinode, hn))
18358+ au_cache_free_hnotify(hn);
18359+ }
18360+}
18361+
18362+/* ---------------------------------------------------------------------- */
18363+
18364+void au_hn_ctl(struct au_hinode *hinode, int do_set)
18365+{
18366+ if (hinode->hi_notify)
18367+ au_hnotify_op.ctl(hinode, do_set);
18368+}
18369+
18370+void au_hn_reset(struct inode *inode, unsigned int flags)
18371+{
18372+ aufs_bindex_t bindex, bbot;
18373+ struct inode *hi;
18374+ struct dentry *iwhdentry;
18375+
18376+ bbot = au_ibbot(inode);
18377+ for (bindex = au_ibtop(inode); bindex <= bbot; bindex++) {
18378+ hi = au_h_iptr(inode, bindex);
18379+ if (!hi)
18380+ continue;
18381+
18382+ /* inode_lock_nested(hi, AuLsc_I_CHILD); */
18383+ iwhdentry = au_hi_wh(inode, bindex);
18384+ if (iwhdentry)
18385+ dget(iwhdentry);
18386+ au_igrab(hi);
18387+ au_set_h_iptr(inode, bindex, NULL, 0);
18388+ au_set_h_iptr(inode, bindex, au_igrab(hi),
18389+ flags & ~AuHi_XINO);
18390+ iput(hi);
18391+ dput(iwhdentry);
18392+ /* inode_unlock(hi); */
18393+ }
18394+}
18395+
18396+/* ---------------------------------------------------------------------- */
18397+
18398+static int hn_xino(struct inode *inode, struct inode *h_inode)
18399+{
18400+ int err;
18401+ aufs_bindex_t bindex, bbot, bfound, btop;
18402+ struct inode *h_i;
18403+
18404+ err = 0;
18405+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
18406+ pr_warn("branch root dir was changed\n");
18407+ goto out;
18408+ }
18409+
18410+ bfound = -1;
18411+ bbot = au_ibbot(inode);
18412+ btop = au_ibtop(inode);
18413+#if 0 /* reserved for future use */
18414+ if (bindex == bbot) {
18415+ /* keep this ino in rename case */
18416+ goto out;
18417+ }
18418+#endif
18419+ for (bindex = btop; bindex <= bbot; bindex++)
18420+ if (au_h_iptr(inode, bindex) == h_inode) {
18421+ bfound = bindex;
18422+ break;
18423+ }
18424+ if (bfound < 0)
18425+ goto out;
18426+
18427+ for (bindex = btop; bindex <= bbot; bindex++) {
18428+ h_i = au_h_iptr(inode, bindex);
18429+ if (!h_i)
18430+ continue;
18431+
18432+ err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0);
18433+ /* ignore this error */
18434+ /* bad action? */
18435+ }
18436+
18437+ /* children inode number will be broken */
18438+
18439+out:
18440+ AuTraceErr(err);
18441+ return err;
18442+}
18443+
18444+static int hn_gen_tree(struct dentry *dentry)
18445+{
18446+ int err, i, j, ndentry;
18447+ struct au_dcsub_pages dpages;
18448+ struct au_dpage *dpage;
18449+ struct dentry **dentries;
18450+
18451+ err = au_dpages_init(&dpages, GFP_NOFS);
18452+ if (unlikely(err))
18453+ goto out;
18454+ err = au_dcsub_pages(&dpages, dentry, NULL, NULL);
18455+ if (unlikely(err))
18456+ goto out_dpages;
18457+
18458+ for (i = 0; i < dpages.ndpage; i++) {
18459+ dpage = dpages.dpages + i;
18460+ dentries = dpage->dentries;
18461+ ndentry = dpage->ndentry;
18462+ for (j = 0; j < ndentry; j++) {
18463+ struct dentry *d;
18464+
18465+ d = dentries[j];
18466+ if (IS_ROOT(d))
18467+ continue;
18468+
18469+ au_digen_dec(d);
18470+ if (d_really_is_positive(d))
18471+ /* todo: reset children xino?
18472+ cached children only? */
18473+ au_iigen_dec(d_inode(d));
18474+ }
18475+ }
18476+
18477+out_dpages:
18478+ au_dpages_free(&dpages);
18479+out:
18480+ return err;
18481+}
18482+
18483+/*
18484+ * return 0 if processed, non-zero to let hn_job() retry via hn_gen_by_name().
18485+ */
18486+static int hn_gen_by_inode(char *name, unsigned int nlen, struct inode *inode,
18487+ const unsigned int isdir)
18488+{
18489+ int err;
18490+ struct dentry *d;
18491+ struct qstr *dname;
18492+
18493+ err = 1;
18494+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
18495+ pr_warn("branch root dir was changed\n");
18496+ err = 0;
18497+ goto out;
18498+ }
18499+
18500+ if (!isdir) {
18501+ AuDebugOn(!name);
18502+ au_iigen_dec(inode);
18503+ spin_lock(&inode->i_lock);
18504+ hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
18505+ spin_lock(&d->d_lock);
18506+ dname = &d->d_name;
18507+ if (dname->len != nlen
18508+ || memcmp(dname->name, name, nlen)) {
18509+ spin_unlock(&d->d_lock);
18510+ continue;
18511+ }
18512+ err = 0;
18513+ au_digen_dec(d);
18514+ spin_unlock(&d->d_lock);
18515+ break;
18516+ }
18517+ spin_unlock(&inode->i_lock);
18518+ } else {
18519+ au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIR);
18520+ d = d_find_any_alias(inode);
18521+ if (!d) {
18522+ au_iigen_dec(inode);
18523+ goto out;
18524+ }
18525+
18526+ spin_lock(&d->d_lock);
18527+ dname = &d->d_name;
18528+ if (dname->len == nlen && !memcmp(dname->name, name, nlen)) {
18529+ spin_unlock(&d->d_lock);
18530+ err = hn_gen_tree(d);
18531+ spin_lock(&d->d_lock);
18532+ }
18533+ spin_unlock(&d->d_lock);
18534+ dput(d);
18535+ }
18536+
18537+out:
18538+ AuTraceErr(err);
18539+ return err;
18540+}
18541+
18542+static int hn_gen_by_name(struct dentry *dentry, const unsigned int isdir)
18543+{
18544+ int err;
18545+
18546+ if (IS_ROOT(dentry)) {
18547+ pr_warn("branch root dir was changed\n");
18548+ return 0;
18549+ }
18550+
18551+ err = 0;
18552+ if (!isdir) {
18553+ au_digen_dec(dentry);
18554+ if (d_really_is_positive(dentry))
18555+ au_iigen_dec(d_inode(dentry));
18556+ } else {
18557+ au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR);
18558+ if (d_really_is_positive(dentry))
18559+ err = hn_gen_tree(dentry);
18560+ }
18561+
18562+ AuTraceErr(err);
18563+ return err;
18564+}
18565+
18566+/* ---------------------------------------------------------------------- */
18567+
18568+/* hnotify job flags */
18569+#define AuHnJob_XINO0 1
18570+#define AuHnJob_GEN (1 << 1)
18571+#define AuHnJob_DIRENT (1 << 2)
18572+#define AuHnJob_ISDIR (1 << 3)
18573+#define AuHnJob_TRYXINO0 (1 << 4)
18574+#define AuHnJob_MNTPNT (1 << 5)
18575+#define au_ftest_hnjob(flags, name) ((flags) & AuHnJob_##name)
18576+#define au_fset_hnjob(flags, name) \
18577+ do { (flags) |= AuHnJob_##name; } while (0)
18578+#define au_fclr_hnjob(flags, name) \
18579+ do { (flags) &= ~AuHnJob_##name; } while (0)
18580+
18581+enum {
18582+ AuHn_CHILD,
18583+ AuHn_PARENT,
18584+ AuHnLast
18585+};
18586+
18587+struct au_hnotify_args {
18588+ struct inode *h_dir, *dir, *h_child_inode;
18589+ u32 mask;
18590+ unsigned int flags[AuHnLast];
18591+ unsigned int h_child_nlen;
18592+ char h_child_name[];
18593+};
18594+
18595+struct hn_job_args {
18596+ unsigned int flags;
18597+ struct inode *inode, *h_inode, *dir, *h_dir;
18598+ struct dentry *dentry;
18599+ char *h_name;
18600+ int h_nlen;
18601+};
18602+
18603+static int hn_job(struct hn_job_args *a)
18604+{
18605+ const unsigned int isdir = au_ftest_hnjob(a->flags, ISDIR);
18606+ int e;
18607+
18608+ /* reset xino */
18609+ if (au_ftest_hnjob(a->flags, XINO0) && a->inode)
18610+ hn_xino(a->inode, a->h_inode); /* ignore this error */
18611+
18612+ if (au_ftest_hnjob(a->flags, TRYXINO0)
18613+ && a->inode
18614+ && a->h_inode) {
18615+ inode_lock_shared_nested(a->h_inode, AuLsc_I_CHILD);
18616+ if (!a->h_inode->i_nlink
18617+ && !(a->h_inode->i_state & I_LINKABLE))
18618+ hn_xino(a->inode, a->h_inode); /* ignore this error */
18619+ inode_unlock_shared(a->h_inode);
18620+ }
18621+
18622+ /* make the generation obsolete */
18623+ if (au_ftest_hnjob(a->flags, GEN)) {
18624+ e = -1;
18625+ if (a->inode)
18626+ e = hn_gen_by_inode(a->h_name, a->h_nlen, a->inode,
18627+ isdir);
18628+ if (e && a->dentry)
18629+ hn_gen_by_name(a->dentry, isdir);
18630+ /* ignore this error */
18631+ }
18632+
18633+ /* make dir entries obsolete */
18634+ if (au_ftest_hnjob(a->flags, DIRENT) && a->inode) {
18635+ struct au_vdir *vdir;
18636+
18637+ vdir = au_ivdir(a->inode);
18638+ if (vdir)
18639+ vdir->vd_jiffy = 0;
18640+ /* IMustLock(a->inode); */
18641+ /* inode_inc_iversion(a->inode); */
18642+ }
18643+
18644+ /* can do nothing but warn */
18645+ if (au_ftest_hnjob(a->flags, MNTPNT)
18646+ && a->dentry
18647+ && d_mountpoint(a->dentry))
18648+ pr_warn("mount-point %pd is removed or renamed\n", a->dentry);
18649+
18650+ return 0;
18651+}
18652+
18653+/* ---------------------------------------------------------------------- */
18654+
18655+static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen,
18656+ struct inode *dir)
18657+{
18658+ struct dentry *dentry, *d, *parent;
18659+ struct qstr *dname;
18660+
18661+ parent = d_find_any_alias(dir);
18662+ if (!parent)
18663+ return NULL;
18664+
18665+ dentry = NULL;
18666+ spin_lock(&parent->d_lock);
18667+ list_for_each_entry(d, &parent->d_subdirs, d_child) {
18668+ /* AuDbg("%pd\n", d); */
18669+ spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
18670+ dname = &d->d_name;
18671+ if (dname->len != nlen || memcmp(dname->name, name, nlen))
18672+ goto cont_unlock;
18673+ if (au_di(d))
18674+ au_digen_dec(d);
18675+ else
18676+ goto cont_unlock;
18677+ if (au_dcount(d) > 0) {
18678+ dentry = dget_dlock(d);
18679+ spin_unlock(&d->d_lock);
18680+ break;
18681+ }
18682+
18683+cont_unlock:
18684+ spin_unlock(&d->d_lock);
18685+ }
18686+ spin_unlock(&parent->d_lock);
18687+ dput(parent);
18688+
18689+ if (dentry)
18690+ di_write_lock_child(dentry);
18691+
18692+ return dentry;
18693+}
18694+
18695+static struct inode *lookup_wlock_by_ino(struct super_block *sb,
18696+ aufs_bindex_t bindex, ino_t h_ino)
18697+{
18698+ struct inode *inode;
18699+ ino_t ino;
18700+ int err;
18701+
18702+ inode = NULL;
18703+ err = au_xino_read(sb, bindex, h_ino, &ino);
18704+ if (!err && ino)
18705+ inode = ilookup(sb, ino);
18706+ if (!inode)
18707+ goto out;
18708+
18709+ if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
18710+ pr_warn("wrong root branch\n");
18711+ iput(inode);
18712+ inode = NULL;
18713+ goto out;
18714+ }
18715+
18716+ ii_write_lock_child(inode);
18717+
18718+out:
18719+ return inode;
18720+}
18721+
18722+static void au_hn_bh(void *_args)
18723+{
18724+ struct au_hnotify_args *a = _args;
18725+ struct super_block *sb;
18726+ aufs_bindex_t bindex, bbot, bfound;
18727+ unsigned char xino, try_iput;
18728+ int err;
18729+ struct inode *inode;
18730+ ino_t h_ino;
18731+ struct hn_job_args args;
18732+ struct dentry *dentry;
18733+ struct au_sbinfo *sbinfo;
18734+
18735+ AuDebugOn(!_args);
18736+ AuDebugOn(!a->h_dir);
18737+ AuDebugOn(!a->dir);
18738+ AuDebugOn(!a->mask);
18739+ AuDbg("mask 0x%x, i%lu, hi%lu, hci%lu\n",
18740+ a->mask, a->dir->i_ino, a->h_dir->i_ino,
18741+ a->h_child_inode ? a->h_child_inode->i_ino : 0);
18742+
18743+ inode = NULL;
18744+ dentry = NULL;
18745+ /*
18746+ * do not lock a->dir->i_mutex here
18747+ * because of d_revalidate() may cause a deadlock.
18748+ */
18749+ sb = a->dir->i_sb;
18750+ AuDebugOn(!sb);
18751+ sbinfo = au_sbi(sb);
18752+ AuDebugOn(!sbinfo);
18753+ si_write_lock(sb, AuLock_NOPLMW);
18754+
18755+ if (au_opt_test(sbinfo->si_mntflags, DIRREN))
18756+ switch (a->mask & FS_EVENTS_POSS_ON_CHILD) {
18757+ case FS_MOVED_FROM:
18758+ case FS_MOVED_TO:
18759+ AuWarn1("DIRREN with UDBA may not work correctly "
18760+ "for the direct rename(2)\n");
18761+ }
18762+
18763+ ii_read_lock_parent(a->dir);
18764+ bfound = -1;
18765+ bbot = au_ibbot(a->dir);
18766+ for (bindex = au_ibtop(a->dir); bindex <= bbot; bindex++)
18767+ if (au_h_iptr(a->dir, bindex) == a->h_dir) {
18768+ bfound = bindex;
18769+ break;
18770+ }
18771+ ii_read_unlock(a->dir);
18772+ if (unlikely(bfound < 0))
18773+ goto out;
18774+
18775+ xino = !!au_opt_test(au_mntflags(sb), XINO);
18776+ h_ino = 0;
18777+ if (a->h_child_inode)
18778+ h_ino = a->h_child_inode->i_ino;
18779+
18780+ if (a->h_child_nlen
18781+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], GEN)
18782+ || au_ftest_hnjob(a->flags[AuHn_CHILD], MNTPNT)))
18783+ dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen,
18784+ a->dir);
18785+ try_iput = 0;
18786+ if (dentry && d_really_is_positive(dentry))
18787+ inode = d_inode(dentry);
18788+ if (xino && !inode && h_ino
18789+ && (au_ftest_hnjob(a->flags[AuHn_CHILD], XINO0)
18790+ || au_ftest_hnjob(a->flags[AuHn_CHILD], TRYXINO0)
18791+ || au_ftest_hnjob(a->flags[AuHn_CHILD], GEN))) {
18792+ inode = lookup_wlock_by_ino(sb, bfound, h_ino);
18793+ try_iput = 1;
18794+ }
18795+
18796+ args.flags = a->flags[AuHn_CHILD];
18797+ args.dentry = dentry;
18798+ args.inode = inode;
18799+ args.h_inode = a->h_child_inode;
18800+ args.dir = a->dir;
18801+ args.h_dir = a->h_dir;
18802+ args.h_name = a->h_child_name;
18803+ args.h_nlen = a->h_child_nlen;
18804+ err = hn_job(&args);
18805+ if (dentry) {
18806+ if (au_di(dentry))
18807+ di_write_unlock(dentry);
18808+ dput(dentry);
18809+ }
18810+ if (inode && try_iput) {
18811+ ii_write_unlock(inode);
18812+ iput(inode);
18813+ }
18814+
18815+ ii_write_lock_parent(a->dir);
18816+ args.flags = a->flags[AuHn_PARENT];
18817+ args.dentry = NULL;
18818+ args.inode = a->dir;
18819+ args.h_inode = a->h_dir;
18820+ args.dir = NULL;
18821+ args.h_dir = NULL;
18822+ args.h_name = NULL;
18823+ args.h_nlen = 0;
18824+ err = hn_job(&args);
18825+ ii_write_unlock(a->dir);
18826+
18827+out:
18828+ iput(a->h_child_inode);
18829+ iput(a->h_dir);
18830+ iput(a->dir);
18831+ si_write_unlock(sb);
18832+ au_nwt_done(&sbinfo->si_nowait);
18833+ au_kfree_rcu(a);
18834+}
18835+
18836+/* ---------------------------------------------------------------------- */
18837+
18838+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
18839+ const struct qstr *h_child_qstr, struct inode *h_child_inode)
18840+{
18841+ int err, len;
18842+ unsigned int flags[AuHnLast], f;
18843+ unsigned char isdir, isroot, wh;
18844+ struct inode *dir;
18845+ struct au_hnotify_args *args;
18846+ char *p, *h_child_name;
18847+
18848+ err = 0;
18849+ AuDebugOn(!hnotify || !hnotify->hn_aufs_inode);
18850+ dir = igrab(hnotify->hn_aufs_inode);
18851+ if (!dir)
18852+ goto out;
18853+
18854+ isroot = (dir->i_ino == AUFS_ROOT_INO);
18855+ wh = 0;
18856+ h_child_name = (void *)h_child_qstr->name;
18857+ len = h_child_qstr->len;
18858+ if (h_child_name) {
18859+ if (len > AUFS_WH_PFX_LEN
18860+ && !memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
18861+ h_child_name += AUFS_WH_PFX_LEN;
18862+ len -= AUFS_WH_PFX_LEN;
18863+ wh = 1;
18864+ }
18865+ }
18866+
18867+ isdir = 0;
18868+ if (h_child_inode)
18869+ isdir = !!S_ISDIR(h_child_inode->i_mode);
18870+ flags[AuHn_PARENT] = AuHnJob_ISDIR;
18871+ flags[AuHn_CHILD] = 0;
18872+ if (isdir)
18873+ flags[AuHn_CHILD] = AuHnJob_ISDIR;
18874+ au_fset_hnjob(flags[AuHn_PARENT], DIRENT);
18875+ au_fset_hnjob(flags[AuHn_CHILD], GEN);
18876+ switch (mask & ALL_FSNOTIFY_DIRENT_EVENTS) {
18877+ case FS_MOVED_FROM:
18878+ case FS_MOVED_TO:
18879+ au_fset_hnjob(flags[AuHn_CHILD], XINO0);
18880+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
18881+ fallthrough;
18882+ case FS_CREATE:
18883+ AuDebugOn(!h_child_name);
18884+ break;
18885+
18886+ case FS_DELETE:
18887+ /*
18888+ * aufs will never be able to get this child inode.
18889+ * revalidation should be in d_revalidate()
18890+ * by checking i_nlink, i_generation or d_unhashed().
18891+ */
18892+ AuDebugOn(!h_child_name);
18893+ au_fset_hnjob(flags[AuHn_CHILD], TRYXINO0);
18894+ au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
18895+ break;
18896+
18897+ default:
18898+ AuDebugOn(1);
18899+ }
18900+
18901+ if (wh)
18902+ h_child_inode = NULL;
18903+
18904+ err = -ENOMEM;
18905+ /* au_hn_bh() will call iput() and free args; on failure it is done below */
18906+ args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS);
18907+ if (unlikely(!args)) {
18908+ AuErr1("no memory\n");
18909+ iput(dir);
18910+ goto out;
18911+ }
18912+ args->flags[AuHn_PARENT] = flags[AuHn_PARENT];
18913+ args->flags[AuHn_CHILD] = flags[AuHn_CHILD];
18914+ args->mask = mask;
18915+ args->dir = dir;
18916+ args->h_dir = igrab(h_dir);
18917+ if (h_child_inode)
18918+ h_child_inode = igrab(h_child_inode); /* can be NULL */
18919+ args->h_child_inode = h_child_inode;
18920+ args->h_child_nlen = len;
18921+ if (len) {
18922+ p = (void *)args;
18923+ p += sizeof(*args);
18924+ memcpy(p, h_child_name, len);
18925+ p[len] = 0;
18926+ }
18927+
18928+ /* NFS fires the event for silly-renamed one from kworker */
18929+ f = 0;
18930+ if (!dir->i_nlink
18931+ || (au_test_nfs(h_dir->i_sb) && (mask & FS_DELETE)))
18932+ f = AuWkq_NEST;
18933+ err = au_wkq_nowait(au_hn_bh, args, dir->i_sb, f);
18934+ if (unlikely(err)) {
18935+ pr_err("wkq %d\n", err);
18936+ iput(args->h_child_inode);
18937+ iput(args->h_dir);
18938+ iput(args->dir);
18939+ au_kfree_rcu(args);
18940+ }
18941+
18942+out:
18943+ return err;
18944+}
18945+
18946+/* ---------------------------------------------------------------------- */
18947+
18948+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm)
18949+{
18950+ int err;
18951+
18952+ AuDebugOn(!(udba & AuOptMask_UDBA));
18953+
18954+ err = 0;
18955+ if (au_hnotify_op.reset_br)
18956+ err = au_hnotify_op.reset_br(udba, br, perm);
18957+
18958+ return err;
18959+}
18960+
18961+int au_hnotify_init_br(struct au_branch *br, int perm)
18962+{
18963+ int err;
18964+
18965+ err = 0;
18966+ if (au_hnotify_op.init_br)
18967+ err = au_hnotify_op.init_br(br, perm);
18968+
18969+ return err;
18970+}
18971+
18972+void au_hnotify_fin_br(struct au_branch *br)
18973+{
18974+ if (au_hnotify_op.fin_br)
18975+ au_hnotify_op.fin_br(br);
18976+}
18977+
18978+static void au_hn_destroy_cache(void)
18979+{
18980+ kmem_cache_destroy(au_cache[AuCache_HNOTIFY]);
18981+ au_cache[AuCache_HNOTIFY] = NULL;
18982+}
18983+
18984+int __init au_hnotify_init(void)
18985+{
18986+ int err;
18987+
18988+ err = -ENOMEM;
18989+ au_cache[AuCache_HNOTIFY] = AuCache(au_hnotify);
18990+ if (au_cache[AuCache_HNOTIFY]) {
18991+ err = 0;
18992+ if (au_hnotify_op.init)
18993+ err = au_hnotify_op.init();
18994+ if (unlikely(err))
18995+ au_hn_destroy_cache();
18996+ }
18997+ AuTraceErr(err);
18998+ return err;
18999+}
19000+
19001+void au_hnotify_fin(void)
19002+{
19003+ if (au_hnotify_op.fin)
19004+ au_hnotify_op.fin();
19005+
19006+ /* cf. au_cache_fin() */
19007+ if (au_cache[AuCache_HNOTIFY])
19008+ au_hn_destroy_cache();
19009+}
19010diff -urN /usr/share/empty/fs/aufs/iinfo.c linux/fs/aufs/iinfo.c
19011--- /usr/share/empty/fs/aufs/iinfo.c 1970-01-01 01:00:00.000000000 +0100
19012+++ linux/fs/aufs/iinfo.c 2022-11-05 23:02:18.965889284 +0100
19013@@ -0,0 +1,286 @@
19014+// SPDX-License-Identifier: GPL-2.0
19015+/*
19016+ * Copyright (C) 2005-2022 Junjiro R. Okajima
19017+ *
19018+ * This program is free software; you can redistribute it and/or modify
19019+ * it under the terms of the GNU General Public License as published by
19020+ * the Free Software Foundation; either version 2 of the License, or
19021+ * (at your option) any later version.
19022+ *
19023+ * This program is distributed in the hope that it will be useful,
19024+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
19025+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19026+ * GNU General Public License for more details.
19027+ *
19028+ * You should have received a copy of the GNU General Public License
19029+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
19030+ */
19031+
19032+/*
19033+ * inode private data
19034+ */
19035+
19036+#include "aufs.h"
19037+
19038+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex)
19039+{
19040+ struct inode *h_inode;
19041+ struct au_hinode *hinode;
19042+
19043+ IiMustAnyLock(inode);
19044+
19045+ hinode = au_hinode(au_ii(inode), bindex);
19046+ h_inode = hinode->hi_inode;
19047+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
19048+ return h_inode;
19049+}
19050+
19051+/* todo: hard/soft set? */
19052+void au_hiput(struct au_hinode *hinode)
19053+{
19054+ au_hn_free(hinode);
19055+ dput(hinode->hi_whdentry);
19056+ iput(hinode->hi_inode);
19057+}
19058+
19059+unsigned int au_hi_flags(struct inode *inode, int isdir)
19060+{
19061+ unsigned int flags;
19062+ const unsigned int mnt_flags = au_mntflags(inode->i_sb);
19063+
19064+ flags = 0;
19065+ if (au_opt_test(mnt_flags, XINO))
19066+ au_fset_hi(flags, XINO);
19067+ if (isdir && au_opt_test(mnt_flags, UDBA_HNOTIFY))
19068+ au_fset_hi(flags, HNOTIFY);
19069+ return flags;
19070+}
19071+
19072+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
19073+ struct inode *h_inode, unsigned int flags)
19074+{
19075+ struct au_hinode *hinode;
19076+ struct inode *hi;
19077+ struct au_iinfo *iinfo = au_ii(inode);
19078+
19079+ IiMustWriteLock(inode);
19080+
19081+ hinode = au_hinode(iinfo, bindex);
19082+ hi = hinode->hi_inode;
19083+ AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
19084+
19085+ if (hi)
19086+ au_hiput(hinode);
19087+ hinode->hi_inode = h_inode;
19088+ if (h_inode) {
19089+ int err;
19090+ struct super_block *sb = inode->i_sb;
19091+ struct au_branch *br;
19092+
19093+ AuDebugOn(inode->i_mode
19094+ && (h_inode->i_mode & S_IFMT)
19095+ != (inode->i_mode & S_IFMT));
19096+ if (bindex == iinfo->ii_btop)
19097+ au_cpup_igen(inode, h_inode);
19098+ br = au_sbr(sb, bindex);
19099+ hinode->hi_id = br->br_id;
19100+ if (au_ftest_hi(flags, XINO)) {
19101+ err = au_xino_write(sb, bindex, h_inode->i_ino,
19102+ inode->i_ino);
19103+ if (unlikely(err))
19104+ AuIOErr1("failed au_xino_write() %d\n", err);
19105+ }
19106+
19107+ if (au_ftest_hi(flags, HNOTIFY)
19108+ && au_br_hnotifyable(br->br_perm)) {
19109+ err = au_hn_alloc(hinode, inode);
19110+ if (unlikely(err))
19111+ AuIOErr1("au_hn_alloc() %d\n", err);
19112+ }
19113+ }
19114+}
19115+
19116+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
19117+ struct dentry *h_wh)
19118+{
19119+ struct au_hinode *hinode;
19120+
19121+ IiMustWriteLock(inode);
19122+
19123+ hinode = au_hinode(au_ii(inode), bindex);
19124+ AuDebugOn(hinode->hi_whdentry);
19125+ hinode->hi_whdentry = h_wh;
19126+}
19127+
19128+void au_update_iigen(struct inode *inode, int half)
19129+{
19130+ struct au_iinfo *iinfo;
19131+ struct au_iigen *iigen;
19132+ unsigned int sigen;
19133+
19134+ sigen = au_sigen(inode->i_sb);
19135+ iinfo = au_ii(inode);
19136+ iigen = &iinfo->ii_generation;
19137+ spin_lock(&iigen->ig_spin);
19138+ iigen->ig_generation = sigen;
19139+ if (half)
19140+ au_ig_fset(iigen->ig_flags, HALF_REFRESHED);
19141+ else
19142+ au_ig_fclr(iigen->ig_flags, HALF_REFRESHED);
19143+ spin_unlock(&iigen->ig_spin);
19144+}
19145+
19146+/* it may be called at remount time, too */
19147+void au_update_ibrange(struct inode *inode, int do_put_zero)
19148+{
19149+ struct au_iinfo *iinfo;
19150+ aufs_bindex_t bindex, bbot;
19151+
19152+ AuDebugOn(au_is_bad_inode(inode));
19153+ IiMustWriteLock(inode);
19154+
19155+ iinfo = au_ii(inode);
19156+ if (do_put_zero && iinfo->ii_btop >= 0) {
19157+ for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot;
19158+ bindex++) {
19159+ struct inode *h_i;
19160+
19161+ h_i = au_hinode(iinfo, bindex)->hi_inode;
19162+ if (h_i
19163+ && !h_i->i_nlink
19164+ && !(h_i->i_state & I_LINKABLE))
19165+ au_set_h_iptr(inode, bindex, NULL, 0);
19166+ }
19167+ }
19168+
19169+ iinfo->ii_btop = -1;
19170+ iinfo->ii_bbot = -1;
19171+ bbot = au_sbbot(inode->i_sb);
19172+ for (bindex = 0; bindex <= bbot; bindex++)
19173+ if (au_hinode(iinfo, bindex)->hi_inode) {
19174+ iinfo->ii_btop = bindex;
19175+ break;
19176+ }
19177+ if (iinfo->ii_btop >= 0)
19178+ for (bindex = bbot; bindex >= iinfo->ii_btop; bindex--)
19179+ if (au_hinode(iinfo, bindex)->hi_inode) {
19180+ iinfo->ii_bbot = bindex;
19181+ break;
19182+ }
19183+ AuDebugOn(iinfo->ii_btop > iinfo->ii_bbot);
19184+}
19185+
19186+/* ---------------------------------------------------------------------- */
19187+
19188+void au_icntnr_init_once(void *_c)
19189+{
19190+ struct au_icntnr *c = _c;
19191+ struct au_iinfo *iinfo = &c->iinfo;
19192+
19193+ spin_lock_init(&iinfo->ii_generation.ig_spin);
19194+ au_rw_init(&iinfo->ii_rwsem);
19195+ inode_init_once(&c->vfs_inode);
19196+}
19197+
19198+void au_hinode_init(struct au_hinode *hinode)
19199+{
19200+ hinode->hi_inode = NULL;
19201+ hinode->hi_id = -1;
19202+ au_hn_init(hinode);
19203+ hinode->hi_whdentry = NULL;
19204+}
19205+
19206+int au_iinfo_init(struct inode *inode)
19207+{
19208+ struct au_iinfo *iinfo;
19209+ struct super_block *sb;
19210+ struct au_hinode *hi;
19211+ int nbr, i;
19212+
19213+ sb = inode->i_sb;
19214+ iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
19215+ nbr = au_sbbot(sb) + 1;
19216+ if (unlikely(nbr <= 0))
19217+ nbr = 1;
19218+ hi = kmalloc_array(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS);
19219+ if (hi) {
19220+ au_lcnt_inc(&au_sbi(sb)->si_ninodes);
19221+
19222+ iinfo->ii_hinode = hi;
19223+ for (i = 0; i < nbr; i++, hi++)
19224+ au_hinode_init(hi);
19225+
19226+ iinfo->ii_generation.ig_generation = au_sigen(sb);
19227+ iinfo->ii_btop = -1;
19228+ iinfo->ii_bbot = -1;
19229+ iinfo->ii_vdir = NULL;
19230+ return 0;
19231+ }
19232+ return -ENOMEM;
19233+}
19234+
19235+int au_hinode_realloc(struct au_iinfo *iinfo, int nbr, int may_shrink)
19236+{
19237+ int err, i;
19238+ struct au_hinode *hip;
19239+
19240+ AuRwMustWriteLock(&iinfo->ii_rwsem);
19241+
19242+ err = -ENOMEM;
19243+ hip = au_krealloc(iinfo->ii_hinode, sizeof(*hip) * nbr, GFP_NOFS,
19244+ may_shrink);
19245+ if (hip) {
19246+ iinfo->ii_hinode = hip;
19247+ i = iinfo->ii_bbot + 1;
19248+ hip += i;
19249+ for (; i < nbr; i++, hip++)
19250+ au_hinode_init(hip);
19251+ err = 0;
19252+ }
19253+
19254+ return err;
19255+}
19256+
19257+void au_iinfo_fin(struct inode *inode)
19258+{
19259+ struct au_iinfo *iinfo;
19260+ struct au_hinode *hi;
19261+ struct super_block *sb;
19262+ aufs_bindex_t bindex, bbot;
19263+ const unsigned char unlinked = !inode->i_nlink;
19264+
19265+ AuDebugOn(au_is_bad_inode(inode));
19266+
19267+ sb = inode->i_sb;
19268+ au_lcnt_dec(&au_sbi(sb)->si_ninodes);
19269+ if (si_pid_test(sb))
19270+ au_xino_delete_inode(inode, unlinked);
19271+ else {
19272+ /*
19273+ * it is safe to hide the dependency between sbinfo and
19274+ * sb->s_umount.
19275+ */
19276+ lockdep_off();
19277+ si_noflush_read_lock(sb);
19278+ au_xino_delete_inode(inode, unlinked);
19279+ si_read_unlock(sb);
19280+ lockdep_on();
19281+ }
19282+
19283+ iinfo = au_ii(inode);
19284+ if (iinfo->ii_vdir)
19285+ au_vdir_free(iinfo->ii_vdir);
19286+
19287+ bindex = iinfo->ii_btop;
19288+ if (bindex >= 0) {
19289+ hi = au_hinode(iinfo, bindex);
19290+ bbot = iinfo->ii_bbot;
19291+ while (bindex++ <= bbot) {
19292+ if (hi->hi_inode)
19293+ au_hiput(hi);
19294+ hi++;
19295+ }
19296+ }
19297+ au_kfree_rcu(iinfo->ii_hinode);
19298+ AuRwDestroy(&iinfo->ii_rwsem);
19299+}
19300diff -urN /usr/share/empty/fs/aufs/inode.c linux/fs/aufs/inode.c
19301--- /usr/share/empty/fs/aufs/inode.c 1970-01-01 01:00:00.000000000 +0100
19302+++ linux/fs/aufs/inode.c 2023-02-20 21:05:51.959693785 +0100
19303@@ -0,0 +1,531 @@
19304+// SPDX-License-Identifier: GPL-2.0
19305+/*
19306+ * Copyright (C) 2005-2022 Junjiro R. Okajima
19307+ *
19308+ * This program is free software; you can redistribute it and/or modify
19309+ * it under the terms of the GNU General Public License as published by
19310+ * the Free Software Foundation; either version 2 of the License, or
19311+ * (at your option) any later version.
19312+ *
19313+ * This program is distributed in the hope that it will be useful,
19314+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
19315+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19316+ * GNU General Public License for more details.
19317+ *
19318+ * You should have received a copy of the GNU General Public License
19319+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
19320+ */
19321+
19322+/*
19323+ * inode functions
19324+ */
19325+
19326+#include <linux/iversion.h>
19327+#include "aufs.h"
19328+/* Take one more reference on @inode via ihold() and return it; a NULL @inode is returned untouched. */
19329+struct inode *au_igrab(struct inode *inode)
19330+{
19331+ if (inode) {
19332+ AuDebugOn(!atomic_read(&inode->i_count)); /* debug check: i_count is expected to be non-zero already */
19333+ ihold(inode);
19334+ }
19335+ return inode;
19336+}
19337+/* Run au_cpup_attr_all() without force, call au_update_iigen() with half=1, and bump i_version if @do_version. */
19338+static void au_refresh_hinode_attr(struct inode *inode, int do_version)
19339+{
19340+ au_cpup_attr_all(inode, /*force*/0);
19341+ au_update_iigen(inode, /*half*/1);
19342+ if (do_version)
19343+ inode_inc_iversion(inode);
19344+}
19345+/* Move each lower inode to the index au_br_index() now gives for its hi_id; *update becomes 1 if one was dropped. */
19346+static int au_ii_refresh(struct inode *inode, int *update)
19347+{
19348+ int err, e, nbr;
19349+ umode_t type;
19350+ aufs_bindex_t bindex, new_bindex;
19351+ struct super_block *sb;
19352+ struct au_iinfo *iinfo;
19353+ struct au_hinode *p, *q, tmp;
19354+
19355+ AuDebugOn(au_is_bad_inode(inode));
19356+ IiMustWriteLock(inode);
19357+
19358+ *update = 0;
19359+ sb = inode->i_sb;
19360+ nbr = au_sbbot(sb) + 1; /* size requested from au_hinode_realloc(): bottom index + 1 */
19361+ type = inode->i_mode & S_IFMT;
19362+ iinfo = au_ii(inode);
19363+ err = au_hinode_realloc(iinfo, nbr, /*may_shrink*/0);
19364+ if (unlikely(err))
19365+ goto out;
19366+
19367+ AuDebugOn(iinfo->ii_btop < 0);
19368+ p = au_hinode(iinfo, iinfo->ii_btop);
19369+ for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot;
19370+ bindex++, p++) {
19371+ if (!p->hi_inode)
19372+ continue;
19373+
19374+ AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT));
19375+ new_bindex = au_br_index(sb, p->hi_id);
19376+ if (new_bindex == bindex)
19377+ continue; /* already at the right index */
19378+
19379+ if (new_bindex < 0) { /* negative index: release this lower inode and empty the slot */
19380+ *update = 1;
19381+ au_hiput(p);
19382+ p->hi_inode = NULL;
19383+ continue;
19384+ }
19385+
19386+ if (new_bindex < iinfo->ii_btop)
19387+ iinfo->ii_btop = new_bindex;
19388+ if (iinfo->ii_bbot < new_bindex)
19389+ iinfo->ii_bbot = new_bindex;
19390+ /* swap two lower inode, and loop again */
19391+ q = au_hinode(iinfo, new_bindex);
19392+ tmp = *q;
19393+ *q = *p;
19394+ *p = tmp;
19395+ if (tmp.hi_inode) { /* the entry swapped into this slot is in use: undo the loop step to visit it */
19396+ bindex--;
19397+ p--;
19398+ }
19399+ }
19400+ au_update_ibrange(inode, /*do_put_zero*/0);
19401+ au_hinode_realloc(iinfo, nbr, /*may_shrink*/1); /* harmless if err */
19402+ e = au_dy_irefresh(inode);
19403+ if (unlikely(e && !err))
19404+ err = e;
19405+
19406+out:
19407+ AuTraceErr(err);
19408+ return err; /* 0, or the error from au_hinode_realloc() / au_dy_irefresh() */
19409+}
19410+/* Set inode->i_op to the file-type entry of aufs_iop (if @force_getattr) or of the superblock's si_iop_array. */
19411+void au_refresh_iop(struct inode *inode, int force_getattr)
19412+{
19413+ int type;
19414+ struct au_sbinfo *sbi = au_sbi(inode->i_sb);
19415+ const struct inode_operations *iop
19416+ = force_getattr ? aufs_iop : sbi->si_iop_array;
19417+
19418+ if (inode->i_op == iop) /* NOTE(review): compares with the array base only, i.e. element 0 */
19419+ return;
19420+
19421+ switch (inode->i_mode & S_IFMT) {
19422+ case S_IFDIR:
19423+ type = AuIop_DIR;
19424+ break;
19425+ case S_IFLNK:
19426+ type = AuIop_SYMLINK;
19427+ break;
19428+ default:
19429+ type = AuIop_OTHER;
19430+ break;
19431+ }
19432+
19433+ inode->i_op = iop + type; /* 'type' is the index into the chosen array */
19434+ /* unnecessary smp_wmb() */
19435+}
19436+/* Refresh from the inode alone: au_ii_refresh(), then on success au_refresh_hinode_attr(). Returns au_ii_refresh()'s result. */
19437+int au_refresh_hinode_self(struct inode *inode)
19438+{
19439+ int err, update;
19440+
19441+ err = au_ii_refresh(inode, &update);
19442+ if (!err) /* the version bump is requested only for a directory that reported an update */
19443+ au_refresh_hinode_attr(inode, update && S_ISDIR(inode->i_mode));
19444+
19445+ AuTraceErr(err);
19446+ return err;
19447+}
19448+/* au_ii_refresh(), then add lower inodes found via @dentry's lower dentries; -EIO if a slot holds a different inode. */
19449+int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
19450+{
19451+ int err, e, update;
19452+ unsigned int flags;
19453+ umode_t mode;
19454+ aufs_bindex_t bindex, bbot;
19455+ unsigned char isdir;
19456+ struct au_hinode *p;
19457+ struct au_iinfo *iinfo;
19458+
19459+ err = au_ii_refresh(inode, &update);
19460+ if (unlikely(err))
19461+ goto out;
19462+
19463+ update = 0; /* au_ii_refresh()'s answer is discarded; set again only when the loop adds an inode */
19464+ iinfo = au_ii(inode);
19465+ p = au_hinode(iinfo, iinfo->ii_btop); /* NOTE(review): p is not read again in this function */
19466+ mode = (inode->i_mode & S_IFMT);
19467+ isdir = S_ISDIR(mode);
19468+ flags = au_hi_flags(inode, isdir);
19469+ bbot = au_dbbot(dentry);
19470+ for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++) {
19471+ struct inode *h_i, *h_inode;
19472+ struct dentry *h_d;
19473+
19474+ h_d = au_h_dptr(dentry, bindex);
19475+ if (!h_d || d_is_negative(h_d))
19476+ continue; /* no positive lower dentry at this index */
19477+
19478+ h_inode = d_inode(h_d);
19479+ AuDebugOn(mode != (h_inode->i_mode & S_IFMT));
19480+ if (iinfo->ii_btop <= bindex && bindex <= iinfo->ii_bbot) {
19481+ h_i = au_h_iptr(inode, bindex);
19482+ if (h_i) {
19483+ if (h_i == h_inode)
19484+ continue; /* slot already holds this lower inode */
19485+ err = -EIO; /* slot holds some other lower inode */
19486+ break;
19487+ }
19488+ }
19489+ if (bindex < iinfo->ii_btop)
19490+ iinfo->ii_btop = bindex;
19491+ if (iinfo->ii_bbot < bindex)
19492+ iinfo->ii_bbot = bindex;
19493+ au_set_h_iptr(inode, bindex, au_igrab(h_inode), flags);
19494+ update = 1;
19495+ }
19496+ au_update_ibrange(inode, /*do_put_zero*/0);
19497+ e = au_dy_irefresh(inode);
19498+ if (unlikely(e && !err)) /* an earlier error takes precedence over e */
19499+ err = e;
19500+ if (!err)
19501+ au_refresh_hinode_attr(inode, update && isdir);
19502+
19503+out:
19504+ AuTraceErr(err);
19505+ return err;
19506+}
19507+/* Set up @inode from @dentry: pick i_op/i_fop by the lower file type, then attach the lower inodes. 0, or -EIO / au_dy_iaop() error. */
19508+static int set_inode(struct inode *inode, struct dentry *dentry)
19509+{
19510+ int err;
19511+ unsigned int flags;
19512+ umode_t mode;
19513+ aufs_bindex_t bindex, btop, btail;
19514+ unsigned char isdir;
19515+ struct dentry *h_dentry;
19516+ struct inode *h_inode;
19517+ struct au_iinfo *iinfo;
19518+ const struct inode_operations *iop;
19519+
19520+ IiMustWriteLock(inode);
19521+
19522+ err = 0;
19523+ isdir = 0;
19524+ iop = au_sbi(inode->i_sb)->si_iop_array;
19525+ btop = au_dbtop(dentry);
19526+ h_dentry = au_h_dptr(dentry, btop);
19527+ h_inode = d_inode(h_dentry);
19528+ mode = h_inode->i_mode; /* the lower inode at btop supplies the file type */
19529+ switch (mode & S_IFMT) {
19530+ case S_IFREG:
19531+ btail = au_dbtail(dentry);
19532+ inode->i_op = iop + AuIop_OTHER;
19533+ inode->i_fop = &aufs_file_fop;
19534+ err = au_dy_iaop(inode, btop, h_inode);
19535+ if (unlikely(err))
19536+ goto out;
19537+ break;
19538+ case S_IFDIR:
19539+ isdir = 1;
19540+ btail = au_dbtaildir(dentry); /* directories use au_dbtaildir() rather than au_dbtail() */
19541+ inode->i_op = iop + AuIop_DIR;
19542+ inode->i_fop = &aufs_dir_fop;
19543+ break;
19544+ case S_IFLNK:
19545+ btail = au_dbtail(dentry);
19546+ inode->i_op = iop + AuIop_SYMLINK;
19547+ break;
19548+ case S_IFBLK:
19549+ case S_IFCHR:
19550+ case S_IFIFO:
19551+ case S_IFSOCK:
19552+ btail = au_dbtail(dentry);
19553+ inode->i_op = iop + AuIop_OTHER;
19554+ init_special_inode(inode, mode, h_inode->i_rdev);
19555+ break;
19556+ default:
19557+ AuIOErr("Unknown file type 0%o\n", mode);
19558+ err = -EIO;
19559+ goto out;
19560+ }
19561+
19562+ /* do not set hnotify for whiteouted dirs (SHWH mode) */
19563+ flags = au_hi_flags(inode, isdir);
19564+ if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)
19565+ && au_ftest_hi(flags, HNOTIFY)
19566+ && dentry->d_name.len > AUFS_WH_PFX_LEN
19567+ && !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
19568+ au_fclr_hi(flags, HNOTIFY); /* the name starts with AUFS_WH_PFX */
19569+ iinfo = au_ii(inode);
19570+ iinfo->ii_btop = btop;
19571+ iinfo->ii_bbot = btail;
19572+ for (bindex = btop; bindex <= btail; bindex++) {
19573+ h_dentry = au_h_dptr(dentry, bindex);
19574+ if (h_dentry) /* indices without a lower dentry are left unset */
19575+ au_set_h_iptr(inode, bindex,
19576+ au_igrab(d_inode(h_dentry)), flags);
19577+ }
19578+ au_cpup_attr_all(inode, /*force*/1);
19579+ /*
19580+ * to force calling aufs_get_inode_acl() every time,
19581+ * do not call cache_no_acl() for aufs inode.
19582+ */
19583+
19584+out:
19585+ return err;
19586+}
19587+
19588+/*
19589+ * check a cached @inode against @dentry; only a zero return leaves iinfo write_locked
19590+ * minus: errno (iinfo is not locked on return)
19591+ * zero: success, matched (iinfo stays write_locked)
19592+ * plus: no error, but unmatched (iinfo is unlocked again)
19593+ */
19594+static int reval_inode(struct inode *inode, struct dentry *dentry)
19595+{
19596+ int err;
19597+ unsigned int gen, igflags;
19598+ aufs_bindex_t bindex, bbot;
19599+ struct inode *h_inode, *h_dinode;
19600+ struct dentry *h_dentry;
19601+
19602+ /*
19603+ * before this function, if aufs got any iinfo lock, it must be only
19604+ * one, the parent dir.
19605+ * it can happen by UDBA and the obsoleted inode number.
19606+ */
19607+ err = -EIO;
19608+ if (unlikely(inode->i_ino == parent_ino(dentry)))
19609+ goto out; /* the inode number equals the parent's: fail before taking the lock */
19610+
19611+ err = 1; /* "unmatched" unless the loop finds the dentry's top lower inode */
19612+ ii_write_lock_new_child(inode);
19613+ h_dentry = au_h_dptr(dentry, au_dbtop(dentry));
19614+ h_dinode = d_inode(h_dentry);
19615+ bbot = au_ibbot(inode);
19616+ for (bindex = au_ibtop(inode); bindex <= bbot; bindex++) {
19617+ h_inode = au_h_iptr(inode, bindex);
19618+ if (!h_inode || h_inode != h_dinode)
19619+ continue;
19620+
19621+ err = 0;
19622+ gen = au_iigen(inode, &igflags);
19623+ if (gen == au_digen(dentry)
19624+ && !au_ig_ftest(igflags, HALF_REFRESHED))
19625+ break; /* same generation as the dentry and not half-refreshed: no refresh */
19626+
19627+ /* fully refresh inode using dentry */
19628+ err = au_refresh_hinode(inode, dentry);
19629+ if (!err)
19630+ au_update_iigen(inode, /*half*/0);
19631+ break;
19632+ }
19633+
19634+ if (unlikely(err)) /* any non-zero result, positive or negative, drops the lock */
19635+ ii_write_unlock(inode);
19636+out:
19637+ return err;
19638+}
19639+/* Read the number recorded for (@bindex, @h_ino) into *ino; if it is 0, get a new one and record it. 0 or an error. */
19640+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
19641+ unsigned int d_type, ino_t *ino)
19642+{
19643+ int err, idx;
19644+ const int isnondir = d_type != DT_DIR;
19645+
19646+ /* prevent hardlinked inode number from race condition */
19647+ if (isnondir) {
19648+ err = au_xinondir_enter(sb, bindex, h_ino, &idx);
19649+ if (unlikely(err))
19650+ goto out;
19651+ }
19652+
19653+ err = au_xino_read(sb, bindex, h_ino, ino);
19654+ if (unlikely(err))
19655+ goto out_xinondir;
19656+
19657+ if (!*ino) {
19658+ err = -EIO; /* returned when au_xino_new_ino() yields 0 */
19659+ *ino = au_xino_new_ino(sb);
19660+ if (unlikely(!*ino))
19661+ goto out_xinondir;
19662+ err = au_xino_write(sb, bindex, h_ino, *ino);
19663+ if (unlikely(err))
19664+ goto out_xinondir;
19665+ }
19666+
19667+out_xinondir:
19668+ if (isnondir && idx >= 0) /* idx is only set on the non-directory path */
19669+ au_xinondir_leave(sb, bindex, h_ino, idx);
19670+out:
19671+ return err;
19672+}
19673+/* Get the aufs inode for @dentry via au_iget_locked(), initialising it when new; returns the inode or an ERR_PTR. */
19674+/* successful returns with iinfo write_locked */
19675+/* todo: return with unlocked? */
19676+struct inode *au_new_inode(struct dentry *dentry, int must_new)
19677+{
19678+ struct inode *inode, *h_inode;
19679+ struct dentry *h_dentry;
19680+ struct super_block *sb;
19681+ ino_t h_ino, ino;
19682+ int err, idx, hlinked;
19683+ aufs_bindex_t btop;
19684+
19685+ sb = dentry->d_sb;
19686+ btop = au_dbtop(dentry);
19687+ h_dentry = au_h_dptr(dentry, btop);
19688+ h_inode = d_inode(h_dentry);
19689+ h_ino = h_inode->i_ino;
19690+ hlinked = !d_is_dir(h_dentry) && h_inode->i_nlink > 1; /* non-directory with more than one link */
19691+
19692+new_ino:
19693+ /*
19694+ * stop 'race'-ing between hardlinks under different
19695+ * parents.
19696+ */
19697+ if (hlinked) {
19698+ err = au_xinondir_enter(sb, btop, h_ino, &idx);
19699+ inode = ERR_PTR(err);
19700+ if (unlikely(err))
19701+ goto out;
19702+ }
19703+
19704+ err = au_xino_read(sb, btop, h_ino, &ino);
19705+ inode = ERR_PTR(err);
19706+ if (unlikely(err))
19707+ goto out_xinondir;
19708+
19709+ if (!ino) { /* nothing recorded yet for this lower inode */
19710+ ino = au_xino_new_ino(sb);
19711+ if (unlikely(!ino)) {
19712+ inode = ERR_PTR(-EIO);
19713+ goto out_xinondir;
19714+ }
19715+ }
19716+
19717+ AuDbg("i%lu\n", (unsigned long)ino);
19718+ inode = au_iget_locked(sb, ino);
19719+ err = PTR_ERR(inode); /* only meaningful when IS_ERR(inode) holds */
19720+ if (IS_ERR(inode))
19721+ goto out_xinondir;
19722+
19723+ AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
19724+ if (inode->i_state & I_NEW) {
19725+ ii_write_lock_new_child(inode);
19726+ err = set_inode(inode, dentry);
19727+ if (!err) {
19728+ unlock_new_inode(inode);
19729+ goto out_xinondir; /* success */
19730+ }
19731+
19732+ /*
19733+ * iget_failed() calls iput(), but we need to call
19734+ * ii_write_unlock() after iget_failed(). so dirty hack for
19735+ * i_count.
19736+ */
19737+ atomic_inc(&inode->i_count);
19738+ iget_failed(inode);
19739+ ii_write_unlock(inode);
19740+ au_xino_write(sb, btop, h_ino, /*ino*/0);
19741+ /* ignore this error */
19742+ goto out_iput; /* the iput() there drops the extra count taken above */
19743+ } else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) {
19744+ /*
19745+ * horrible race condition between lookup, readdir and copyup
19746+ * (or something).
19747+ */
19748+ if (hlinked && idx >= 0)
19749+ au_xinondir_leave(sb, btop, h_ino, idx);
19750+ err = reval_inode(inode, dentry);
19751+ if (unlikely(err < 0)) {
19752+ hlinked = 0; /* already left above: keeps out_xinondir from leaving a second time */
19753+ goto out_iput;
19754+ }
19755+ if (!err)
19756+ goto out; /* success */
19757+ else if (hlinked && idx >= 0) { /* unmatched: enter again before the retry path below */
19758+ err = au_xinondir_enter(sb, btop, h_ino, &idx);
19759+ if (unlikely(err)) {
19760+ iput(inode);
19761+ inode = ERR_PTR(err);
19762+ goto out;
19763+ }
19764+ }
19765+ }
19766+
19767+ if (unlikely(au_test_fs_unique_ino(h_inode)))
19768+ AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
19769+ " b%d, %s, %pd, hi%lu, i%lu.\n",
19770+ btop, au_sbtype(h_dentry->d_sb), dentry,
19771+ (unsigned long)h_ino, (unsigned long)ino);
19772+ ino = 0;
19773+ err = au_xino_write(sb, btop, h_ino, /*ino*/0); /* record 0 for this lower inode, then start over */
19774+ if (!err) {
19775+ iput(inode);
19776+ if (hlinked && idx >= 0)
19777+ au_xinondir_leave(sb, btop, h_ino, idx);
19778+ goto new_ino;
19779+ }
19780+
19781+out_iput:
19782+ iput(inode);
19783+ inode = ERR_PTR(err);
19784+out_xinondir:
19785+ if (hlinked && idx >= 0)
19786+ au_xinondir_leave(sb, btop, h_ino, idx);
19787+out:
19788+ return inode;
19789+}
19790+
19791+/* ---------------------------------------------------------------------- */
19792+/* Return au_br_rdonly()'s result for branch @bindex; if that is 0, -EROFS when the lower inode there is immutable. */
19793+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
19794+ struct inode *inode)
19795+{
19796+ int err;
19797+ struct inode *hi;
19798+
19799+ err = au_br_rdonly(au_sbr(sb, bindex));
19800+
19801+ /* pseudo-link after flushed may happen out of bounds */
19802+ if (!err
19803+ && inode
19804+ && au_ibtop(inode) <= bindex
19805+ && bindex <= au_ibbot(inode)) {
19806+ /*
19807+ * permission check is unnecessary since vfsub routine
19808+ * will be called later
19809+ */
19810+ hi = au_h_iptr(inode, bindex);
19811+ if (hi) /* an empty slot leaves err at 0 */
19812+ err = IS_IMMUTABLE(hi) ? -EROFS : 0;
19813+ }
19814+
19815+ return err;
19816+}
19817+/* inode_permission() on lower inode @h_inode; returns 0 without checking when the current fsuid is GLOBAL_ROOT_UID. */
19818+int au_test_h_perm(struct user_namespace *h_userns, struct inode *h_inode,
19819+ int mask)
19820+{
19821+ if (uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
19822+ return 0;
19823+ return inode_permission(h_userns, h_inode, mask);
19824+}
19825+/* au_test_h_perm(), with MAY_READ added to @mask for a MAY_WRITE check on a directory when au_test_nfs() is true. */
19826+int au_test_h_perm_sio(struct user_namespace *h_userns, struct inode *h_inode,
19827+ int mask)
19828+{
19829+ if (au_test_nfs(h_inode->i_sb)
19830+ && (mask & MAY_WRITE)
19831+ && S_ISDIR(h_inode->i_mode))
19832+ mask |= MAY_READ; /* force permission check */
19833+ return au_test_h_perm(h_userns, h_inode, mask);
19834+}
19835diff -urN /usr/share/empty/fs/aufs/inode.h linux/fs/aufs/inode.h
19836--- /usr/share/empty/fs/aufs/inode.h 1970-01-01 01:00:00.000000000 +0100
19837+++ linux/fs/aufs/inode.h 2023-02-20 21:05:51.959693785 +0100
19838@@ -0,0 +1,707 @@
19839+/* SPDX-License-Identifier: GPL-2.0 */
19840+/*
19841+ * Copyright (C) 2005-2022 Junjiro R. Okajima
19842+ *
19843+ * This program is free software; you can redistribute it and/or modify
19844+ * it under the terms of the GNU General Public License as published by
19845+ * the Free Software Foundation; either version 2 of the License, or
19846+ * (at your option) any later version.
19847+ *
19848+ * This program is distributed in the hope that it will be useful,
19849+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
19850+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19851+ * GNU General Public License for more details.
19852+ *
19853+ * You should have received a copy of the GNU General Public License
19854+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
19855+ */
19856+
19857+/*
19858+ * inode operations
19859+ */
19860+
19861+#ifndef __AUFS_INODE_H__
19862+#define __AUFS_INODE_H__
19863+
19864+#ifdef __KERNEL__
19865+
19866+#include <linux/fsnotify.h>
19867+#include "rwsem.h"
19868+
19869+struct vfsmount;
19870+/* Notify data referenced from au_hinode.hi_notify; has no members unless CONFIG_AUFS_HNOTIFY is set. */
19871+struct au_hnotify {
19872+#ifdef CONFIG_AUFS_HNOTIFY
19873+#ifdef CONFIG_AUFS_HFSNOTIFY
19874+ /* never use fsnotify_add_vfsmount_mark() */
19875+ struct fsnotify_mark hn_mark;
19876+#endif
19877+ struct inode *hn_aufs_inode; /* no get/put */
19878+ struct rcu_head rcu;
19879+#endif
19880+} ____cacheline_aligned_in_smp;
19881+/* One element of au_iinfo.ii_hinode: a lower inode plus the id that au_br_index() maps to a branch index. */
19882+struct au_hinode {
19883+ struct inode *hi_inode; /* NULL marks an unused slot */
19884+ aufs_bindex_t hi_id;
19885+#ifdef CONFIG_AUFS_HNOTIFY
19886+ struct au_hnotify *hi_notify;
19887+#endif
19888+
19889+ /* reference to the copied-up whiteout with get/put */
19890+ struct dentry *hi_whdentry;
19891+};
19892+
19893+/* ig_flags: bit values for au_iigen.ig_flags, with test/set/clear helpers taking the name without the AuIG_ prefix */
19894+#define AuIG_HALF_REFRESHED 1
19895+#define au_ig_ftest(flags, name) ((flags) & AuIG_##name)
19896+#define au_ig_fset(flags, name) \
19897+ do { (flags) |= AuIG_##name; } while (0)
19898+#define au_ig_fclr(flags, name) \
19899+ do { (flags) &= ~AuIG_##name; } while (0)
19900+/* Generation number and flags of an aufs inode; au_iigen() and au_iigen_dec() access them under ig_spin. */
19901+struct au_iigen {
19902+ spinlock_t ig_spin;
19903+ __u32 ig_generation, ig_flags;
19904+};
19905+/* aufs-private part of an inode (embedded in au_icntnr): generation, rwsem, and the lower-inode array with its range. */
19906+struct au_vdir;
19907+struct au_iinfo {
19908+ struct au_iigen ii_generation;
19909+ struct super_block *ii_hsb1; /* no get/put */
19910+
19911+ struct au_rwsem ii_rwsem;
19912+ aufs_bindex_t ii_btop, ii_bbot; /* first and last index in use within ii_hinode */
19913+ __u32 ii_higen; /* compared with a lower inode's i_generation in au_test_higen() */
19914+ struct au_hinode *ii_hinode;
19915+ struct au_vdir *ii_vdir;
19916+};
19917+/* Container holding the VFS inode together with its au_iinfo; au_ii() recovers it with container_of(). */
19918+struct au_icntnr {
19919+ struct au_iinfo iinfo;
19920+ struct inode vfs_inode;
19921+ struct hlist_bl_node plink;
19922+ struct rcu_head rcu;
19923+} ____cacheline_aligned_in_smp;
19924+
19925+/* au_pin flags: bit values with test/set/clear helpers taking the name without the AuPin_ prefix */
19926+#define AuPin_DI_LOCKED 1
19927+#define AuPin_MNT_WRITE (1 << 1)
19928+#define au_ftest_pin(flags, name) ((flags) & AuPin_##name)
19929+#define au_fset_pin(flags, name) \
19930+ do { (flags) |= AuPin_##name; } while (0)
19931+#define au_fclr_pin(flags, name) \
19932+ do { (flags) &= ~AuPin_##name; } while (0)
19933+/* State passed to the au_pin*() functions: caller-supplied input, results, and fields kept for unlock/relock. */
19934+struct au_pin {
19935+ /* input */
19936+ struct dentry *dentry;
19937+ unsigned int udba;
19938+ unsigned char lsc_di, lsc_hi, flags;
19939+ aufs_bindex_t bindex;
19940+
19941+ /* output */
19942+ struct dentry *parent;
19943+ struct au_hinode *hdir;
19944+ struct vfsmount *h_mnt;
19945+
19946+ /* temporary unlock/relock for copyup */
19947+ struct dentry *h_dentry, *h_parent;
19948+ struct au_branch *br;
19949+ struct task_struct *task;
19950+};
19951+
19952+void au_pin_hdir_unlock(struct au_pin *p);
19953+int au_pin_hdir_lock(struct au_pin *p);
19954+int au_pin_hdir_relock(struct au_pin *p);
19955+void au_pin_hdir_acquire_nest(struct au_pin *p);
19956+void au_pin_hdir_release(struct au_pin *p);
19957+
19958+/* ---------------------------------------------------------------------- */
19959+/* Return the au_iinfo stored next to @inode in its au_icntnr; BUG()s if is_bad_inode(@inode). */
19960+static inline struct au_iinfo *au_ii(struct inode *inode)
19961+{
19962+ BUG_ON(is_bad_inode(inode));
19963+ return &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
19964+}
19965+
19966+/* ---------------------------------------------------------------------- */
19967+
19968+/* inode.c */
19969+struct inode *au_igrab(struct inode *inode);
19970+void au_refresh_iop(struct inode *inode, int force_getattr);
19971+int au_refresh_hinode_self(struct inode *inode);
19972+int au_refresh_hinode(struct inode *inode, struct dentry *dentry);
19973+int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
19974+ unsigned int d_type, ino_t *ino);
19975+struct inode *au_new_inode(struct dentry *dentry, int must_new);
19976+int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
19977+ struct inode *inode);
19978+int au_test_h_perm(struct user_namespace *h_userns, struct inode *h_inode,
19979+ int mask);
19980+int au_test_h_perm_sio(struct user_namespace *h_userns, struct inode *h_inode,
19981+ int mask);
19982+/* Same as au_ino() when CONFIG_AUFS_SHWH is defined; otherwise returns 0 and leaves *ino untouched. */
19983+static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex,
19984+ ino_t h_ino, unsigned int d_type, ino_t *ino)
19985+{
19986+#ifdef CONFIG_AUFS_SHWH
19987+ return au_ino(sb, bindex, h_ino, d_type, ino);
19988+#else
19989+ return 0;
19990+#endif
19991+}
19992+
19993+/* i_op.c */
19994+enum { /* index of the per-file-type entry in the inode_operations arrays declared below */
19995+ AuIop_SYMLINK,
19996+ AuIop_DIR,
19997+ AuIop_OTHER,
19998+ AuIop_Last /* number of entries; used as the array size */
19999+};
20000+extern struct inode_operations aufs_iop[AuIop_Last], /* not const */
20001+ aufs_iop_nogetattr[AuIop_Last];
20002+
20003+/* au_wr_dir flags: bit values with test/set/clear helpers taking the name without the AuWrDir_ prefix */
20004+#define AuWrDir_ADD_ENTRY 1
20005+#define AuWrDir_ISDIR (1 << 1)
20006+#define AuWrDir_TMPFILE (1 << 2)
20007+#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name)
20008+#define au_fset_wrdir(flags, name) \
20009+ do { (flags) |= AuWrDir_##name; } while (0)
20010+#define au_fclr_wrdir(flags, name) \
20011+ do { (flags) &= ~AuWrDir_##name; } while (0)
20012+
20013+struct au_wr_dir_args { /* argument bundle taken by au_wr_dir() */
20014+ aufs_bindex_t force_btgt;
20015+ unsigned char flags; /* presumably the AuWrDir_* bits above -- confirm in i_op.c */
20016+};
20017+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
20018+ struct au_wr_dir_args *args);
20019+
20020+struct dentry *au_pinned_h_parent(struct au_pin *pin);
20021+void au_pin_init(struct au_pin *pin, struct dentry *dentry,
20022+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
20023+ unsigned int udba, unsigned char flags);
20024+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
20025+ unsigned int udba, unsigned char flags) __must_check;
20026+int au_do_pin(struct au_pin *pin) __must_check;
20027+void au_unpin(struct au_pin *pin);
20028+int au_reval_for_attr(struct dentry *dentry, unsigned int sigen);
20029+/* AuIcpup_* bit values with test/set/clear helpers, then the argument bundle taken by au_pin_and_icpup(). */
20030+#define AuIcpup_DID_CPUP 1
20031+#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name)
20032+#define au_fset_icpup(flags, name) \
20033+ do { (flags) |= AuIcpup_##name; } while (0)
20034+#define au_fclr_icpup(flags, name) \
20035+ do { (flags) &= ~AuIcpup_##name; } while (0)
20036+
20037+struct au_icpup_args {
20038+ unsigned char flags; /* presumably the AuIcpup_* bits above -- confirm in i_op.c */
20039+ unsigned char pin_flags;
20040+ aufs_bindex_t btgt;
20041+ unsigned int udba;
20042+ struct au_pin pin;
20043+ struct path h_path;
20044+ struct inode *h_inode;
20045+};
20046+
20047+int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
20048+ struct au_icpup_args *a);
20049+
20050+int au_h_path_getattr(struct dentry *dentry, struct inode *inode, int force,
20051+ struct path *h_path, int locked);
20052+
20053+/* i_op_add.c */
20054+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
20055+ struct dentry *h_parent, int isdir);
20056+int aufs_mknod(struct user_namespace *userns, struct inode *dir,
20057+ struct dentry *dentry, umode_t mode, dev_t dev);
20058+int aufs_symlink(struct user_namespace *userns, struct inode *dir,
20059+ struct dentry *dentry, const char *symname);
20060+int aufs_create(struct user_namespace *userns, struct inode *dir,
20061+ struct dentry *dentry, umode_t mode, bool want_excl);
20062+struct vfsub_aopen_args;
20063+int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
20064+ struct vfsub_aopen_args *args);
20065+int aufs_tmpfile(struct user_namespace *userns, struct inode *dir,
20066+ struct file *file, umode_t mode);
20067+int aufs_link(struct dentry *src_dentry, struct inode *dir,
20068+ struct dentry *dentry);
20069+int aufs_mkdir(struct user_namespace *userns, struct inode *dir,
20070+ struct dentry *dentry, umode_t mode);
20071+
20072+/* i_op_del.c */
20073+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup);
20074+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
20075+ struct dentry *h_parent, int isdir);
20076+int aufs_unlink(struct inode *dir, struct dentry *dentry);
20077+int aufs_rmdir(struct inode *dir, struct dentry *dentry);
20078+
20079+/* i_op_ren.c */
20080+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt);
20081+int aufs_rename(struct user_namespace *userns,
20082+ struct inode *_src_dir, struct dentry *_src_dentry,
20083+ struct inode *_dst_dir, struct dentry *_dst_dentry,
20084+ unsigned int _flags);
20085+
20086+/* iinfo.c */
20087+struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex);
20088+void au_hiput(struct au_hinode *hinode);
20089+void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
20090+ struct dentry *h_wh);
20091+unsigned int au_hi_flags(struct inode *inode, int isdir);
20092+
20093+/* hinode flags: bit values with test/set/clear helpers taking the name without the AuHi_ prefix */
20094+#define AuHi_XINO 1
20095+#define AuHi_HNOTIFY (1 << 1)
20096+#define au_ftest_hi(flags, name) ((flags) & AuHi_##name)
20097+#define au_fset_hi(flags, name) \
20098+ do { (flags) |= AuHi_##name; } while (0)
20099+#define au_fclr_hi(flags, name) \
20100+ do { (flags) &= ~AuHi_##name; } while (0)
20101+
20102+#ifndef CONFIG_AUFS_HNOTIFY
20103+#undef AuHi_HNOTIFY
20104+#define AuHi_HNOTIFY 0 /* without hnotify, au_ftest_hi(flags, HNOTIFY) is always 0 */
20105+#endif
20106+
20107+void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
20108+ struct inode *h_inode, unsigned int flags);
20109+
20110+void au_update_iigen(struct inode *inode, int half);
20111+void au_update_ibrange(struct inode *inode, int do_put_zero);
20112+
20113+void au_icntnr_init_once(void *_c);
20114+void au_hinode_init(struct au_hinode *hinode);
20115+int au_iinfo_init(struct inode *inode);
20116+void au_iinfo_fin(struct inode *inode);
20117+int au_hinode_realloc(struct au_iinfo *iinfo, int nbr, int may_shrink);
20118+
20119+#ifdef CONFIG_PROC_FS
20120+/* plink.c */
20121+int au_plink_maint(struct super_block *sb, int flags);
20122+struct au_sbinfo;
20123+void au_plink_maint_leave(struct au_sbinfo *sbinfo);
20124+int au_plink_maint_enter(struct super_block *sb);
20125+#ifdef CONFIG_AUFS_DEBUG
20126+void au_plink_list(struct super_block *sb);
20127+#else
20128+AuStubVoid(au_plink_list, struct super_block *sb)
20129+#endif
20130+int au_plink_test(struct inode *inode);
20131+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex);
20132+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
20133+ struct dentry *h_dentry);
20134+void au_plink_put(struct super_block *sb, int verbose);
20135+void au_plink_clean(struct super_block *sb, int verbose);
20136+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id);
20137+#else
20138+AuStubInt0(au_plink_maint, struct super_block *sb, int flags);
20139+AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo);
20140+AuStubInt0(au_plink_maint_enter, struct super_block *sb);
20141+AuStubVoid(au_plink_list, struct super_block *sb);
20142+AuStubInt0(au_plink_test, struct inode *inode);
20143+AuStub(struct dentry *, au_plink_lkup, return NULL,
20144+ struct inode *inode, aufs_bindex_t bindex);
20145+AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex,
20146+ struct dentry *h_dentry);
20147+AuStubVoid(au_plink_put, struct super_block *sb, int verbose);
20148+AuStubVoid(au_plink_clean, struct super_block *sb, int verbose);
20149+AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id);
20150+#endif /* CONFIG_PROC_FS */
20151+
20152+#ifdef CONFIG_AUFS_XATTR
20153+/* xattr.c */
20154+int au_cpup_xattr(struct path *h_dst, struct path *h_src, int ignore_flags,
20155+ unsigned int verbose);
20156+ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size);
20157+void au_xattr_init(struct super_block *sb);
20158+#else
20159+AuStubInt0(au_cpup_xattr, struct path *h_dst, struct path *h_src,
20160+ int ignore_flags, unsigned int verbose);
20161+AuStubVoid(au_xattr_init, struct super_block *sb);
20162+#endif
20163+
20164+#ifdef CONFIG_FS_POSIX_ACL
20165+struct posix_acl *aufs_get_inode_acl(struct inode *inode, int type, bool rcu);
20166+struct posix_acl *aufs_get_acl(struct user_namespace *userns,
20167+ struct dentry *dentry, int type);
20168+int aufs_set_acl(struct user_namespace *userns, struct dentry *dentry,
20169+ struct posix_acl *acl, int type);
20170+#endif
20171+/* Argument of au_sxattr(): 'type' is AU_XATTR_SET or AU_ACL_SET, presumably selecting u.set or u.acl_set. */
20172+#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
20173+enum {
20174+ AU_XATTR_SET,
20175+ AU_ACL_SET
20176+};
20177+
20178+struct au_sxattr {
20179+ int type;
20180+ union {
20181+ struct {
20182+ const char *name;
20183+ const void *value;
20184+ size_t size;
20185+ int flags;
20186+ } set;
20187+ struct {
20188+ struct posix_acl *acl;
20189+ int type;
20190+ } acl_set;
20191+ } u;
20192+};
20193+ssize_t au_sxattr(struct dentry *dentry, struct inode *inode,
20194+ struct au_sxattr *arg);
20195+#endif
20196+
20197+/* ---------------------------------------------------------------------- */
20198+
20199+/* lock subclass for iinfo: passed to the nested ii_rwsem lock helpers generated further down */
20200+enum {
20201+ AuLsc_II_CHILD, /* child first */
20202+ AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hnotify */
20203+ AuLsc_II_CHILD3, /* copyup dirs */
20204+ AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */
20205+ AuLsc_II_PARENT2,
20206+ AuLsc_II_PARENT3, /* copyup dirs */
20207+ AuLsc_II_NEW_CHILD
20208+};
20209+
20210+/* inline functions generated by the AuRWLockFuncs() uses below, one read/write pair per lock subclass:
20211+ * ii_read_lock_child, ii_write_lock_child,
20212+ * ii_read_lock_child2, ii_write_lock_child2,
20213+ * ii_read_lock_child3, ii_write_lock_child3,
20214+ * ii_read_lock_parent, ii_write_lock_parent,
20215+ * ii_read_lock_parent2, ii_write_lock_parent2,
20216+ * ii_read_lock_parent3, ii_write_lock_parent3,
20217+ * ii_read_lock_new_child, ii_write_lock_new_child,
20218+ */
20219+#define AuReadLockFunc(name, lsc) \
20220+static inline void ii_read_lock_##name(struct inode *i) \
20221+{ \
20222+ au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
20223+}
20224+
20225+#define AuWriteLockFunc(name, lsc) \
20226+static inline void ii_write_lock_##name(struct inode *i) \
20227+{ \
20228+ au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
20229+}
20230+
20231+#define AuRWLockFuncs(name, lsc) \
20232+ AuReadLockFunc(name, lsc) \
20233+ AuWriteLockFunc(name, lsc)
20234+
20235+AuRWLockFuncs(child, CHILD);
20236+AuRWLockFuncs(child2, CHILD2);
20237+AuRWLockFuncs(child3, CHILD3);
20238+AuRWLockFuncs(parent, PARENT);
20239+AuRWLockFuncs(parent2, PARENT2);
20240+AuRWLockFuncs(parent3, PARENT3);
20241+AuRWLockFuncs(new_child, NEW_CHILD);
20242+
20243+#undef AuReadLockFunc /* the generator macros are only needed for the seven uses above */
20244+#undef AuWriteLockFunc
20245+#undef AuRWLockFuncs
20246+/* unlock/downgrade wrappers and lock-state assertion wrappers, all applied to the inode's ii_rwsem */
20247+#define ii_read_unlock(i) au_rw_read_unlock(&au_ii(i)->ii_rwsem)
20248+#define ii_write_unlock(i) au_rw_write_unlock(&au_ii(i)->ii_rwsem)
20249+#define ii_downgrade_lock(i) au_rw_dgrade_lock(&au_ii(i)->ii_rwsem)
20250+
20251+#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem)
20252+#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem)
20253+#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem)
20254+
20255+/* ---------------------------------------------------------------------- */
20256+/* With CONFIG_AUFS_DEBUG, zero the embedded inode's i_mode; otherwise the body is empty. */
20257+static inline void au_icntnr_init(struct au_icntnr *c)
20258+{
20259+#ifdef CONFIG_AUFS_DEBUG
20260+ c->vfs_inode.i_mode = 0;
20261+#endif
20262+}
20263+/* Return the inode's ig_generation; when @igflags is non-NULL also store ig_flags there. Both are read under ig_spin. */
20264+static inline unsigned int au_iigen(struct inode *inode, unsigned int *igflags)
20265+{
20266+ unsigned int gen;
20267+ struct au_iinfo *iinfo;
20268+ struct au_iigen *iigen;
20269+
20270+ iinfo = au_ii(inode);
20271+ iigen = &iinfo->ii_generation;
20272+ spin_lock(&iigen->ig_spin);
20273+ if (igflags)
20274+ *igflags = iigen->ig_flags;
20275+ gen = iigen->ig_generation;
20276+ spin_unlock(&iigen->ig_spin);
20277+
20278+ return gen;
20279+}
20280+
20281+/* tiny test for inode number: 0 when @h_inode's i_sb and i_generation equal ii_hsb1 and ii_higen, else 1 */
20282+/* tmpfs generation is too rough */
20283+static inline int au_test_higen(struct inode *inode, struct inode *h_inode)
20284+{
20285+ struct au_iinfo *iinfo;
20286+
20287+ iinfo = au_ii(inode);
20288+ AuRwMustAnyLock(&iinfo->ii_rwsem);
20289+ return !(iinfo->ii_hsb1 == h_inode->i_sb
20290+ && iinfo->ii_higen == h_inode->i_generation);
20291+}
20292+/* Decrement the inode's ig_generation by one while holding ig_spin. */
20293+static inline void au_iigen_dec(struct inode *inode)
20294+{
20295+ struct au_iinfo *iinfo;
20296+ struct au_iigen *iigen;
20297+
20298+ iinfo = au_ii(inode);
20299+ iigen = &iinfo->ii_generation;
20300+ spin_lock(&iigen->ig_spin);
20301+ iigen->ig_generation--;
20302+ spin_unlock(&iigen->ig_spin);
20303+}
20304+/* Return -EIO when @inode is non-NULL and its generation differs from @sigen; otherwise 0. */
20305+static inline int au_iigen_test(struct inode *inode, unsigned int sigen)
20306+{
20307+ int err;
20308+
20309+ err = 0;
20310+ if (unlikely(inode && au_iigen(inode, NULL) != sigen))
20311+ err = -EIO;
20312+
20313+ return err;
20314+}
20315+
20316+/* ---------------------------------------------------------------------- */
20317+/* Address of element @bindex of iinfo->ii_hinode; plain pointer arithmetic, no range or NULL check. */
20318+static inline struct au_hinode *au_hinode(struct au_iinfo *iinfo,
20319+ aufs_bindex_t bindex)
20320+{
20321+ return iinfo->ii_hinode + bindex;
20322+}
20323+/* 1 when is_bad_inode() says so or when the ii_hinode array pointer is NULL, else 0 */
20324+static inline int au_is_bad_inode(struct inode *inode)
20325+{
20326+ return !!(is_bad_inode(inode) || !au_hinode(au_ii(inode), 0));
20327+}
20328+/* hi_id stored in the hinode at 'bindex'; runs the IiMustAnyLock() check on ii_rwsem first */
20329+static inline aufs_bindex_t au_ii_br_id(struct inode *inode,
20330+ aufs_bindex_t bindex)
20331+{
20332+ IiMustAnyLock(inode);
20333+ return au_hinode(au_ii(inode), bindex)->hi_id;
20334+}
20335+/* read ii_btop of the inode, after the IiMustAnyLock() check */
20336+static inline aufs_bindex_t au_ibtop(struct inode *inode)
20337+{
20338+ IiMustAnyLock(inode);
20339+ return au_ii(inode)->ii_btop;
20340+}
20341+/* read ii_bbot of the inode, after the IiMustAnyLock() check */
20342+static inline aufs_bindex_t au_ibbot(struct inode *inode)
20343+{
20344+ IiMustAnyLock(inode);
20345+ return au_ii(inode)->ii_bbot;
20346+}
20347+/* read the ii_vdir pointer of the inode, after the IiMustAnyLock() check */
20348+static inline struct au_vdir *au_ivdir(struct inode *inode)
20349+{
20350+ IiMustAnyLock(inode);
20351+ return au_ii(inode)->ii_vdir;
20352+}
20353+/* hi_whdentry stored in the hinode at 'bindex', after the IiMustAnyLock() check */
20354+static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex)
20355+{
20356+ IiMustAnyLock(inode);
20357+ return au_hinode(au_ii(inode), bindex)->hi_whdentry;
20358+}
20359+/* store 'bindex' into ii_btop, after the IiMustWriteLock() check */
20360+static inline void au_set_ibtop(struct inode *inode, aufs_bindex_t bindex)
20361+{
20362+ IiMustWriteLock(inode);
20363+ au_ii(inode)->ii_btop = bindex;
20364+}
20365+/* store 'bindex' into ii_bbot, after the IiMustWriteLock() check */
20366+static inline void au_set_ibbot(struct inode *inode, aufs_bindex_t bindex)
20367+{
20368+ IiMustWriteLock(inode);
20369+ au_ii(inode)->ii_bbot = bindex;
20370+}
20371+/* store 'vdir' into ii_vdir, after the IiMustWriteLock() check; the old value is not freed here */
20372+static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
20373+{
20374+ IiMustWriteLock(inode);
20375+ au_ii(inode)->ii_vdir = vdir;
20376+}
20377+/* au_hinode() on the inode's iinfo, preceded by the IiMustAnyLock() check */
20378+static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex)
20379+{
20380+ IiMustAnyLock(inode);
20381+ return au_hinode(au_ii(inode), bindex);
20382+}
20383+
20384+/* ---------------------------------------------------------------------- */
20385+/* pin->parent, or NULL when pin itself is NULL; no reference is taken */
20386+static inline struct dentry *au_pinned_parent(struct au_pin *pin)
20387+{
20388+ if (pin)
20389+ return pin->parent;
20390+ return NULL;
20391+}
20392+/* hi_inode of pin->hdir, or NULL when pin or pin->hdir is NULL */
20393+static inline struct inode *au_pinned_h_dir(struct au_pin *pin)
20394+{
20395+ if (pin && pin->hdir)
20396+ return pin->hdir->hi_inode;
20397+ return NULL;
20398+}
20399+/* pin->hdir, or NULL when pin is NULL */
20400+static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin)
20401+{
20402+ if (pin)
20403+ return pin->hdir;
20404+ return NULL;
20405+}
20406+/* store 'dentry' into pin->dentry; does nothing for a NULL pin and takes no reference */
20407+static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry)
20408+{
20409+ if (pin)
20410+ pin->dentry = dentry;
20411+}
20412+/* set (lflag != 0) or clear (lflag == 0) DI_LOCKED in pin->flags; no-op for a NULL pin */
20413+static inline void au_pin_set_parent_lflag(struct au_pin *pin,
20414+ unsigned char lflag)
20415+{
20416+ if (pin) {
20417+ if (lflag)
20418+ au_fset_pin(pin->flags, DI_LOCKED);
20419+ else
20420+ au_fclr_pin(pin->flags, DI_LOCKED);
20421+ }
20422+}
20423+
20424+#if 0 /* reserved */
20425+static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent)
20426+{
20427+ if (pin) {
20428+ dput(pin->parent);
20429+ pin->parent = dget(parent);
20430+ }
20431+}
20432+#endif
20433+
20434+/* ---------------------------------------------------------------------- */
20435+
20436+struct au_branch;
20437+#ifdef CONFIG_AUFS_HNOTIFY
20438+struct au_hnotify_op {
20439+ void (*ctl)(struct au_hinode *hinode, int do_set);
20440+ int (*alloc)(struct au_hinode *hinode);
20441+
20442+ /*
20443+ * if it returns true, the caller should free hinode->hi_notify,
20444+ * otherwise ->free() frees it.
20445+ */
20446+ int (*free)(struct au_hinode *hinode,
20447+ struct au_hnotify *hn) __must_check;
20448+
20449+ void (*fin)(void);
20450+ int (*init)(void);
20451+
20452+ int (*reset_br)(unsigned int udba, struct au_branch *br, int perm);
20453+ void (*fin_br)(struct au_branch *br);
20454+ int (*init_br)(struct au_branch *br, int perm);
20455+};
20456+
20457+/* hnotify.c */
20458+int au_hn_alloc(struct au_hinode *hinode, struct inode *inode);
20459+void au_hn_free(struct au_hinode *hinode);
20460+void au_hn_ctl(struct au_hinode *hinode, int do_set);
20461+void au_hn_reset(struct inode *inode, unsigned int flags);
20462+int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
20463+ const struct qstr *h_child_qstr, struct inode *h_child_inode);
20464+int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm);
20465+int au_hnotify_init_br(struct au_branch *br, int perm);
20466+void au_hnotify_fin_br(struct au_branch *br);
20467+int __init au_hnotify_init(void);
20468+void au_hnotify_fin(void);
20469+
20470+/* hfsnotify.c */
20471+extern const struct au_hnotify_op au_hnotify_op;
20472+
20473+static inline
20474+void au_hn_init(struct au_hinode *hinode)
20475+{
20476+ hinode->hi_notify = NULL;
20477+}
20478+
20479+static inline struct au_hnotify *au_hn(struct au_hinode *hinode)
20480+{
20481+ return hinode->hi_notify;
20482+}
20483+
20484+#else
20485+AuStub(int, au_hn_alloc, return -EOPNOTSUPP,
20486+ struct au_hinode *hinode __maybe_unused,
20487+ struct inode *inode __maybe_unused)
20488+AuStub(struct au_hnotify *, au_hn, return NULL, struct au_hinode *hinode)
20489+AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused)
20490+AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused,
20491+ int do_set __maybe_unused)
20492+AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused,
20493+ unsigned int flags __maybe_unused)
20494+AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused,
20495+ struct au_branch *br __maybe_unused,
20496+ int perm __maybe_unused)
20497+AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused,
20498+ int perm __maybe_unused)
20499+AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused)
20500+AuStubInt0(__init au_hnotify_init, void)
20501+AuStubVoid(au_hnotify_fin, void)
20502+AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused)
20503+#endif /* CONFIG_AUFS_HNOTIFY */
20504+/* au_hn_ctl() with do_set == 0 (a stub without CONFIG_AUFS_HNOTIFY) */
20505+static inline void au_hn_suspend(struct au_hinode *hdir)
20506+{
20507+ au_hn_ctl(hdir, /*do_set*/0);
20508+}
20509+/* au_hn_ctl() with do_set == 1 (a stub without CONFIG_AUFS_HNOTIFY) */
20510+static inline void au_hn_resume(struct au_hinode *hdir)
20511+{
20512+ au_hn_ctl(hdir, /*do_set*/1);
20513+}
20514+/* inode_lock() hdir->hi_inode first, then au_hn_suspend() it; undone by au_hn_inode_unlock() */
20515+static inline void au_hn_inode_lock(struct au_hinode *hdir)
20516+{
20517+ inode_lock(hdir->hi_inode);
20518+ au_hn_suspend(hdir);
20519+}
20520+/* same as au_hn_inode_lock() but uses inode_lock_nested() with subclass 'sc' */
20521+static inline void au_hn_inode_lock_nested(struct au_hinode *hdir,
20522+ unsigned int sc __maybe_unused)
20523+{
20524+ inode_lock_nested(hdir->hi_inode, sc);
20525+ au_hn_suspend(hdir);
20526+}
20527+
20528+#if 0 /* unused */
20529+#include "vfsub.h"
20530+static inline void au_hn_inode_lock_shared_nested(struct au_hinode *hdir,
20531+ unsigned int sc)
20532+{
20533+ inode_lock_shared_nested(hdir->hi_inode, sc);
20534+ au_hn_suspend(hdir);
20535+}
20536+#endif
20537+/* reverse order of au_hn_inode_lock(): au_hn_resume() first, then inode_unlock() */
20538+static inline void au_hn_inode_unlock(struct au_hinode *hdir)
20539+{
20540+ au_hn_resume(hdir);
20541+ inode_unlock(hdir->hi_inode);
20542+}
20543+
20544+#endif /* __KERNEL__ */
20545+#endif /* __AUFS_INODE_H__ */
20546diff -urN /usr/share/empty/fs/aufs/ioctl.c linux/fs/aufs/ioctl.c
20547--- /usr/share/empty/fs/aufs/ioctl.c 1970-01-01 01:00:00.000000000 +0100
20548+++ linux/fs/aufs/ioctl.c 2022-11-05 23:02:18.965889284 +0100
20549@@ -0,0 +1,220 @@
20550+// SPDX-License-Identifier: GPL-2.0
20551+/*
20552+ * Copyright (C) 2005-2022 Junjiro R. Okajima
20553+ *
20554+ * This program is free software; you can redistribute it and/or modify
20555+ * it under the terms of the GNU General Public License as published by
20556+ * the Free Software Foundation; either version 2 of the License, or
20557+ * (at your option) any later version.
20558+ *
20559+ * This program is distributed in the hope that it will be useful,
20560+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20561+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20562+ * GNU General Public License for more details.
20563+ *
20564+ * You should have received a copy of the GNU General Public License
20565+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
20566+ */
20567+
20568+/*
20569+ * ioctl
20570+ * plink-management and readdir in userspace.
20571+ * assist the pathconf(3) wrapper library.
20572+ * move-down
20573+ * File-based Hierarchical Storage Management.
20574+ */
20575+
20576+#include <linux/compat.h>
20577+#include <linux/file.h>
20578+#include "aufs.h"
20579+/* open the aufs root on a writable branch via au_h_open() and return it as a new fd, or -errno */
20580+static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg)
20581+{
20582+ int err, fd;
20583+ aufs_bindex_t wbi, bindex, bbot;
20584+ struct file *h_file;
20585+ struct super_block *sb;
20586+ struct dentry *root;
20587+ struct au_branch *br;
20588+ struct aufs_wbr_fd wbrfd = { /* defaults used when arg is NULL */
20589+ .oflags = au_dir_roflags,
20590+ .brid = -1
20591+ };
20592+ const int valid = O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_DIRECTORY
20593+ | O_NOATIME | O_CLOEXEC;
20594+
20595+ AuDebugOn(wbrfd.oflags & ~valid);
20596+
20597+ if (arg) {
20598+ err = copy_from_user(&wbrfd, arg, sizeof(wbrfd));
20599+ if (unlikely(err)) {
20600+ err = -EFAULT;
20601+ goto out;
20602+ }
20603+
20604+ err = -EINVAL;
20605+ AuDbg("wbrfd{0%o, %d}\n", wbrfd.oflags, wbrfd.brid);
20606+ wbrfd.oflags |= au_dir_roflags;
20607+ AuDbg("0%o\n", wbrfd.oflags);
20608+ if (unlikely(wbrfd.oflags & ~valid)) /* reject any flag outside 'valid' */
20609+ goto out;
20610+ }
20611+
20612+ fd = get_unused_fd_flags(0); /* NOTE(review): O_CLOEXEC from oflags is not passed here - confirm */
20613+ err = fd;
20614+ if (unlikely(fd < 0))
20615+ goto out;
20616+
20617+ h_file = ERR_PTR(-EINVAL);
20618+ wbi = 0;
20619+ br = NULL;
20620+ sb = path->dentry->d_sb;
20621+ root = sb->s_root;
20622+ aufs_read_lock(root, AuLock_IR);
20623+ bbot = au_sbbot(sb);
20624+ if (wbrfd.brid >= 0) { /* caller named a branch id: map it to an index */
20625+ wbi = au_br_index(sb, wbrfd.brid);
20626+ if (unlikely(wbi < 0 || wbi > bbot))
20627+ goto out_unlock;
20628+ }
20629+
20630+ h_file = ERR_PTR(-ENOENT);
20631+ br = au_sbr(sb, wbi);
20632+ if (!au_br_writable(br->br_perm)) {
20633+ if (arg) /* with an explicit argument there is no fallback search */
20634+ goto out_unlock;
20635+
20636+ bindex = wbi + 1;
20637+ wbi = -1;
20638+ for (; bindex <= bbot; bindex++) { /* first writable branch below wins */
20639+ br = au_sbr(sb, bindex);
20640+ if (au_br_writable(br->br_perm)) {
20641+ wbi = bindex;
20642+ br = au_sbr(sb, wbi);
20643+ break;
20644+ }
20645+ }
20646+ }
20647+ AuDbg("wbi %d\n", wbi);
20648+ if (wbi >= 0)
20649+ h_file = au_h_open(root, wbi, wbrfd.oflags, NULL,
20650+ /*force_wr*/0);
20651+
20652+out_unlock:
20653+ aufs_read_unlock(root, AuLock_IR);
20654+ err = PTR_ERR(h_file);
20655+ if (IS_ERR(h_file))
20656+ goto out_fd;
20657+
20658+ au_lcnt_dec(&br->br_nfiles); /* cf. au_h_open() */
20659+ fd_install(fd, h_file);
20660+ err = fd;
20661+ goto out; /* success */
20662+
20663+out_fd:
20664+ put_unused_fd(fd);
20665+out:
20666+ AuTraceErr(err);
20667+ return err;
20668+}
20669+
20670+/* ---------------------------------------------------------------------- */
20671+/* ioctl handler for directories: dispatch the aufs commands, -ENOTTY for anything else */
20672+long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg)
20673+{
20674+ long err;
20675+ struct dentry *dentry;
20676+
20677+ switch (cmd) {
20678+ case AUFS_CTL_RDU:
20679+ case AUFS_CTL_RDU_INO:
20680+ err = au_rdu_ioctl(file, cmd, arg);
20681+ break;
20682+
20683+ case AUFS_CTL_WBR_FD:
20684+ err = au_wbr_fd(&file->f_path, (void __user *)arg);
20685+ break;
20686+
20687+ case AUFS_CTL_IBUSY:
20688+ err = au_ibusy_ioctl(file, arg);
20689+ break;
20690+
20691+ case AUFS_CTL_BRINFO:
20692+ err = au_brinfo_ioctl(file, arg);
20693+ break;
20694+
20695+ case AUFS_CTL_FHSM_FD:
20696+ dentry = file->f_path.dentry;
20697+ if (IS_ROOT(dentry)) /* only accepted on the root dentry */
20698+ err = au_fhsm_fd(dentry->d_sb, arg);
20699+ else
20700+ err = -ENOTTY;
20701+ break;
20702+
20703+ default:
20704+ /* do not call the lower */
20705+ AuDbg("0x%x\n", cmd);
20706+ err = -ENOTTY;
20707+ }
20708+
20709+ AuTraceErr(err);
20710+ return err;
20711+}
20712+/* ioctl handler for non-directories: AUFS_CTL_MVDOWN and AUFS_CTL_WBR_FD only, else -ENOTTY */
20713+long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg)
20714+{
20715+ long err;
20716+
20717+ switch (cmd) {
20718+ case AUFS_CTL_MVDOWN:
20719+ err = au_mvdown(file->f_path.dentry, (void __user *)arg);
20720+ break;
20721+
20722+ case AUFS_CTL_WBR_FD:
20723+ err = au_wbr_fd(&file->f_path, (void __user *)arg);
20724+ break;
20725+
20726+ default:
20727+ /* do not call the lower */
20728+ AuDbg("0x%x\n", cmd);
20729+ err = -ENOTTY;
20730+ }
20731+
20732+ AuTraceErr(err);
20733+ return err;
20734+}
20735+
20736+#ifdef CONFIG_COMPAT
20737+long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
20738+ unsigned long arg)
20739+{
20740+ long err;
20741+
20742+ switch (cmd) { /* commands with a dedicated compat handler come first */
20743+ case AUFS_CTL_RDU:
20744+ case AUFS_CTL_RDU_INO:
20745+ err = au_rdu_compat_ioctl(file, cmd, arg);
20746+ break;
20747+
20748+ case AUFS_CTL_IBUSY:
20749+ err = au_ibusy_compat_ioctl(file, arg);
20750+ break;
20751+
20752+ case AUFS_CTL_BRINFO:
20753+ err = au_brinfo_compat_ioctl(file, arg);
20754+ break;
20755+
20756+ default:
20757+ err = aufs_ioctl_dir(file, cmd, arg); /* everything else goes to the native handler as is */
20758+ }
20759+
20760+ AuTraceErr(err);
20761+ return err;
20762+}
20763+/* compat entry for non-directories: convert arg with compat_ptr() and reuse aufs_ioctl_nondir() */
20764+long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
20765+ unsigned long arg)
20766+{
20767+ return aufs_ioctl_nondir(file, cmd, (unsigned long)compat_ptr(arg));
20768+}
20769+#endif
20770diff -urN /usr/share/empty/fs/aufs/i_op_add.c linux/fs/aufs/i_op_add.c
20771--- /usr/share/empty/fs/aufs/i_op_add.c 1970-01-01 01:00:00.000000000 +0100
20772+++ linux/fs/aufs/i_op_add.c 2023-02-20 21:05:51.959693785 +0100
20773@@ -0,0 +1,972 @@
20774+// SPDX-License-Identifier: GPL-2.0
20775+/*
20776+ * Copyright (C) 2005-2022 Junjiro R. Okajima
20777+ *
20778+ * This program is free software; you can redistribute it and/or modify
20779+ * it under the terms of the GNU General Public License as published by
20780+ * the Free Software Foundation; either version 2 of the License, or
20781+ * (at your option) any later version.
20782+ *
20783+ * This program is distributed in the hope that it will be useful,
20784+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
20785+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20786+ * GNU General Public License for more details.
20787+ *
20788+ * You should have received a copy of the GNU General Public License
20789+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
20790+ */
20791+
20792+/*
20793+ * inode operations (add entry)
20794+ */
20795+
20796+#include <linux/iversion.h>
20797+#include "aufs.h"
20798+
20799+/*
20800+ * final procedure of adding a new entry, except link(2).
20801+ * remove whiteout, instantiate, copyup the parent dir's times and size
20802+ * and update version.
20803+ * if it failed, re-create the removed whiteout.
20804+ */
20805+static int epilog(struct inode *dir, aufs_bindex_t bindex,
20806+ struct dentry *wh_dentry, struct dentry *dentry)
20807+{
20808+ int err, rerr;
20809+ aufs_bindex_t bwh;
20810+ struct path h_path;
20811+ struct super_block *sb;
20812+ struct inode *inode, *h_dir;
20813+ struct dentry *wh;
20814+
20815+ bwh = -1;
20816+ sb = dir->i_sb;
20817+ if (wh_dentry) { /* a whiteout was found by the caller: unlink it first */
20818+ h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */
20819+ IMustLock(h_dir);
20820+ AuDebugOn(au_h_iptr(dir, bindex) != h_dir);
20821+ bwh = au_dbwh(dentry); /* remembered for the revert below */
20822+ h_path.dentry = wh_dentry;
20823+ h_path.mnt = au_sbr_mnt(sb, bindex);
20824+ err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path,
20825+ dentry);
20826+ if (unlikely(err))
20827+ goto out;
20828+ }
20829+
20830+ inode = au_new_inode(dentry, /*must_new*/1);
20831+ if (!IS_ERR(inode)) {
20832+ d_instantiate(dentry, inode);
20833+ dir = d_inode(dentry->d_parent); /* dir inode is locked */
20834+ IMustLock(dir);
20835+ au_dir_ts(dir, bindex);
20836+ inode_inc_iversion(dir);
20837+ au_fhsm_wrote(sb, bindex, /*force*/0);
20838+ return 0; /* success */
20839+ }
20840+
20841+ err = PTR_ERR(inode);
20842+ if (!wh_dentry) /* nothing was unlinked, so nothing to revert */
20843+ goto out;
20844+
20845+ /* revert */
20846+ /* dir inode is locked */
20847+ wh = au_wh_create(dentry, bwh, wh_dentry->d_parent);
20848+ rerr = PTR_ERR(wh);
20849+ if (IS_ERR(wh)) {
20850+ AuIOErr("%pd reverting whiteout failed(%d, %d)\n",
20851+ dentry, err, rerr);
20852+ err = -EIO; /* the revert itself failed: replace the original error */
20853+ } else
20854+ dput(wh);
20855+
20856+out:
20857+ return err;
20858+}
20859+/* -EEXIST for a positive dentry, else -ENOENT for an unhashed one, else 0 */
20860+static int au_d_may_add(struct dentry *dentry)
20861+{
20862+ int err;
20863+
20864+ err = 0;
20865+ if (unlikely(d_unhashed(dentry)))
20866+ err = -ENOENT;
20867+ if (unlikely(d_really_is_positive(dentry))) /* overrides -ENOENT set above */
20868+ err = -EEXIST;
20869+ return err;
20870+}
20871+
20872+/*
20873+ * simple tests for the adding inode operations.
20874+ * following the checks in vfs, plus the parent-child relationship.
20875+ */
20876+int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
20877+ struct dentry *h_parent, int isdir)
20878+{
20879+ int err;
20880+ umode_t h_mode;
20881+ struct dentry *h_dentry;
20882+ struct inode *h_inode;
20883+
20884+ err = -ENAMETOOLONG;
20885+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
20886+ goto out;
20887+
20888+ h_dentry = au_h_dptr(dentry, bindex);
20889+ if (d_really_is_negative(dentry)) { /* new name: the lower dentry must be negative too */
20890+ err = -EEXIST;
20891+ if (unlikely(d_is_positive(h_dentry)))
20892+ goto out;
20893+ } else {
20894+ /* rename(2) case */
20895+ err = -EIO;
20896+ if (unlikely(d_is_negative(h_dentry)))
20897+ goto out;
20898+ h_inode = d_inode(h_dentry);
20899+ if (unlikely(!h_inode->i_nlink))
20900+ goto out;
20901+
20902+ h_mode = h_inode->i_mode;
20903+ if (!isdir) { /* the lower type must agree with 'isdir' */
20904+ err = -EISDIR;
20905+ if (unlikely(S_ISDIR(h_mode)))
20906+ goto out;
20907+ } else if (unlikely(!S_ISDIR(h_mode))) {
20908+ err = -ENOTDIR;
20909+ goto out;
20910+ }
20911+ }
20912+
20913+ err = 0;
20914+ /* expected parent dir is locked */
20915+ if (unlikely(h_parent != h_dentry->d_parent))
20916+ err = -EIO;
20917+
20918+out:
20919+ AuTraceErr(err);
20920+ return err;
20921+}
20922+
20923+/*
20924+ * initial procedure of adding a new entry.
20925+ * prepare writable branch and the parent dir, lock it,
20926+ * and lookup whiteout for the new entry.
20927+ */
20928+static struct dentry*
20929+lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt,
20930+ struct dentry *src_dentry, struct au_pin *pin,
20931+ struct au_wr_dir_args *wr_dir_args)
20932+{
20933+ struct dentry *wh_dentry, *h_parent;
20934+ struct super_block *sb;
20935+ struct au_branch *br;
20936+ int err;
20937+ unsigned int udba;
20938+ aufs_bindex_t bcpup;
20939+
20940+ AuDbg("%pd\n", dentry);
20941+
20942+ err = au_wr_dir(dentry, src_dentry, wr_dir_args);
20943+ bcpup = err; /* a non-negative result is used as the target branch index */
20944+ wh_dentry = ERR_PTR(err);
20945+ if (unlikely(err < 0))
20946+ goto out;
20947+
20948+ sb = dentry->d_sb;
20949+ udba = au_opt_udba(sb);
20950+ err = au_pin(pin, dentry, bcpup, udba,
20951+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
20952+ wh_dentry = ERR_PTR(err);
20953+ if (unlikely(err))
20954+ goto out;
20955+
20956+ h_parent = au_pinned_h_parent(pin);
20957+ if (udba != AuOpt_UDBA_NONE
20958+ && au_dbtop(dentry) == bcpup)
20959+ err = au_may_add(dentry, bcpup, h_parent,
20960+ au_ftest_wrdir(wr_dir_args->flags, ISDIR));
20961+ else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
20962+ err = -ENAMETOOLONG;
20963+ wh_dentry = ERR_PTR(err);
20964+ if (unlikely(err))
20965+ goto out_unpin;
20966+
20967+ br = au_sbr(sb, bcpup);
20968+ if (dt) { /* optional: let the caller save the parent's times */
20969+ struct path tmp = {
20970+ .dentry = h_parent,
20971+ .mnt = au_br_mnt(br)
20972+ };
20973+ au_dtime_store(dt, au_pinned_parent(pin), &tmp);
20974+ }
20975+
20976+ wh_dentry = NULL;
20977+ if (bcpup != au_dbwh(dentry))
20978+ goto out; /* success */
20979+
20980+ /*
20981+ * ENAMETOOLONG here means that if we allowed creating such a name, it
20982+ * could not be removed in the future. So we don't allow such a name
20983+ * here and we don't handle ENAMETOOLONG differently.
20984+ */
20985+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
20986+
20987+out_unpin:
20988+ if (IS_ERR(wh_dentry)) /* the pin is kept on success and released only on error */
20989+ au_unpin(pin);
20990+out:
20991+ return wh_dentry; /* NULL, the dentry from au_wh_lkup(), or ERR_PTR() */
20992+}
20993+
20994+/* ---------------------------------------------------------------------- */
20995+/* arguments of add_simple(): 'type' tells which member of 'u' the operation reads */
20996+enum { Mknod, Symlink, Creat };
20997+struct simple_arg {
20998+ int type;
20999+ union {
21000+ struct {
21001+ umode_t mode;
21002+ bool want_excl;
21003+ bool try_aopen;
21004+ struct vfsub_aopen_args *aopen;
21005+ } c; /* Creat */
21006+ struct {
21007+ const char *symname;
21008+ } s; /* Symlink */
21009+ struct {
21010+ umode_t mode;
21011+ dev_t dev;
21012+ } m; /* Mknod */
21013+ } u;
21014+};
21015+/* common body of mknod/symlink/create: make the lower entry, run epilog(), undo it on failure */
21016+static int add_simple(struct inode *dir, struct dentry *dentry,
21017+ struct simple_arg *arg)
21018+{
21019+ int err, rerr;
21020+ aufs_bindex_t btop;
21021+ unsigned char created;
21022+ const unsigned char try_aopen
21023+ = (arg->type == Creat && arg->u.c.try_aopen);
21024+ struct vfsub_aopen_args *aopen = arg->u.c.aopen; /* only used when try_aopen is set */
21025+ struct dentry *wh_dentry, *parent;
21026+ struct inode *h_dir;
21027+ struct super_block *sb;
21028+ struct au_branch *br;
21029+ /* to reduce stack size */
21030+ struct {
21031+ struct au_dtime dt;
21032+ struct au_pin pin;
21033+ struct path h_path;
21034+ struct au_wr_dir_args wr_dir_args;
21035+ } *a;
21036+
21037+ AuDbg("%pd\n", dentry);
21038+ IMustLock(dir);
21039+
21040+ err = -ENOMEM;
21041+ a = kmalloc(sizeof(*a), GFP_NOFS);
21042+ if (unlikely(!a))
21043+ goto out;
21044+ a->wr_dir_args.force_btgt = -1;
21045+ a->wr_dir_args.flags = AuWrDir_ADD_ENTRY;
21046+
21047+ parent = dentry->d_parent; /* dir inode is locked */
21048+ if (!try_aopen) { /* in the try_aopen case no aufs lock is taken or dropped here */
21049+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
21050+ if (unlikely(err))
21051+ goto out_free;
21052+ }
21053+ err = au_d_may_add(dentry);
21054+ if (unlikely(err))
21055+ goto out_unlock;
21056+ if (!try_aopen)
21057+ di_write_lock_parent(parent);
21058+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
21059+ &a->pin, &a->wr_dir_args);
21060+ err = PTR_ERR(wh_dentry);
21061+ if (IS_ERR(wh_dentry))
21062+ goto out_parent;
21063+
21064+ btop = au_dbtop(dentry);
21065+ sb = dentry->d_sb;
21066+ br = au_sbr(sb, btop);
21067+ a->h_path.dentry = au_h_dptr(dentry, btop);
21068+ a->h_path.mnt = au_br_mnt(br);
21069+ h_dir = au_pinned_h_dir(&a->pin);
21070+ switch (arg->type) { /* every case sets both err and created */
21071+ case Creat:
21072+ if (!try_aopen || !h_dir->i_op->atomic_open) {
21073+ err = vfsub_create(h_dir, &a->h_path, arg->u.c.mode,
21074+ arg->u.c.want_excl);
21075+ created = !err;
21076+ if (!err && try_aopen)
21077+ aopen->file->f_mode |= FMODE_CREATED;
21078+ } else {
21079+ aopen->br = br;
21080+ err = vfsub_atomic_open(h_dir, a->h_path.dentry, aopen);
21081+ AuDbg("err %d\n", err);
21082+ AuDbgFile(aopen->file);
21083+ created = err >= 0
21084+ && !!(aopen->file->f_mode & FMODE_CREATED);
21085+ }
21086+ break;
21087+ case Symlink:
21088+ err = vfsub_symlink(h_dir, &a->h_path, arg->u.s.symname);
21089+ created = !err;
21090+ break;
21091+ case Mknod:
21092+ err = vfsub_mknod(h_dir, &a->h_path, arg->u.m.mode,
21093+ arg->u.m.dev);
21094+ created = !err;
21095+ break;
21096+ default:
21097+ BUG();
21098+ }
21099+ if (unlikely(err < 0))
21100+ goto out_unpin;
21101+
21102+ err = epilog(dir, btop, wh_dentry, dentry);
21103+ if (!err)
21104+ goto out_unpin; /* success */
21105+
21106+ /* revert */
21107+ if (created /* && d_is_positive(a->h_path.dentry) */) {
21108+ /* no delegation since it is just created */
21109+ rerr = vfsub_unlink(h_dir, &a->h_path, /*delegated*/NULL,
21110+ /*force*/0);
21111+ if (rerr) {
21112+ AuIOErr("%pd revert failure(%d, %d)\n",
21113+ dentry, err, rerr);
21114+ err = -EIO;
21115+ }
21116+ au_dtime_revert(&a->dt);
21117+ }
21118+ if (try_aopen && h_dir->i_op->atomic_open
21119+ && (aopen->file->f_mode & FMODE_OPENED))
21120+ /* aopen->file is still opened */
21121+ au_lcnt_dec(&aopen->br->br_nfiles);
21122+
21123+out_unpin:
21124+ au_unpin(&a->pin);
21125+ dput(wh_dentry);
21126+out_parent:
21127+ if (!try_aopen)
21128+ di_write_unlock(parent);
21129+out_unlock:
21130+ if (unlikely(err)) { /* on any failure the dentry is dropped from the dcache hash */
21131+ au_update_dbtop(dentry);
21132+ d_drop(dentry);
21133+ }
21134+ if (!try_aopen)
21135+ aufs_read_unlock(dentry, AuLock_DW);
21136+out_free:
21137+ au_kfree_rcu(a);
21138+out:
21139+ return err;
21140+}
21141+/* mknod entry point: pack mode and dev into a Mknod simple_arg for add_simple(); userns is unused */
21142+int aufs_mknod(struct user_namespace *userns, struct inode *dir,
21143+ struct dentry *dentry, umode_t mode, dev_t dev)
21144+{
21145+ struct simple_arg arg = {
21146+ .type = Mknod,
21147+ .u.m = {
21148+ .mode = mode,
21149+ .dev = dev
21150+ }
21151+ };
21152+ return add_simple(dir, dentry, &arg);
21153+}
21154+/* symlink entry point: pass symname to add_simple() as a Symlink request; userns is unused */
21155+int aufs_symlink(struct user_namespace *userns, struct inode *dir,
21156+ struct dentry *dentry, const char *symname)
21157+{
21158+ struct simple_arg arg = {
21159+ .type = Symlink,
21160+ .u.s.symname = symname
21161+ };
21162+ return add_simple(dir, dentry, &arg);
21163+}
21164+/* create entry point: Creat request for add_simple(); try_aopen and aopen stay zero-initialized */
21165+int aufs_create(struct user_namespace *userns, struct inode *dir,
21166+ struct dentry *dentry, umode_t mode, bool want_excl)
21167+{
21168+ struct simple_arg arg = {
21169+ .type = Creat,
21170+ .u.c = {
21171+ .mode = mode,
21172+ .want_excl = want_excl
21173+ }
21174+ };
21175+ return add_simple(dir, dentry, &arg);
21176+}
21177+/* Creat request with try_aopen set; mode and O_EXCL are taken from aopen_args */
21178+int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
21179+ struct vfsub_aopen_args *aopen_args)
21180+{
21181+ struct simple_arg arg = {
21182+ .type = Creat,
21183+ .u.c = {
21184+ .mode = aopen_args->create_mode,
21185+ .want_excl = aopen_args->open_flag & O_EXCL,
21186+ .try_aopen = true,
21187+ .aopen = aopen_args
21188+ }
21189+ };
21190+ return add_simple(dir, dentry, &arg);
21191+}
21192+/* tmpfile entry point: open a lower tmpfile on a writable branch and bind it to file's dentry */
21193+int aufs_tmpfile(struct user_namespace *userns, struct inode *dir,
21194+ struct file *file, umode_t mode)
21195+{
21196+ int err;
21197+ aufs_bindex_t bindex;
21198+ struct path h_ppath;
21199+ struct super_block *sb;
21200+ struct au_branch *br;
21201+ struct dentry *dentry, *parent, *h_parent, *h_dentry;
21202+ struct inode *h_dir, *inode;
21203+ struct vfsmount *h_mnt;
21204+ struct user_namespace *h_userns;
21205+ struct file *h_file;
21206+ struct au_wr_dir_args wr_dir_args = {
21207+ .force_btgt = -1,
21208+ .flags = AuWrDir_TMPFILE
21209+ };
21210+
21211+ /* copy-up may happen */
21212+ inode_lock(dir);
21213+
21214+ h_file = NULL; /* stays NULL on every error path before vfs_tmpfile_open() succeeds */
21215+ sb = dir->i_sb;
21216+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
21217+ if (unlikely(err))
21218+ goto out;
21219+
21220+ dentry = file->f_path.dentry;
21221+ err = au_di_init(dentry);
21222+ if (unlikely(err))
21223+ goto out_si;
21224+
21225+ err = -EBUSY;
21226+ parent = d_find_any_alias(dir);
21227+ AuDebugOn(!parent);
21228+ di_write_lock_parent(parent);
21229+ if (unlikely(d_inode(parent) != dir))
21230+ goto out_parent;
21231+
21232+ err = au_digen_test(parent, au_sigen(sb));
21233+ if (unlikely(err))
21234+ goto out_parent;
21235+
21236+ bindex = au_dbtop(parent);
21237+ au_set_dbtop(dentry, bindex);
21238+ au_set_dbbot(dentry, bindex);
21239+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
21240+ bindex = err; /* a non-negative result is the branch index to use */
21241+ if (unlikely(err < 0))
21242+ goto out_parent;
21243+
21244+ err = -EOPNOTSUPP;
21245+ h_dir = au_h_iptr(dir, bindex);
21246+ if (unlikely(!h_dir->i_op->tmpfile))
21247+ goto out_parent;
21248+
21249+ br = au_sbr(sb, bindex);
21250+ h_mnt = au_br_mnt(br);
21251+ err = vfsub_mnt_want_write(h_mnt);
21252+ if (unlikely(err))
21253+ goto out_parent;
21254+
21255+ h_userns = mnt_user_ns(h_mnt);
21256+ h_parent = au_h_dptr(parent, bindex);
21257+ h_ppath.mnt = h_mnt;
21258+ h_ppath.dentry = h_parent;
21259+ h_file = vfs_tmpfile_open(h_userns, &h_ppath, mode, /*open_flag*/0,
21260+ current_cred());
21261+ if (IS_ERR(h_file)) {
21262+ err = PTR_ERR(h_file);
21263+ h_file = NULL;
21264+ goto out_mnt;
21265+ }
21266+
21267+ h_dentry = h_file->f_path.dentry;
21268+ au_set_dbtop(dentry, bindex);
21269+ au_set_dbbot(dentry, bindex);
21270+ au_set_h_dptr(dentry, bindex, dget(h_dentry));
21271+ inode = au_new_inode(dentry, /*must_new*/1);
21272+ if (IS_ERR(inode)) {
21273+ err = PTR_ERR(inode);
21274+ au_set_h_dptr(dentry, bindex, NULL);
21275+ au_set_dbtop(dentry, -1);
21276+ au_set_dbbot(dentry, -1);
21277+ goto out_h_file; /* the single reference on h_file is dropped there */
21278+ }
21279+
21280+ if (!inode->i_nlink)
21281+ set_nlink(inode, 1);
21282+ d_tmpfile(file, inode);
21283+ au_di(dentry)->di_tmpfile = 1;
21284+ get_file(h_file); /* extra reference owned by di_htmpfile */
21285+ au_di(dentry)->di_htmpfile = h_file;
21286+
21287+ /* update without i_mutex */
21288+ if (au_ibtop(dir) == au_dbtop(dentry))
21289+ au_cpup_attr_timesizes(dir);
21290+
21291+out_h_file:
21292+ fput(h_file); /* drops the reference returned by vfs_tmpfile_open() */
21293+out_mnt:
21294+ vfsub_mnt_drop_write(h_mnt);
21295+out_parent:
21296+ di_write_unlock(parent);
21297+ dput(parent);
21298+ di_write_unlock(dentry);
21299+ if (!err)
21300+ goto out_si;
21301+ /* no fput() here: h_file is NULL, or its only reference was already */
21302+ /* dropped at out_h_file; putting it again would underflow f_count */
21303+ au_di(dentry)->di_htmpfile = NULL;
21304+ au_di_fin(dentry);
21305+ dentry->d_fsdata = NULL;
21306+out_si:
21307+ si_read_unlock(sb);
21308+ if (!err && h_file) {
21309+ /* finally... */
21310+ err = finish_open_simple(file, err);
21311+ if (!err)
21312+ au_lcnt_inc(&br->br_nfiles);
21313+ else {
21314+ fput(h_file); /* give back the reference held via di_htmpfile */
21315+ au_di(dentry)->di_htmpfile = NULL;
21316+ au_di_fin(dentry);
21317+ dentry->d_fsdata = NULL;
21318+ }
21319+ }
21320+out:
21321+ inode_unlock(dir);
21322+ AuTraceErr(err);
21323+ return err;
21324+}
21325+
21326+/* ---------------------------------------------------------------------- */
21327+/* working set shared by aufs_link() and its helpers au_cpup_before_link()/au_cpup_or_link() */
21328+struct au_link_args {
21329+ aufs_bindex_t bdst, bsrc;
21330+ struct au_pin pin;
21331+ struct path h_path;
21332+ struct dentry *src_parent, *parent;
21333+};
21334+/* copy src_dentry up from branch a->bsrc to a->bdst, holding a read lock on a->src_parent */
21335+static int au_cpup_before_link(struct dentry *src_dentry,
21336+ struct au_link_args *a)
21337+{
21338+ int err;
21339+ struct dentry *h_src_dentry;
21340+ struct au_cp_generic cpg = {
21341+ .dentry = src_dentry,
21342+ .bdst = a->bdst,
21343+ .bsrc = a->bsrc,
21344+ .len = -1,
21345+ .pin = &a->pin,
21346+ .flags = AuCpup_DTIME | AuCpup_HOPEN /* | AuCpup_KEEPLINO */
21347+ };
21348+
21349+ di_read_lock_parent(a->src_parent, AuLock_IR);
21350+ err = au_test_and_cpup_dirs(src_dentry, a->bdst);
21351+ if (unlikely(err))
21352+ goto out;
21353+
21354+ h_src_dentry = au_h_dptr(src_dentry, a->bsrc); /* NOTE(review): never read afterwards */
21355+ err = au_pin(&a->pin, src_dentry, a->bdst,
21356+ au_opt_udba(src_dentry->d_sb),
21357+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
21358+ if (unlikely(err))
21359+ goto out;
21360+
21361+ err = au_sio_cpup_simple(&cpg);
21362+ au_unpin(&a->pin); /* unpinned whether or not the copy-up succeeded */
21363+
21364+out:
21365+ di_read_unlock(a->src_parent, AuLock_IR);
21366+ return err;
21367+}
21368+/* on branch a->bdst: link to the inode if it exists there with links, otherwise copy it up */
21369+static int au_cpup_or_link(struct dentry *src_dentry, struct dentry *dentry,
21370+ struct au_link_args *a)
21371+{
21372+ int err;
21373+ unsigned char plink;
21374+ aufs_bindex_t bbot;
21375+ struct dentry *h_src_dentry;
21376+ struct inode *h_inode, *inode, *delegated;
21377+ struct super_block *sb;
21378+ struct file *h_file;
21379+
21380+ plink = 0;
21381+ h_inode = NULL;
21382+ sb = src_dentry->d_sb;
21383+ inode = d_inode(src_dentry);
21384+ if (au_ibtop(inode) <= a->bdst)
21385+ h_inode = au_h_iptr(inode, a->bdst);
21386+ if (!h_inode || !h_inode->i_nlink) {
21387+ /* copyup src_dentry as the name of dentry. */
21388+ bbot = au_dbbot(dentry); /* saved and restored at the end of this branch */
21389+ if (bbot < a->bsrc)
21390+ au_set_dbbot(dentry, a->bsrc);
21391+ au_set_h_dptr(dentry, a->bsrc,
21392+ dget(au_h_dptr(src_dentry, a->bsrc)));
21393+ dget(a->h_path.dentry); /* keep it alive while its slot is cleared below */
21394+ au_set_h_dptr(dentry, a->bdst, NULL);
21395+ AuDbg("temporary d_inode...\n");
21396+ spin_lock(&dentry->d_lock);
21397+ dentry->d_inode = d_inode(src_dentry); /* tmp */
21398+ spin_unlock(&dentry->d_lock);
21399+ h_file = au_h_open_pre(dentry, a->bsrc, /*force_wr*/0);
21400+ if (IS_ERR(h_file))
21401+ err = PTR_ERR(h_file);
21402+ else {
21403+ struct au_cp_generic cpg = {
21404+ .dentry = dentry,
21405+ .bdst = a->bdst,
21406+ .bsrc = -1,
21407+ .len = -1,
21408+ .pin = &a->pin,
21409+ .flags = AuCpup_KEEPLINO
21410+ };
21411+ err = au_sio_cpup_simple(&cpg);
21412+ au_h_open_post(dentry, a->bsrc, h_file);
21413+ if (!err) {
21414+ dput(a->h_path.dentry);
21415+ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
21416+ } else
21417+ au_set_h_dptr(dentry, a->bdst,
21418+ a->h_path.dentry);
21419+ }
21420+ spin_lock(&dentry->d_lock);
21421+ dentry->d_inode = NULL; /* restore */
21422+ spin_unlock(&dentry->d_lock);
21423+ AuDbg("temporary d_inode...done\n");
21424+ au_set_h_dptr(dentry, a->bsrc, NULL);
21425+ au_set_dbbot(dentry, bbot);
21426+ } else {
21427+ /* the inode of src_dentry already exists on a.bdst branch */
21428+ h_src_dentry = d_find_alias(h_inode);
21429+ if (!h_src_dentry && au_plink_test(inode)) { /* no alias: try the pseudo-link lookup */
21430+ plink = 1;
21431+ h_src_dentry = au_plink_lkup(inode, a->bdst);
21432+ err = PTR_ERR(h_src_dentry);
21433+ if (IS_ERR(h_src_dentry))
21434+ goto out;
21435+
21436+ if (unlikely(d_is_negative(h_src_dentry))) {
21437+ dput(h_src_dentry);
21438+ h_src_dentry = NULL;
21439+ }
21440+
21441+ }
21442+ if (h_src_dentry) {
21443+ delegated = NULL;
21444+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
21445+ &a->h_path, &delegated);
21446+ if (unlikely(err == -EWOULDBLOCK)) {
21447+ pr_warn("cannot retry for NFSv4 delegation"
21448+ " for an internal link\n");
21449+ iput(delegated);
21450+ }
21451+ dput(h_src_dentry);
21452+ } else {
21453+ AuIOErr("no dentry found for hi%lu on b%d\n",
21454+ h_inode->i_ino, a->bdst);
21455+ err = -EIO;
21456+ }
21457+ }
21458+
21459+ if (!err && !plink) /* skipped when the plink lookup path above was taken */
21460+ au_plink_append(inode, a->bdst, a->h_path.dentry);
21461+
21462+out:
21463+ AuTraceErr(err);
21464+ return err;
21465+}
21466+
21467+int aufs_link(struct dentry *src_dentry, struct inode *dir,
21468+ struct dentry *dentry)
21469+{
21470+ int err, rerr;
21471+ struct au_dtime dt;
21472+ struct au_link_args *a;
21473+ struct dentry *wh_dentry, *h_src_dentry;
21474+ struct inode *inode, *delegated;
21475+ struct super_block *sb;
21476+ struct au_wr_dir_args wr_dir_args = {
21477+ /* .force_btgt = -1, */
21478+ .flags = AuWrDir_ADD_ENTRY
21479+ };
21480+
21481+ IMustLock(dir);
21482+ inode = d_inode(src_dentry);
21483+ IMustLock(inode);
21484+
21485+ err = -ENOMEM;
21486+ a = kzalloc(sizeof(*a), GFP_NOFS);
21487+ if (unlikely(!a))
21488+ goto out;
21489+
21490+ a->parent = dentry->d_parent; /* dir inode is locked */
21491+ err = aufs_read_and_write_lock2(dentry, src_dentry,
21492+ AuLock_NOPLM | AuLock_GEN);
21493+ if (unlikely(err))
21494+ goto out_kfree;
21495+ err = au_d_linkable(src_dentry);
21496+ if (unlikely(err))
21497+ goto out_unlock;
21498+ err = au_d_may_add(dentry);
21499+ if (unlikely(err))
21500+ goto out_unlock;
21501+
21502+ a->src_parent = dget_parent(src_dentry);
21503+ wr_dir_args.force_btgt = au_ibtop(inode);
21504+
21505+ di_write_lock_parent(a->parent);
21506+ wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt);
21507+ wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin,
21508+ &wr_dir_args);
21509+ err = PTR_ERR(wh_dentry);
21510+ if (IS_ERR(wh_dentry))
21511+ goto out_parent;
21512+
21513+ err = 0;
21514+ sb = dentry->d_sb;
21515+ a->bdst = au_dbtop(dentry);
21516+ a->h_path.dentry = au_h_dptr(dentry, a->bdst);
21517+ a->h_path.mnt = au_sbr_mnt(sb, a->bdst);
21518+ a->bsrc = au_ibtop(inode);
21519+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
21520+ if (!h_src_dentry && au_di(src_dentry)->di_tmpfile)
21521+ h_src_dentry = dget(au_hi_wh(inode, a->bsrc));
21522+ if (!h_src_dentry) {
21523+ a->bsrc = au_dbtop(src_dentry);
21524+ h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
21525+ AuDebugOn(!h_src_dentry);
21526+ } else if (IS_ERR(h_src_dentry)) {
21527+ err = PTR_ERR(h_src_dentry);
21528+ goto out_parent;
21529+ }
21530+
21531+ /*
21532+ * aufs doesn't touch the credential so
21533+ * security_dentry_create_files_as() is unnecessary.
21534+ */
21535+ if (au_opt_test(au_mntflags(sb), PLINK)) {
21536+ if (a->bdst < a->bsrc
21537+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */)
21538+ err = au_cpup_or_link(src_dentry, dentry, a);
21539+ else {
21540+ delegated = NULL;
21541+ err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
21542+ &a->h_path, &delegated);
21543+ if (unlikely(err == -EWOULDBLOCK)) {
21544+ pr_warn("cannot retry for NFSv4 delegation"
21545+ " for an internal link\n");
21546+ iput(delegated);
21547+ }
21548+ }
21549+ dput(h_src_dentry);
21550+ } else {
21551+ /*
21552+ * copyup src_dentry to the branch we process,
21553+ * and then link(2) to it.
21554+ */
21555+ dput(h_src_dentry);
21556+ if (a->bdst < a->bsrc
21557+ /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) {
21558+ au_unpin(&a->pin);
21559+ di_write_unlock(a->parent);
21560+ err = au_cpup_before_link(src_dentry, a);
21561+ di_write_lock_parent(a->parent);
21562+ if (!err)
21563+ err = au_pin(&a->pin, dentry, a->bdst,
21564+ au_opt_udba(sb),
21565+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
21566+ if (unlikely(err))
21567+ goto out_wh;
21568+ }
21569+ if (!err) {
21570+ h_src_dentry = au_h_dptr(src_dentry, a->bdst);
21571+ err = -ENOENT;
21572+ if (h_src_dentry && d_is_positive(h_src_dentry)) {
21573+ delegated = NULL;
21574+ err = vfsub_link(h_src_dentry,
21575+ au_pinned_h_dir(&a->pin),
21576+ &a->h_path, &delegated);
21577+ if (unlikely(err == -EWOULDBLOCK)) {
21578+ pr_warn("cannot retry"
21579+ " for NFSv4 delegation"
21580+ " for an internal link\n");
21581+ iput(delegated);
21582+ }
21583+ }
21584+ }
21585+ }
21586+ if (unlikely(err))
21587+ goto out_unpin;
21588+
21589+ if (wh_dentry) {
21590+ a->h_path.dentry = wh_dentry;
21591+ err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path,
21592+ dentry);
21593+ if (unlikely(err))
21594+ goto out_revert;
21595+ }
21596+
21597+ au_dir_ts(dir, a->bdst);
21598+ inode_inc_iversion(dir);
21599+ inc_nlink(inode);
21600+ inode->i_ctime = dir->i_ctime;
21601+ d_instantiate(dentry, au_igrab(inode));
21602+ if (d_unhashed(a->h_path.dentry))
21603+ /* some filesystem calls d_drop() */
21604+ d_drop(dentry);
21605+ /* some filesystems consume an inode even hardlink */
21606+ au_fhsm_wrote(sb, a->bdst, /*force*/0);
21607+ goto out_unpin; /* success */
21608+
21609+out_revert:
21610+ /* no delegation since it is just created */
21611+ rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path,
21612+ /*delegated*/NULL, /*force*/0);
21613+ if (unlikely(rerr)) {
21614+ AuIOErr("%pd reverting failed(%d, %d)\n", dentry, err, rerr);
21615+ err = -EIO;
21616+ }
21617+ au_dtime_revert(&dt);
21618+out_unpin:
21619+ au_unpin(&a->pin);
21620+out_wh:
21621+ dput(wh_dentry);
21622+out_parent:
21623+ di_write_unlock(a->parent);
21624+ dput(a->src_parent);
21625+out_unlock:
21626+ if (unlikely(err)) {
21627+ au_update_dbtop(dentry);
21628+ d_drop(dentry);
21629+ }
21630+ aufs_read_and_write_unlock2(dentry, src_dentry);
21631+out_kfree:
21632+ au_kfree_rcu(a);
21633+out:
21634+ AuTraceErr(err);
21635+ return err;
21636+}
21637+
21638+int aufs_mkdir(struct user_namespace *userns, struct inode *dir,
21639+ struct dentry *dentry, umode_t mode)
21640+{
21641+ int err, rerr;
21642+ aufs_bindex_t bindex;
21643+ unsigned char diropq;
21644+ struct path h_path;
21645+ struct dentry *wh_dentry, *parent, *opq_dentry;
21646+ struct inode *h_inode;
21647+ struct super_block *sb;
21648+ struct {
21649+ struct au_pin pin;
21650+ struct au_dtime dt;
21651+ } *a; /* reduce the stack usage */
21652+ struct au_wr_dir_args wr_dir_args = {
21653+ .force_btgt = -1,
21654+ .flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR
21655+ };
21656+
21657+ IMustLock(dir);
21658+
21659+ err = -ENOMEM;
21660+ a = kmalloc(sizeof(*a), GFP_NOFS);
21661+ if (unlikely(!a))
21662+ goto out;
21663+
21664+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
21665+ if (unlikely(err))
21666+ goto out_free;
21667+ err = au_d_may_add(dentry);
21668+ if (unlikely(err))
21669+ goto out_unlock;
21670+
21671+ parent = dentry->d_parent; /* dir inode is locked */
21672+ di_write_lock_parent(parent);
21673+ wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
21674+ &a->pin, &wr_dir_args);
21675+ err = PTR_ERR(wh_dentry);
21676+ if (IS_ERR(wh_dentry))
21677+ goto out_parent;
21678+
21679+ sb = dentry->d_sb;
21680+ bindex = au_dbtop(dentry);
21681+ h_path.dentry = au_h_dptr(dentry, bindex);
21682+ h_path.mnt = au_sbr_mnt(sb, bindex);
21683+ err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode);
21684+ if (unlikely(err))
21685+ goto out_unpin;
21686+
21687+ /* make the dir opaque */
21688+ diropq = 0;
21689+ h_inode = d_inode(h_path.dentry);
21690+ if (wh_dentry
21691+ || au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) {
21692+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
21693+ opq_dentry = au_diropq_create(dentry, bindex);
21694+ inode_unlock(h_inode);
21695+ err = PTR_ERR(opq_dentry);
21696+ if (IS_ERR(opq_dentry))
21697+ goto out_dir;
21698+ dput(opq_dentry);
21699+ diropq = 1;
21700+ }
21701+
21702+ err = epilog(dir, bindex, wh_dentry, dentry);
21703+ if (!err) {
21704+ inc_nlink(dir);
21705+ goto out_unpin; /* success */
21706+ }
21707+
21708+ /* revert */
21709+ if (diropq) {
21710+ AuLabel(revert opq);
21711+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
21712+ rerr = au_diropq_remove(dentry, bindex);
21713+ inode_unlock(h_inode);
21714+ if (rerr) {
21715+ AuIOErr("%pd reverting diropq failed(%d, %d)\n",
21716+ dentry, err, rerr);
21717+ err = -EIO;
21718+ }
21719+ }
21720+
21721+out_dir:
21722+ AuLabel(revert dir);
21723+ rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path);
21724+ if (rerr) {
21725+ AuIOErr("%pd reverting dir failed(%d, %d)\n",
21726+ dentry, err, rerr);
21727+ err = -EIO;
21728+ }
21729+ au_dtime_revert(&a->dt);
21730+out_unpin:
21731+ au_unpin(&a->pin);
21732+ dput(wh_dentry);
21733+out_parent:
21734+ di_write_unlock(parent);
21735+out_unlock:
21736+ if (unlikely(err)) {
21737+ au_update_dbtop(dentry);
21738+ d_drop(dentry);
21739+ }
21740+ aufs_read_unlock(dentry, AuLock_DW);
21741+out_free:
21742+ au_kfree_rcu(a);
21743+out:
21744+ return err;
21745+}
21746diff -urN /usr/share/empty/fs/aufs/i_op.c linux/fs/aufs/i_op.c
21747--- /usr/share/empty/fs/aufs/i_op.c 1970-01-01 01:00:00.000000000 +0100
21748+++ linux/fs/aufs/i_op.c 2023-02-20 21:05:51.959693785 +0100
21749@@ -0,0 +1,1516 @@
21750+// SPDX-License-Identifier: GPL-2.0
21751+/*
21752+ * Copyright (C) 2005-2022 Junjiro R. Okajima
21753+ *
21754+ * This program is free software; you can redistribute it and/or modify
21755+ * it under the terms of the GNU General Public License as published by
21756+ * the Free Software Foundation; either version 2 of the License, or
21757+ * (at your option) any later version.
21758+ *
21759+ * This program is distributed in the hope that it will be useful,
21760+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
21761+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21762+ * GNU General Public License for more details.
21763+ *
21764+ * You should have received a copy of the GNU General Public License
21765+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
21766+ */
21767+
21768+/*
21769+ * inode operations (except add/del/rename)
21770+ */
21771+
21772+#include <linux/device_cgroup.h>
21773+#include <linux/fs_stack.h>
21774+#include <linux/iversion.h>
21775+#include <linux/security.h>
21776+#include "aufs.h"
21777+
21778+static int h_permission(struct inode *h_inode, int mask,
21779+ struct path *h_path, int brperm)
21780+{
21781+ int err;
21782+ const unsigned char write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
21783+ struct user_namespace *h_userns;
21784+
21785+ err = -EPERM;
21786+ if (write_mask && IS_IMMUTABLE(h_inode))
21787+ goto out;
21788+
21789+ err = -EACCES;
21790+ if (((mask & MAY_EXEC)
21791+ && S_ISREG(h_inode->i_mode)
21792+ && (path_noexec(h_path)
21793+ || !(h_inode->i_mode & 0111))))
21794+ goto out;
21795+
21796+ /*
21797+ * - skip the lower fs test in the case of write to ro branch.
21798+ * - nfs dir permission write check is optimized, but a policy for
21799+ * link/rename requires a real check.
21800+ * - nfs always sets SB_POSIXACL regardless its mount option 'noacl.'
21801+ * in this case, generic_permission() returns -EOPNOTSUPP.
21802+ */
21803+ h_userns = mnt_user_ns(h_path->mnt);
21804+ if ((write_mask && !au_br_writable(brperm))
21805+ || (au_test_nfs(h_inode->i_sb) && S_ISDIR(h_inode->i_mode)
21806+ && write_mask && !(mask & MAY_READ))
21807+ || !h_inode->i_op->permission) {
21808+ /* AuLabel(generic_permission); */
21809+ /* AuDbg("get_inode_acl %ps\n",
21810+ h_inode->i_op->get_inode_acl); */
21811+ err = generic_permission(h_userns, h_inode, mask);
21812+ if (err == -EOPNOTSUPP && au_test_nfs_noacl(h_inode))
21813+ err = h_inode->i_op->permission(h_userns, h_inode,
21814+ mask);
21815+ AuTraceErr(err);
21816+ } else {
21817+ /* AuLabel(h_inode->permission); */
21818+ err = h_inode->i_op->permission(h_userns, h_inode, mask);
21819+ AuTraceErr(err);
21820+ }
21821+
21822+ if (!err)
21823+ err = devcgroup_inode_permission(h_inode, mask);
21824+ if (!err)
21825+ err = security_inode_permission(h_inode, mask);
21826+
21827+out:
21828+ return err;
21829+}
21830+
21831+static int aufs_permission(struct user_namespace *userns, struct inode *inode,
21832+ int mask)
21833+{
21834+ int err;
21835+ aufs_bindex_t bindex, bbot;
21836+ const unsigned char isdir = !!S_ISDIR(inode->i_mode),
21837+ write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
21838+ struct inode *h_inode;
21839+ struct super_block *sb;
21840+ struct au_branch *br;
21841+
21842+ /* todo: support rcu-walk? */
21843+ if (mask & MAY_NOT_BLOCK)
21844+ return -ECHILD;
21845+
21846+ sb = inode->i_sb;
21847+ si_read_lock(sb, AuLock_FLUSH);
21848+ ii_read_lock_child(inode);
21849+#if 0 /* reserved for future use */
21850+ /*
21851+ * This test may be rather 'too much' since the test is essentially done
21852+ * in the aufs_lookup(). Theoretically it is possible that the inode
21853+ * generation doesn't match to the superblock's here. But it isn't a
21854+ * big deal I suppose.
21855+ */
21856+ err = au_iigen_test(inode, au_sigen(sb));
21857+ if (unlikely(err))
21858+ goto out;
21859+#endif
21860+
21861+ if (!isdir
21862+ || write_mask
21863+ || au_opt_test(au_mntflags(sb), DIRPERM1)) {
21864+ err = au_busy_or_stale();
21865+ h_inode = au_h_iptr(inode, au_ibtop(inode));
21866+ if (unlikely(!h_inode
21867+ || (h_inode->i_mode & S_IFMT)
21868+ != (inode->i_mode & S_IFMT)))
21869+ goto out;
21870+
21871+ err = 0;
21872+ bindex = au_ibtop(inode);
21873+ br = au_sbr(sb, bindex);
21874+ err = h_permission(h_inode, mask, &br->br_path, br->br_perm);
21875+ if (write_mask
21876+ && !err
21877+ && !special_file(h_inode->i_mode)) {
21878+ /* test whether the upper writable branch exists */
21879+ err = -EROFS;
21880+ for (; bindex >= 0; bindex--)
21881+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
21882+ err = 0;
21883+ break;
21884+ }
21885+ }
21886+ goto out;
21887+ }
21888+
21889+ /* non-write to dir */
21890+ err = 0;
21891+ bbot = au_ibbot(inode);
21892+ for (bindex = au_ibtop(inode); !err && bindex <= bbot; bindex++) {
21893+ h_inode = au_h_iptr(inode, bindex);
21894+ if (h_inode) {
21895+ err = au_busy_or_stale();
21896+ if (unlikely(!S_ISDIR(h_inode->i_mode)))
21897+ break;
21898+
21899+ br = au_sbr(sb, bindex);
21900+ err = h_permission(h_inode, mask, &br->br_path,
21901+ br->br_perm);
21902+ }
21903+ }
21904+
21905+out:
21906+ ii_read_unlock(inode);
21907+ si_read_unlock(sb);
21908+ return err;
21909+}
21910+
21911+/* ---------------------------------------------------------------------- */
21912+
21913+static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
21914+ unsigned int flags)
21915+{
21916+ struct dentry *ret, *parent;
21917+ struct inode *inode;
21918+ struct super_block *sb;
21919+ int err, npositive;
21920+
21921+ IMustLock(dir);
21922+
21923+ /* todo: support rcu-walk? */
21924+ ret = ERR_PTR(-ECHILD);
21925+ if (flags & LOOKUP_RCU)
21926+ goto out;
21927+
21928+ ret = ERR_PTR(-ENAMETOOLONG);
21929+ if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
21930+ goto out;
21931+
21932+ sb = dir->i_sb;
21933+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
21934+ ret = ERR_PTR(err);
21935+ if (unlikely(err))
21936+ goto out;
21937+
21938+ err = au_di_init(dentry);
21939+ ret = ERR_PTR(err);
21940+ if (unlikely(err))
21941+ goto out_si;
21942+
21943+ inode = NULL;
21944+ npositive = 0; /* suppress a warning */
21945+ parent = dentry->d_parent; /* dir inode is locked */
21946+ di_read_lock_parent(parent, AuLock_IR);
21947+ err = au_alive_dir(parent);
21948+ if (!err)
21949+ err = au_digen_test(parent, au_sigen(sb));
21950+ if (!err) {
21951+ /* regardless LOOKUP_CREATE, always ALLOW_NEG */
21952+ npositive = au_lkup_dentry(dentry, au_dbtop(parent),
21953+ AuLkup_ALLOW_NEG);
21954+ err = npositive;
21955+ }
21956+ di_read_unlock(parent, AuLock_IR);
21957+ ret = ERR_PTR(err);
21958+ if (unlikely(err < 0))
21959+ goto out_unlock;
21960+
21961+ if (npositive) {
21962+ inode = au_new_inode(dentry, /*must_new*/0);
21963+ if (IS_ERR(inode)) {
21964+ ret = (void *)inode;
21965+ inode = NULL;
21966+ goto out_unlock;
21967+ }
21968+ }
21969+
21970+ if (inode)
21971+ atomic_inc(&inode->i_count);
21972+ ret = d_splice_alias(inode, dentry);
21973+#if 0 /* reserved for future use */
21974+ if (unlikely(d_need_lookup(dentry))) {
21975+ spin_lock(&dentry->d_lock);
21976+ dentry->d_flags &= ~DCACHE_NEED_LOOKUP;
21977+ spin_unlock(&dentry->d_lock);
21978+ } else
21979+#endif
21980+ if (inode) {
21981+ if (!IS_ERR(ret)) {
21982+ iput(inode);
21983+ if (ret && ret != dentry)
21984+ ii_write_unlock(inode);
21985+ } else {
21986+ ii_write_unlock(inode);
21987+ iput(inode);
21988+ inode = NULL;
21989+ }
21990+ }
21991+
21992+out_unlock:
21993+ di_write_unlock(dentry);
21994+out_si:
21995+ si_read_unlock(sb);
21996+out:
21997+ return ret;
21998+}
21999+
22000+/* ---------------------------------------------------------------------- */
22001+
22002+/*
22003+ * very dirty and complicated aufs ->atomic_open().
22004+ * aufs_atomic_open()
22005+ * + au_aopen_or_create()
22006+ * + add_simple()
22007+ * + vfsub_atomic_open()
22008+ * + branch fs ->atomic_open()
22009+ * may call the actual 'open' for h_file
22010+ * + inc br_nfiles only if opened
22011+ * + au_aopen_no_open() or au_aopen_do_open()
22012+ *
22013+ * au_aopen_do_open()
22014+ * + finish_open()
22015+ * + au_do_aopen()
22016+ * + au_do_open() the body of all 'open'
22017+ * + au_do_open_nondir()
22018+ * set the passed h_file
22019+ *
22020+ * au_aopen_no_open()
22021+ * + finish_no_open()
22022+ */
22023+
22024+struct aopen_node {
22025+ struct hlist_bl_node hblist;
22026+ struct file *file, *h_file;
22027+};
22028+
22029+static int au_do_aopen(struct inode *inode, struct file *file)
22030+{
22031+ struct hlist_bl_head *aopen;
22032+ struct hlist_bl_node *pos;
22033+ struct aopen_node *node;
22034+ struct au_do_open_args args = {
22035+ .aopen = 1,
22036+ .open = au_do_open_nondir
22037+ };
22038+
22039+ aopen = &au_sbi(inode->i_sb)->si_aopen;
22040+ hlist_bl_lock(aopen);
22041+ hlist_bl_for_each_entry(node, pos, aopen, hblist)
22042+ if (node->file == file) {
22043+ args.h_file = node->h_file;
22044+ break;
22045+ }
22046+ hlist_bl_unlock(aopen);
22047+ /* AuDebugOn(!args.h_file); */
22048+
22049+ return au_do_open(file, &args);
22050+}
22051+
22052+static int au_aopen_do_open(struct file *file, struct dentry *dentry,
22053+ struct aopen_node *aopen_node)
22054+{
22055+ int err;
22056+ struct hlist_bl_head *aopen;
22057+
22058+ AuLabel(here);
22059+ aopen = &au_sbi(dentry->d_sb)->si_aopen;
22060+ au_hbl_add(&aopen_node->hblist, aopen);
22061+ err = finish_open(file, dentry, au_do_aopen);
22062+ au_hbl_del(&aopen_node->hblist, aopen);
22063+ /* AuDbgFile(file); */
22064+ AuDbg("%pd%s%s\n", dentry,
22065+ (file->f_mode & FMODE_CREATED) ? " created" : "",
22066+ (file->f_mode & FMODE_OPENED) ? " opened" : "");
22067+
22068+ AuTraceErr(err);
22069+ return err;
22070+}
22071+
22072+static int au_aopen_no_open(struct file *file, struct dentry *dentry)
22073+{
22074+ int err;
22075+
22076+ AuLabel(here);
22077+ dget(dentry);
22078+ err = finish_no_open(file, dentry);
22079+
22080+ AuTraceErr(err);
22081+ return err;
22082+}
22083+
22084+static int aufs_atomic_open(struct inode *dir, struct dentry *dentry,
22085+ struct file *file, unsigned int open_flag,
22086+ umode_t create_mode)
22087+{
22088+ int err, did_open;
22089+ unsigned int lkup_flags;
22090+ aufs_bindex_t bindex;
22091+ struct super_block *sb;
22092+ struct dentry *parent, *d;
22093+ struct vfsub_aopen_args args = {
22094+ .open_flag = open_flag,
22095+ .create_mode = create_mode
22096+ };
22097+ struct aopen_node aopen_node = {
22098+ .file = file
22099+ };
22100+
22101+ IMustLock(dir);
22102+ AuDbg("open_flag 0%o\n", open_flag);
22103+ AuDbgDentry(dentry);
22104+
22105+ err = 0;
22106+ if (!au_di(dentry)) {
22107+ lkup_flags = LOOKUP_OPEN;
22108+ if (open_flag & O_CREAT)
22109+ lkup_flags |= LOOKUP_CREATE;
22110+ d = aufs_lookup(dir, dentry, lkup_flags);
22111+ if (IS_ERR(d)) {
22112+ err = PTR_ERR(d);
22113+ AuTraceErr(err);
22114+ goto out;
22115+ } else if (d) {
22116+ /*
22117+ * obsoleted dentry found.
22118+ * another error will be returned later.
22119+ */
22120+ d_drop(d);
22121+ AuDbgDentry(d);
22122+ dput(d);
22123+ }
22124+ AuDbgDentry(dentry);
22125+ }
22126+
22127+ if (d_is_positive(dentry)
22128+ || d_unhashed(dentry)
22129+ || d_unlinked(dentry)
22130+ || !(open_flag & O_CREAT)) {
22131+ err = au_aopen_no_open(file, dentry);
22132+ goto out; /* success */
22133+ }
22134+
22135+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
22136+ if (unlikely(err))
22137+ goto out;
22138+
22139+ sb = dentry->d_sb;
22140+ parent = dentry->d_parent; /* dir is locked */
22141+ di_write_lock_parent(parent);
22142+ err = au_lkup_dentry(dentry, /*btop*/0, AuLkup_ALLOW_NEG);
22143+ if (unlikely(err < 0))
22144+ goto out_parent;
22145+
22146+ AuDbgDentry(dentry);
22147+ if (d_is_positive(dentry)) {
22148+ err = au_aopen_no_open(file, dentry);
22149+ goto out_parent; /* success */
22150+ }
22151+
22152+ args.file = alloc_empty_file(file->f_flags, current_cred());
22153+ err = PTR_ERR(args.file);
22154+ if (IS_ERR(args.file))
22155+ goto out_parent;
22156+
22157+ bindex = au_dbtop(dentry);
22158+ err = au_aopen_or_create(dir, dentry, &args);
22159+ AuTraceErr(err);
22160+ AuDbgFile(args.file);
22161+ file->f_mode = args.file->f_mode & ~FMODE_OPENED;
22162+ did_open = !!(args.file->f_mode & FMODE_OPENED);
22163+ if (!did_open) {
22164+ fput(args.file);
22165+ args.file = NULL;
22166+ }
22167+ di_write_unlock(parent);
22168+ di_write_unlock(dentry);
22169+ if (unlikely(err < 0)) {
22170+ if (args.file)
22171+ fput(args.file);
22172+ goto out_sb;
22173+ }
22174+
22175+ if (!did_open)
22176+ err = au_aopen_no_open(file, dentry);
22177+ else {
22178+ aopen_node.h_file = args.file;
22179+ err = au_aopen_do_open(file, dentry, &aopen_node);
22180+ }
22181+ if (unlikely(err < 0)) {
22182+ if (args.file)
22183+ fput(args.file);
22184+ if (did_open)
22185+ au_lcnt_dec(&args.br->br_nfiles);
22186+ }
22187+ goto out_sb; /* success */
22188+
22189+out_parent:
22190+ di_write_unlock(parent);
22191+ di_write_unlock(dentry);
22192+out_sb:
22193+ si_read_unlock(sb);
22194+out:
22195+ AuTraceErr(err);
22196+ AuDbgFile(file);
22197+ return err;
22198+}
22199+
22200+
22201+/* ---------------------------------------------------------------------- */
22202+
22203+static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
22204+ const unsigned char add_entry, aufs_bindex_t bcpup,
22205+ aufs_bindex_t btop)
22206+{
22207+ int err;
22208+ struct dentry *h_parent;
22209+ struct inode *h_dir;
22210+
22211+ if (add_entry)
22212+ IMustLock(d_inode(parent));
22213+ else
22214+ di_write_lock_parent(parent);
22215+
22216+ err = 0;
22217+ if (!au_h_dptr(parent, bcpup)) {
22218+ if (btop > bcpup)
22219+ err = au_cpup_dirs(dentry, bcpup);
22220+ else if (btop < bcpup)
22221+ err = au_cpdown_dirs(dentry, bcpup);
22222+ else
22223+ BUG();
22224+ }
22225+ if (!err && add_entry && !au_ftest_wrdir(add_entry, TMPFILE)) {
22226+ h_parent = au_h_dptr(parent, bcpup);
22227+ h_dir = d_inode(h_parent);
22228+ inode_lock_shared_nested(h_dir, AuLsc_I_PARENT);
22229+ err = au_lkup_neg(dentry, bcpup, /*wh*/0);
22230+ /* todo: no unlock here */
22231+ inode_unlock_shared(h_dir);
22232+
22233+ AuDbg("bcpup %d\n", bcpup);
22234+ if (!err) {
22235+ if (d_really_is_negative(dentry))
22236+ au_set_h_dptr(dentry, btop, NULL);
22237+ au_update_dbrange(dentry, /*do_put_zero*/0);
22238+ }
22239+ }
22240+
22241+ if (!add_entry)
22242+ di_write_unlock(parent);
22243+ if (!err)
22244+ err = bcpup; /* success */
22245+
22246+ AuTraceErr(err);
22247+ return err;
22248+}
22249+
22250+/*
22251+ * decide the branch and the parent dir where we will create a new entry.
22252+ * returns new bindex or an error.
22253+ * copyup the parent dir if needed.
22254+ */
22255+int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
22256+ struct au_wr_dir_args *args)
22257+{
22258+ int err;
22259+ unsigned int flags;
22260+ aufs_bindex_t bcpup, btop, src_btop;
22261+ const unsigned char add_entry
22262+ = au_ftest_wrdir(args->flags, ADD_ENTRY)
22263+ | au_ftest_wrdir(args->flags, TMPFILE);
22264+ struct super_block *sb;
22265+ struct dentry *parent;
22266+ struct au_sbinfo *sbinfo;
22267+
22268+ sb = dentry->d_sb;
22269+ sbinfo = au_sbi(sb);
22270+ parent = dget_parent(dentry);
22271+ btop = au_dbtop(dentry);
22272+ bcpup = btop;
22273+ if (args->force_btgt < 0) {
22274+ if (src_dentry) {
22275+ src_btop = au_dbtop(src_dentry);
22276+ if (src_btop < btop)
22277+ bcpup = src_btop;
22278+ } else if (add_entry) {
22279+ flags = 0;
22280+ if (au_ftest_wrdir(args->flags, ISDIR))
22281+ au_fset_wbr(flags, DIR);
22282+ err = AuWbrCreate(sbinfo, dentry, flags);
22283+ bcpup = err;
22284+ }
22285+
22286+ if (bcpup < 0 || au_test_ro(sb, bcpup, d_inode(dentry))) {
22287+ if (add_entry)
22288+ err = AuWbrCopyup(sbinfo, dentry);
22289+ else {
22290+ if (!IS_ROOT(dentry)) {
22291+ di_read_lock_parent(parent, !AuLock_IR);
22292+ err = AuWbrCopyup(sbinfo, dentry);
22293+ di_read_unlock(parent, !AuLock_IR);
22294+ } else
22295+ err = AuWbrCopyup(sbinfo, dentry);
22296+ }
22297+ bcpup = err;
22298+ if (unlikely(err < 0))
22299+ goto out;
22300+ }
22301+ } else {
22302+ bcpup = args->force_btgt;
22303+ AuDebugOn(au_test_ro(sb, bcpup, d_inode(dentry)));
22304+ }
22305+
22306+ AuDbg("btop %d, bcpup %d\n", btop, bcpup);
22307+ err = bcpup;
22308+ if (bcpup == btop)
22309+ goto out; /* success */
22310+
22311+ /* copyup the new parent into the branch we process */
22312+ err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, btop);
22313+ if (err >= 0) {
22314+ if (d_really_is_negative(dentry)) {
22315+ au_set_h_dptr(dentry, btop, NULL);
22316+ au_set_dbtop(dentry, bcpup);
22317+ au_set_dbbot(dentry, bcpup);
22318+ }
22319+ AuDebugOn(add_entry
22320+ && !au_ftest_wrdir(args->flags, TMPFILE)
22321+ && !au_h_dptr(dentry, bcpup));
22322+ }
22323+
22324+out:
22325+ dput(parent);
22326+ return err;
22327+}
22328+
22329+/* ---------------------------------------------------------------------- */
22330+
22331+void au_pin_hdir_unlock(struct au_pin *p)
22332+{
22333+ if (p->hdir)
22334+ au_hn_inode_unlock(p->hdir);
22335+}
22336+
22337+int au_pin_hdir_lock(struct au_pin *p)
22338+{
22339+ int err;
22340+
22341+ err = 0;
22342+ if (!p->hdir)
22343+ goto out;
22344+
22345+ /* even if an error happens later, keep this lock */
22346+ au_hn_inode_lock_nested(p->hdir, p->lsc_hi);
22347+
22348+ err = -EBUSY;
22349+ if (unlikely(p->hdir->hi_inode != d_inode(p->h_parent)))
22350+ goto out;
22351+
22352+ err = 0;
22353+ if (p->h_dentry)
22354+ err = au_h_verify(p->h_dentry, p->udba, p->hdir->hi_inode,
22355+ p->h_parent, p->br);
22356+
22357+out:
22358+ return err;
22359+}
22360+
22361+int au_pin_hdir_relock(struct au_pin *p)
22362+{
22363+ int err, i;
22364+ struct inode *h_i;
22365+ struct dentry *h_d[] = {
22366+ p->h_dentry,
22367+ p->h_parent
22368+ };
22369+
22370+ err = au_pin_hdir_lock(p);
22371+ if (unlikely(err))
22372+ goto out;
22373+
22374+ for (i = 0; !err && i < sizeof(h_d)/sizeof(*h_d); i++) {
22375+ if (!h_d[i])
22376+ continue;
22377+ if (d_is_positive(h_d[i])) {
22378+ h_i = d_inode(h_d[i]);
22379+ err = !h_i->i_nlink;
22380+ }
22381+ }
22382+
22383+out:
22384+ return err;
22385+}
22386+
22387+static void au_pin_hdir_set_owner(struct au_pin *p, struct task_struct *task)
22388+{
22389+ atomic_long_set(&p->hdir->hi_inode->i_rwsem.owner, (long)task);
22390+}
22391+
22392+void au_pin_hdir_acquire_nest(struct au_pin *p)
22393+{
22394+ if (p->hdir) {
22395+ rwsem_acquire_nest(&p->hdir->hi_inode->i_rwsem.dep_map,
22396+ p->lsc_hi, 0, NULL, _RET_IP_);
22397+ au_pin_hdir_set_owner(p, current);
22398+ }
22399+}
22400+
22401+void au_pin_hdir_release(struct au_pin *p)
22402+{
22403+ if (p->hdir) {
22404+ au_pin_hdir_set_owner(p, p->task);
22405+ rwsem_release(&p->hdir->hi_inode->i_rwsem.dep_map, _RET_IP_);
22406+ }
22407+}
22408+
22409+struct dentry *au_pinned_h_parent(struct au_pin *pin)
22410+{
22411+ if (pin && pin->parent)
22412+ return au_h_dptr(pin->parent, pin->bindex);
22413+ return NULL;
22414+}
22415+
22416+void au_unpin(struct au_pin *p)
22417+{
22418+ if (p->hdir)
22419+ au_pin_hdir_unlock(p);
22420+ if (p->h_mnt && au_ftest_pin(p->flags, MNT_WRITE))
22421+ vfsub_mnt_drop_write(p->h_mnt);
22422+ if (!p->hdir)
22423+ return;
22424+
22425+ if (!au_ftest_pin(p->flags, DI_LOCKED))
22426+ di_read_unlock(p->parent, AuLock_IR);
22427+ iput(p->hdir->hi_inode);
22428+ dput(p->parent);
22429+ p->parent = NULL;
22430+ p->hdir = NULL;
22431+ p->h_mnt = NULL;
22432+ /* do not clear p->task */
22433+}
22434+
22435+int au_do_pin(struct au_pin *p)
22436+{
22437+ int err;
22438+ struct super_block *sb;
22439+ struct inode *h_dir;
22440+
22441+ err = 0;
22442+ sb = p->dentry->d_sb;
22443+ p->br = au_sbr(sb, p->bindex);
22444+ if (IS_ROOT(p->dentry)) {
22445+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
22446+ p->h_mnt = au_br_mnt(p->br);
22447+ err = vfsub_mnt_want_write(p->h_mnt);
22448+ if (unlikely(err)) {
22449+ au_fclr_pin(p->flags, MNT_WRITE);
22450+ goto out_err;
22451+ }
22452+ }
22453+ goto out;
22454+ }
22455+
22456+ p->h_dentry = NULL;
22457+ if (p->bindex <= au_dbbot(p->dentry))
22458+ p->h_dentry = au_h_dptr(p->dentry, p->bindex);
22459+
22460+ p->parent = dget_parent(p->dentry);
22461+ if (!au_ftest_pin(p->flags, DI_LOCKED))
22462+ di_read_lock(p->parent, AuLock_IR, p->lsc_di);
22463+
22464+ h_dir = NULL;
22465+ p->h_parent = au_h_dptr(p->parent, p->bindex);
22466+ p->hdir = au_hi(d_inode(p->parent), p->bindex);
22467+ if (p->hdir)
22468+ h_dir = p->hdir->hi_inode;
22469+
22470+ /*
22471+ * udba case, or
22472+ * if DI_LOCKED is not set, then p->parent may be different
22473+ * and h_parent can be NULL.
22474+ */
22475+ if (unlikely(!p->hdir || !h_dir || !p->h_parent)) {
22476+ err = -EBUSY;
22477+ if (!au_ftest_pin(p->flags, DI_LOCKED))
22478+ di_read_unlock(p->parent, AuLock_IR);
22479+ dput(p->parent);
22480+ p->parent = NULL;
22481+ goto out_err;
22482+ }
22483+
22484+ if (au_ftest_pin(p->flags, MNT_WRITE)) {
22485+ p->h_mnt = au_br_mnt(p->br);
22486+ err = vfsub_mnt_want_write(p->h_mnt);
22487+ if (unlikely(err)) {
22488+ au_fclr_pin(p->flags, MNT_WRITE);
22489+ if (!au_ftest_pin(p->flags, DI_LOCKED))
22490+ di_read_unlock(p->parent, AuLock_IR);
22491+ dput(p->parent);
22492+ p->parent = NULL;
22493+ goto out_err;
22494+ }
22495+ }
22496+
22497+ au_igrab(h_dir);
22498+ err = au_pin_hdir_lock(p);
22499+ if (!err)
22500+ goto out; /* success */
22501+
22502+ au_unpin(p);
22503+
22504+out_err:
22505+ pr_err("err %d\n", err);
22506+ err = au_busy_or_stale();
22507+out:
22508+ return err;
22509+}
22510+
22511+void au_pin_init(struct au_pin *p, struct dentry *dentry,
22512+ aufs_bindex_t bindex, int lsc_di, int lsc_hi,
22513+ unsigned int udba, unsigned char flags)
22514+{
22515+ p->dentry = dentry;
22516+ p->udba = udba;
22517+ p->lsc_di = lsc_di;
22518+ p->lsc_hi = lsc_hi;
22519+ p->flags = flags;
22520+ p->bindex = bindex;
22521+
22522+ p->parent = NULL;
22523+ p->hdir = NULL;
22524+ p->h_mnt = NULL;
22525+
22526+ p->h_dentry = NULL;
22527+ p->h_parent = NULL;
22528+ p->br = NULL;
22529+ p->task = current;
22530+}
22531+
22532+int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
22533+ unsigned int udba, unsigned char flags)
22534+{ /* wrapper: au_pin_init() with AuLsc_DI_PARENT/AuLsc_I_PARENT2 as lsc_di/lsc_hi, then au_do_pin() */
22535+ au_pin_init(pin, dentry, bindex, AuLsc_DI_PARENT, AuLsc_I_PARENT2,
22536+ udba, flags);
22537+ return au_do_pin(pin); /* the result of au_do_pin() is returned unchanged */
22538+}
22539+
22540+/* ---------------------------------------------------------------------- */
22541+
22542+/*
22543+ * ->setattr() and ->getattr() are called in various cases.
22544+ * chmod, stat: dentry is revalidated.
22545+ * fchmod, fstat: file and dentry are not revalidated; additionally they may be
22546+ * unhashed.
22547+ * for ->setattr(), ia->ia_file is passed from ftruncate only.
22548+ */
22549+/* todo: consolidate with do_refresh() and simple_reval_dpath() */
22550+int au_reval_for_attr(struct dentry *dentry, unsigned int sigen)
22551+{ /* refresh @dentry against its parent when au_digen_test() reports a mismatch with @sigen */
22552+ int err;
22553+ struct dentry *parent;
22554+
22555+ err = 0; /* nothing to do counts as success */
22556+ if (au_digen_test(dentry, sigen)) {
22557+ parent = dget_parent(dentry); /* reference dropped by the dput() below */
22558+ di_read_lock_parent(parent, AuLock_IR);
22559+ err = au_refresh_dentry(dentry, parent);
22560+ di_read_unlock(parent, AuLock_IR);
22561+ dput(parent);
22562+ }
22563+
22564+ AuTraceErr(err);
22565+ return err; /* 0, or the error from au_refresh_dentry() */
22566+}
22567+
22568+int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
22569+ struct au_icpup_args *a)
22570+{ /* choose the target branch, pin its parent dir, copy @dentry up if needed; results go to @a */
22571+ int err;
22572+ loff_t sz;
22573+ aufs_bindex_t btop, ibtop;
22574+ struct dentry *hi_wh, *parent;
22575+ struct inode *inode;
22576+ struct au_wr_dir_args wr_dir_args = {
22577+ .force_btgt = -1,
22578+ .flags = 0
22579+ };
22580+
22581+ if (d_is_dir(dentry))
22582+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
22583+ /* plink or hi_wh() case */
22584+ btop = au_dbtop(dentry);
22585+ inode = d_inode(dentry);
22586+ ibtop = au_ibtop(inode);
22587+ if (btop != ibtop && !au_test_ro(inode->i_sb, ibtop, inode)) /* inode's top branch differs and passes the !au_test_ro() check */
22588+ wr_dir_args.force_btgt = ibtop;
22589+ err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
22590+ if (unlikely(err < 0))
22591+ goto out;
22592+ a->btgt = err; /* a non-negative au_wr_dir() result is used as the target branch index */
22593+ if (err != btop)
22594+ au_fset_icpup(a->flags, DID_CPUP); /* target is not the dentry's current top branch */
22595+
22596+ err = 0;
22597+ a->pin_flags = AuPin_MNT_WRITE;
22598+ parent = NULL;
22599+ if (!IS_ROOT(dentry)) { /* non-root: write-lock the parent here and tell au_pin() via DI_LOCKED */
22600+ au_fset_pin(a->pin_flags, DI_LOCKED);
22601+ parent = dget_parent(dentry);
22602+ di_write_lock_parent(parent);
22603+ }
22604+
22605+ err = au_pin(&a->pin, dentry, a->btgt, a->udba, a->pin_flags);
22606+ if (unlikely(err))
22607+ goto out_parent;
22608+
22609+ sz = -1; /* stays -1 unless @ia requests a size smaller than the current one */
22610+ a->h_path.dentry = au_h_dptr(dentry, btop);
22611+ a->h_inode = d_inode(a->h_path.dentry);
22612+ if (ia && (ia->ia_valid & ATTR_SIZE)) {
22613+ inode_lock_shared_nested(a->h_inode, AuLsc_I_CHILD);
22614+ if (ia->ia_size < i_size_read(a->h_inode))
22615+ sz = ia->ia_size;
22616+ inode_unlock_shared(a->h_inode);
22617+ }
22618+
22619+ hi_wh = NULL;
22620+ if (au_ftest_icpup(a->flags, DID_CPUP) && d_unlinked(dentry)) {
22621+ hi_wh = au_hi_wh(inode, a->btgt);
22622+ if (!hi_wh) { /* none yet on the target branch: create it, then fetch it again */
22623+ struct au_cp_generic cpg = {
22624+ .dentry = dentry,
22625+ .bdst = a->btgt,
22626+ .bsrc = -1,
22627+ .len = sz,
22628+ .pin = &a->pin
22629+ };
22630+ err = au_sio_cpup_wh(&cpg, /*file*/NULL);
22631+ if (unlikely(err))
22632+ goto out_unlock;
22633+ hi_wh = au_hi_wh(inode, a->btgt);
22634+ /* todo: revalidate hi_wh? */
22635+ }
22636+ }
22637+
22638+ if (parent) { /* downgrade the parent's write lock to a read lock and drop our reference */
22639+ au_pin_set_parent_lflag(&a->pin, /*lflag*/0);
22640+ di_downgrade_lock(parent, AuLock_IR);
22641+ dput(parent);
22642+ parent = NULL; /* so that out_parent does not unlock/dput it again */
22643+ }
22644+ if (!au_ftest_icpup(a->flags, DID_CPUP))
22645+ goto out; /* success */
22646+
22647+ if (!d_unhashed(dentry)) { /* still hashed: plain copy-up from btop to the target branch */
22648+ struct au_cp_generic cpg = {
22649+ .dentry = dentry,
22650+ .bdst = a->btgt,
22651+ .bsrc = btop,
22652+ .len = sz,
22653+ .pin = &a->pin,
22654+ .flags = AuCpup_DTIME | AuCpup_HOPEN
22655+ };
22656+ err = au_sio_cpup_simple(&cpg);
22657+ if (!err)
22658+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
22659+ } else if (!hi_wh)
22660+ a->h_path.dentry = au_h_dptr(dentry, a->btgt);
22661+ else
22662+ a->h_path.dentry = hi_wh; /* do not dget here */
22663+
22664+out_unlock:
22665+ a->h_inode = d_inode(a->h_path.dentry); /* re-read: h_path.dentry may have been switched above */
22666+ if (!err)
22667+ goto out; /* success */
22668+ au_unpin(&a->pin);
22669+out_parent:
22670+ if (parent) { /* only reached with the write lock still held */
22671+ di_write_unlock(parent);
22672+ dput(parent);
22673+ }
22674+out:
22675+ if (!err)
22676+ inode_lock_nested(a->h_inode, AuLsc_I_CHILD); /* success returns with a->h_inode locked */
22677+ return err;
22678+}
22679+
22680+static int aufs_setattr(struct user_namespace *userns, struct dentry *dentry,
22681+ struct iattr *ia)
22682+{ /* ->setattr(): apply @ia to a->h_path as prepared by au_pin_and_icpup(); returns 0 or a negative error */
22683+ int err;
22684+ struct inode *inode, *delegated;
22685+ struct super_block *sb;
22686+ struct file *file;
22687+ struct au_icpup_args *a;
22688+ struct user_namespace *h_userns;
22689+
22690+ inode = d_inode(dentry);
22691+ IMustLock(inode);
22692+
22693+ err = setattr_prepare(userns, dentry, ia);
22694+ if (unlikely(err))
22695+ goto out;
22696+
22697+ err = -ENOMEM;
22698+ a = kzalloc(sizeof(*a), GFP_NOFS); /* zeroed, so a->flags starts with no bits set */
22699+ if (unlikely(!a))
22700+ goto out;
22701+
22702+ if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
22703+ ia->ia_valid &= ~ATTR_MODE; /* ATTR_MODE is dropped whenever a suid/sgid kill is requested */
22704+
22705+ file = NULL;
22706+ sb = dentry->d_sb;
22707+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
22708+ if (unlikely(err))
22709+ goto out_kfree;
22710+
22711+ if (ia->ia_valid & ATTR_FILE) {
22712+ /* currently ftruncate(2) only */
22713+ AuDebugOn(!d_is_reg(dentry));
22714+ file = ia->ia_file;
22715+ err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1,
22716+ /*fi_lsc*/0);
22717+ if (unlikely(err))
22718+ goto out_si;
22719+ ia->ia_file = au_hf_top(file); /* substituted for the duration of the call; restored at out_dentry */
22720+ a->udba = AuOpt_UDBA_NONE;
22721+ } else {
22722+ /* fchmod() doesn't pass ia_file */
22723+ a->udba = au_opt_udba(sb);
22724+ di_write_lock_child(dentry);
22725+ /* no d_unlinked(), to set UDBA_NONE for root */
22726+ if (d_unhashed(dentry))
22727+ a->udba = AuOpt_UDBA_NONE;
22728+ if (a->udba != AuOpt_UDBA_NONE) {
22729+ AuDebugOn(IS_ROOT(dentry));
22730+ err = au_reval_for_attr(dentry, au_sigen(sb));
22731+ if (unlikely(err))
22732+ goto out_dentry;
22733+ }
22734+ }
22735+
22736+ err = au_pin_and_icpup(dentry, ia, a); /* on success a->h_inode is locked and a->pin is held */
22737+ if (unlikely(err < 0))
22738+ goto out_dentry;
22739+ if (au_ftest_icpup(a->flags, DID_CPUP)) { /* after a copy-up, do not pass the file on */
22740+ ia->ia_file = NULL;
22741+ ia->ia_valid &= ~ATTR_FILE;
22742+ }
22743+
22744+ a->h_path.mnt = au_sbr_mnt(sb, a->btgt);
22745+ if ((ia->ia_valid & (ATTR_MODE | ATTR_CTIME))
22746+ == (ATTR_MODE | ATTR_CTIME)) {
22747+ err = security_path_chmod(&a->h_path, ia->ia_mode);
22748+ if (unlikely(err))
22749+ goto out_unlock;
22750+ } else if ((ia->ia_valid & (ATTR_UID | ATTR_GID))
22751+ && (ia->ia_valid & ATTR_CTIME)) {
22752+ err = security_path_chown(&a->h_path, ia->ia_uid, ia->ia_gid);
22753+ if (unlikely(err))
22754+ goto out_unlock;
22755+ }
22756+
22757+ if (ia->ia_valid & ATTR_SIZE) {
22758+ struct file *f;
22759+
22760+ if (ia->ia_size < i_size_read(inode))
22761+ /* unmap only */
22762+ truncate_setsize(inode, ia->ia_size);
22763+
22764+ f = NULL;
22765+ if (ia->ia_valid & ATTR_FILE)
22766+ f = ia->ia_file;
22767+ inode_unlock(a->h_inode); /* a->h_inode is unlocked around vfsub_trunc() and re-locked right after */
22768+ err = vfsub_trunc(&a->h_path, ia->ia_size, ia->ia_valid, f);
22769+ inode_lock_nested(a->h_inode, AuLsc_I_CHILD);
22770+ } else {
22771+ delegated = NULL;
22772+ while (1) { /* retry vfsub_notify_change() after each successful break_deleg_wait() */
22773+ err = vfsub_notify_change(&a->h_path, ia, &delegated);
22774+ if (delegated) {
22775+ err = break_deleg_wait(&delegated);
22776+ if (!err)
22777+ continue;
22778+ }
22779+ break;
22780+ }
22781+ }
22782+ /*
22783+ * regardless aufs 'acl' option setting.
22784+ * why don't all acl-aware fs call this func from their ->setattr()?
22785+ */
22786+ if (!err && (ia->ia_valid & ATTR_MODE)) {
22787+ h_userns = mnt_user_ns(a->h_path.mnt);
22788+ err = vfsub_acl_chmod(h_userns, a->h_path.dentry, ia->ia_mode);
22789+ }
22790+ if (!err)
22791+ au_cpup_attr_changeable(inode);
22792+
22793+out_unlock:
22794+ inode_unlock(a->h_inode); /* pairs with the lock taken at the end of au_pin_and_icpup() */
22795+ au_unpin(&a->pin);
22796+ if (unlikely(err))
22797+ au_update_dbtop(dentry);
22798+out_dentry:
22799+ di_write_unlock(dentry);
22800+ if (file) { /* ATTR_FILE path: unlock the file and restore the caller's ia_file/ATTR_FILE */
22801+ fi_write_unlock(file);
22802+ ia->ia_file = file;
22803+ ia->ia_valid |= ATTR_FILE;
22804+ }
22805+out_si:
22806+ si_read_unlock(sb);
22807+out_kfree:
22808+ au_kfree_rcu(a);
22809+out:
22810+ AuTraceErr(err);
22811+ return err;
22812+}
22813+
22814+#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
22815+static int au_h_path_to_set_attr(struct dentry *dentry,
22816+ struct au_icpup_args *a, struct path *h_path)
22817+{ /* optional revalidation, then au_pin_and_icpup(); on success fills @h_path from @a */
22818+ int err;
22819+ struct super_block *sb;
22820+
22821+ sb = dentry->d_sb;
22822+ a->udba = au_opt_udba(sb);
22823+ /* no d_unlinked(), to set UDBA_NONE for root */
22824+ if (d_unhashed(dentry))
22825+ a->udba = AuOpt_UDBA_NONE;
22826+ if (a->udba != AuOpt_UDBA_NONE) {
22827+ AuDebugOn(IS_ROOT(dentry));
22828+ err = au_reval_for_attr(dentry, au_sigen(sb));
22829+ if (unlikely(err))
22830+ goto out;
22831+ }
22832+ err = au_pin_and_icpup(dentry, /*ia*/NULL, a); /* on success a->h_inode is left locked */
22833+ if (unlikely(err < 0))
22834+ goto out;
22835+
22836+ h_path->dentry = a->h_path.dentry;
22837+ h_path->mnt = au_sbr_mnt(sb, a->btgt);
22838+
22839+out:
22840+ return err; /* @h_path is written only on success */
22841+}
22842+
22843+ssize_t au_sxattr(struct dentry *dentry, struct inode *inode,
22844+ struct au_sxattr *arg)
22845+{ /* set an xattr or an ACL (selected by arg->type) on the h_path found by au_h_path_to_set_attr() */
22846+ int err;
22847+ struct path h_path;
22848+ struct super_block *sb;
22849+ struct au_icpup_args *a;
22850+ struct inode *h_inode;
22851+ struct user_namespace *h_userns;
22852+
22853+ IMustLock(inode);
22854+
22855+ err = -ENOMEM;
22856+ a = kzalloc(sizeof(*a), GFP_NOFS);
22857+ if (unlikely(!a))
22858+ goto out;
22859+
22860+ sb = dentry->d_sb;
22861+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
22862+ if (unlikely(err))
22863+ goto out_kfree;
22864+
22865+ h_path.dentry = NULL; /* silence gcc */
22866+ di_write_lock_child(dentry);
22867+ err = au_h_path_to_set_attr(dentry, a, &h_path);
22868+ if (unlikely(err))
22869+ goto out_di;
22870+ h_userns = mnt_user_ns(h_path.mnt);
22871+
22872+ inode_unlock(a->h_inode); /* au_pin_and_icpup() returned with a->h_inode locked */
22873+ switch (arg->type) { /* NOTE(review): no default case; any other type leaves err at 0 */
22874+ case AU_XATTR_SET:
22875+ AuDebugOn(d_is_negative(h_path.dentry));
22876+ err = vfsub_setxattr(h_userns, h_path.dentry,
22877+ arg->u.set.name, arg->u.set.value,
22878+ arg->u.set.size, arg->u.set.flags);
22879+ break;
22880+ case AU_ACL_SET:
22881+ err = -EOPNOTSUPP; /* kept when h_inode has no ->set_acl */
22882+ h_inode = d_inode(h_path.dentry);
22883+ if (h_inode->i_op->set_acl) {
22884+ /* this will call posix_acl_update_mode */
22885+ err = h_inode->i_op->set_acl(h_userns, h_path.dentry,
22886+ arg->u.acl_set.acl,
22887+ arg->u.acl_set.type);
22888+ }
22889+ break;
22890+ }
22891+ if (!err)
22892+ au_cpup_attr_timesizes(inode);
22893+
22894+ au_unpin(&a->pin);
22895+ if (unlikely(err))
22896+ au_update_dbtop(dentry);
22897+
22898+out_di:
22899+ di_write_unlock(dentry);
22900+ si_read_unlock(sb);
22901+out_kfree:
22902+ au_kfree_rcu(a);
22903+out:
22904+ AuTraceErr(err);
22905+ return err; /* 0 or a negative error, despite the ssize_t return type */
22906+}
22907+#endif
22908+
22909+static void au_refresh_iattr(struct inode *inode, struct kstat *st,
22910+ unsigned int nlink)
22911+{ /* copy mode, owner, timestamps, blocks and size from @st into @inode; adjust nlink for dirs */
22912+ unsigned int n;
22913+
22914+ inode->i_mode = st->mode;
22915+ /* don't i_[ug]id_write() here */
22916+ inode->i_uid = st->uid;
22917+ inode->i_gid = st->gid;
22918+ inode->i_atime = st->atime;
22919+ inode->i_mtime = st->mtime;
22920+ inode->i_ctime = st->ctime;
22921+
22922+ au_cpup_attr_nlink(inode, /*force*/0);
22923+ if (S_ISDIR(inode->i_mode)) { /* dirs: swap the caller-supplied @nlink for st->nlink in the count */
22924+ n = inode->i_nlink;
22925+ n -= nlink;
22926+ n += st->nlink;
22927+ smp_mb(); /* for i_nlink */
22928+ /* 0 can happen */
22929+ set_nlink(inode, n);
22930+ }
22931+
22932+ spin_lock(&inode->i_lock); /* i_blocks and i_size are updated together under i_lock */
22933+ inode->i_blocks = st->blocks;
22934+ i_size_write(inode, st->size);
22935+ spin_unlock(&inode->i_lock);
22936+}
22937+
22938+/*
22939+ * common routine for aufs_getattr() and au_getxattr().
22940+ * returns zero or negative (an error).
22941+ * @dentry will be read-locked on success, unless @locked is set.
22942+ */
22943+int au_h_path_getattr(struct dentry *dentry, struct inode *inode, int force,
22944+ struct path *h_path, int locked)
22945+{
22946+ int err;
22947+ unsigned int mnt_flags, sigen;
22948+ unsigned char udba_none;
22949+ aufs_bindex_t bindex;
22950+ struct super_block *sb, *h_sb;
22951+
22952+ h_path->mnt = NULL; /* both fields stay NULL unless filled in below */
22953+ h_path->dentry = NULL;
22954+
22955+ err = 0;
22956+ sb = dentry->d_sb;
22957+ mnt_flags = au_mntflags(sb);
22958+ udba_none = !!au_opt_test(mnt_flags, UDBA_NONE);
22959+
22960+ if (unlikely(locked))
22961+ goto body; /* skip locking dinfo */
22962+
22963+ /* support fstat(2) */
22964+ if (!d_unlinked(dentry) && !udba_none) {
22965+ sigen = au_sigen(sb);
22966+ err = au_digen_test(dentry, sigen);
22967+ if (!err) { /* au_digen_test() == 0: no refresh, a read lock is taken directly */
22968+ di_read_lock_child(dentry, AuLock_IR);
22969+ err = au_dbrange_test(dentry);
22970+ if (unlikely(err)) {
22971+ di_read_unlock(dentry, AuLock_IR);
22972+ goto out;
22973+ }
22974+ } else { /* write-lock, revalidate, then downgrade to a read lock */
22975+ AuDebugOn(IS_ROOT(dentry));
22976+ di_write_lock_child(dentry);
22977+ err = au_dbrange_test(dentry);
22978+ if (!err)
22979+ err = au_reval_for_attr(dentry, sigen);
22980+ if (!err)
22981+ di_downgrade_lock(dentry, AuLock_IR);
22982+ else {
22983+ di_write_unlock(dentry);
22984+ goto out;
22985+ }
22986+ }
22987+ } else
22988+ di_read_lock_child(dentry, AuLock_IR);
22989+
22990+body:
22991+ if (!inode) { /* caller passed no inode: fall back to the dentry's */
22992+ inode = d_inode(dentry);
22993+ if (unlikely(!inode))
22994+ goto out; /* err is 0 here and h_path stays NULL */
22995+ }
22996+ bindex = au_ibtop(inode);
22997+ h_path->mnt = au_sbr_mnt(sb, bindex);
22998+ h_sb = h_path->mnt->mnt_sb;
22999+ if (!force
23000+ && !au_test_fs_bad_iattr(h_sb)
23001+ && udba_none)
23002+ goto out; /* success */
23003+
23004+ if (au_dbtop(dentry) == bindex)
23005+ h_path->dentry = au_h_dptr(dentry, bindex);
23006+ else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
23007+ h_path->dentry = au_plink_lkup(inode, bindex);
23008+ if (IS_ERR(h_path->dentry))
23009+ /* pretending success */
23010+ h_path->dentry = NULL;
23011+ else
23012+ dput(h_path->dentry); /* dput() right away while the pointer itself stays in h_path */
23013+ }
23014+
23015+out:
23016+ return err;
23017+}
23018+
23019+static int aufs_getattr(struct user_namespace *userns, const struct path *path,
23020+ struct kstat *st, u32 request, unsigned int query)
23021+{ /* ->getattr(): refresh the aufs inode from h_path when one is available, then fill @st from the aufs inode */
23022+ int err;
23023+ unsigned char positive;
23024+ struct path h_path;
23025+ struct dentry *dentry;
23026+ struct inode *inode;
23027+ struct super_block *sb;
23028+
23029+ dentry = path->dentry;
23030+ inode = d_inode(dentry);
23031+ sb = dentry->d_sb;
23032+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
23033+ if (unlikely(err))
23034+ goto out;
23035+ err = au_h_path_getattr(dentry, /*inode*/NULL, /*force*/0, &h_path,
23036+ /*locked*/0);
23037+ if (unlikely(err))
23038+ goto out_si; /* on failure au_h_path_getattr() has already dropped the dinfo lock */
23039+ if (unlikely(!h_path.dentry))
23040+ /* illegally overlapped or something */
23041+ goto out_fill; /* pretending success */
23042+
23043+ positive = d_is_positive(h_path.dentry);
23044+ if (positive)
23045+ /* no vfsub version */
23046+ err = vfs_getattr(&h_path, st, request, query);
23047+ if (!err) { /* also taken when h_path.dentry is negative, since err is still 0 */
23048+ if (positive)
23049+ au_refresh_iattr(inode, st,
23050+ d_inode(h_path.dentry)->i_nlink);
23051+ goto out_fill; /* success */
23052+ }
23053+ AuTraceErr(err);
23054+ goto out_di;
23055+
23056+out_fill:
23057+ generic_fillattr(userns, inode, st); /* @st is filled from the aufs inode */
23058+out_di:
23059+ di_read_unlock(dentry, AuLock_IR);
23060+out_si:
23061+ si_read_unlock(sb);
23062+out:
23063+ AuTraceErr(err);
23064+ return err;
23065+}
23066+
23067+/* ---------------------------------------------------------------------- */
23068+
23069+static const char *aufs_get_link(struct dentry *dentry, struct inode *inode,
23070+ struct delayed_call *done)
23071+{ /* ->get_link(): resolve via vfs_get_link() on an h_dentry; returns the link body or an ERR_PTR */
23072+ const char *ret;
23073+ struct dentry *h_dentry;
23074+ struct inode *h_inode;
23075+ int err;
23076+ aufs_bindex_t bindex;
23077+
23078+ ret = NULL; /* suppress a warning */
23079+ err = -ECHILD;
23080+ if (!dentry) /* a NULL dentry is refused with -ECHILD (VFS calls with NULL in RCU-walk mode) */
23081+ goto out;
23082+
23083+ err = aufs_read_lock(dentry, AuLock_IR | AuLock_GEN);
23084+ if (unlikely(err))
23085+ goto out;
23086+
23087+ err = au_d_hashed_positive(dentry);
23088+ if (unlikely(err))
23089+ goto out_unlock;
23090+
23091+ err = -EINVAL;
23092+ inode = d_inode(dentry); /* the @inode argument is overwritten here */
23093+ bindex = au_ibtop(inode);
23094+ h_inode = au_h_iptr(inode, bindex);
23095+ if (unlikely(!h_inode->i_op->get_link))
23096+ goto out_unlock;
23097+
23098+ err = -EBUSY;
23099+ h_dentry = NULL;
23100+ if (au_dbtop(dentry) <= bindex) {
23101+ h_dentry = au_h_dptr(dentry, bindex);
23102+ if (h_dentry)
23103+ dget(h_dentry); /* balanced by the dput() after vfs_get_link() */
23104+ }
23105+ if (!h_dentry) { /* fallback: any alias of h_inode */
23106+ h_dentry = d_find_any_alias(h_inode);
23107+ if (IS_ERR(h_dentry)) {
23108+ err = PTR_ERR(h_dentry);
23109+ goto out_unlock;
23110+ }
23111+ }
23112+ if (unlikely(!h_dentry))
23113+ goto out_unlock; /* returns -EBUSY */
23114+
23115+ err = 0;
23116+ AuDbg("%ps\n", h_inode->i_op->get_link);
23117+ AuDbgDentry(h_dentry);
23118+ ret = vfs_get_link(h_dentry, done);
23119+ dput(h_dentry);
23120+ if (IS_ERR(ret))
23121+ err = PTR_ERR(ret);
23122+
23123+out_unlock:
23124+ aufs_read_unlock(dentry, AuLock_IR);
23125+out:
23126+ if (unlikely(err))
23127+ ret = ERR_PTR(err); /* every failure is reported as an ERR_PTR */
23128+ AuTraceErrPtr(ret);
23129+ return ret;
23130+}
23131+
23132+/* ---------------------------------------------------------------------- */
23133+
23134+static int au_is_special(struct inode *inode)
23135+{ /* nonzero only for char/block devices, FIFOs and sockets */
23136+ return special_file(inode->i_mode); /* S_IF* are S_IFMT type codes, not flag bits: OR-ing them gave S_IFMT and matched every inode */
23137+}
23138+
23139+static int aufs_update_time(struct inode *inode, struct timespec64 *ts,
23140+ int flags)
23141+{ /* ->update_time(): forward to h_inode unless au_test_ro() reports the top branch read-only */
23142+ int err;
23143+ aufs_bindex_t bindex;
23144+ struct super_block *sb;
23145+ struct inode *h_inode;
23146+ struct vfsmount *h_mnt;
23147+
23148+ sb = inode->i_sb;
23149+ WARN_ONCE((flags & S_ATIME) && !IS_NOATIME(inode),
23150+ "unexpected s_flags 0x%lx", sb->s_flags);
23151+
23152+ /* mmap_sem might be acquired already, cf. aufs_mmap() */
23153+ lockdep_off();
23154+ si_read_lock(sb, AuLock_FLUSH); /* NOTE(review): return value unchecked, unlike the other si_read_lock() callers in this file */
23155+ ii_write_lock_child(inode);
23156+
23157+ err = 0;
23158+ bindex = au_ibtop(inode);
23159+ h_inode = au_h_iptr(inode, bindex);
23160+ if (!au_test_ro(sb, bindex, inode)) {
23161+ h_mnt = au_sbr_mnt(sb, bindex);
23162+ err = vfsub_mnt_want_write(h_mnt);
23163+ if (!err) {
23164+ err = vfsub_update_time(h_inode, ts, flags);
23165+ vfsub_mnt_drop_write(h_mnt);
23166+ }
23167+ } else if (au_is_special(h_inode)) {
23168+ /*
23169+ * Never copy-up here.
23170+ * These special files may already be opened and used for
23171+ * communicating. If we copied it up, then the communication
23172+ * would be corrupted.
23173+ */
23174+ AuWarn1("timestamps for i%lu are ignored "
23175+ "since it is on readonly branch (hi%lu).\n",
23176+ inode->i_ino, h_inode->i_ino);
23177+ } else if (flags & ~S_ATIME) { /* anything beyond S_ATIME is rejected with -EIO */
23178+ err = -EIO;
23179+ AuIOErr1("unexpected flags 0x%x\n", flags);
23180+ AuDebugOn(1);
23181+ }
23182+
23183+ if (!err)
23184+ au_cpup_attr_timesizes(inode);
23185+ ii_write_unlock(inode);
23186+ si_read_unlock(sb);
23187+ lockdep_on();
23188+
23189+ if (!err && (flags & S_VERSION))
23190+ inode_inc_iversion(inode); /* done after the aufs locks are released */
23191+
23192+ return err;
23193+}
23194+
23195+/* ---------------------------------------------------------------------- */
23196+
23197+/* the no-getattr version (aufs_iop_nogetattr[]) will be set by module.c:aufs_init() */
23198+struct inode_operations aufs_iop_nogetattr[AuIop_Last],
23199+ aufs_iop[] = {
23200+ [AuIop_SYMLINK] = { /* the only entry with ->get_link; ->update_time is left unset */
23201+ .permission = aufs_permission,
23202+#ifdef CONFIG_FS_POSIX_ACL
23203+ .get_inode_acl = aufs_get_inode_acl,
23204+ .get_acl = aufs_get_acl,
23205+ .set_acl = aufs_set_acl, /* unsupported for symlinks? */
23206+#endif
23207+
23208+ .setattr = aufs_setattr,
23209+ .getattr = aufs_getattr,
23210+
23211+#ifdef CONFIG_AUFS_XATTR
23212+ .listxattr = aufs_listxattr,
23213+#endif
23214+
23215+ .get_link = aufs_get_link
23216+
23217+ /* .update_time = aufs_update_time */
23218+ },
23219+ [AuIop_DIR] = { /* the only entry with the namespace operations (create, lookup, link, ...) */
23220+ .create = aufs_create,
23221+ .lookup = aufs_lookup,
23222+ .link = aufs_link,
23223+ .unlink = aufs_unlink,
23224+ .symlink = aufs_symlink,
23225+ .mkdir = aufs_mkdir,
23226+ .rmdir = aufs_rmdir,
23227+ .mknod = aufs_mknod,
23228+ .rename = aufs_rename,
23229+
23230+ .permission = aufs_permission,
23231+#ifdef CONFIG_FS_POSIX_ACL
23232+ .get_inode_acl = aufs_get_inode_acl,
23233+ .get_acl = aufs_get_acl,
23234+ .set_acl = aufs_set_acl,
23235+#endif
23236+
23237+ .setattr = aufs_setattr,
23238+ .getattr = aufs_getattr,
23239+
23240+#ifdef CONFIG_AUFS_XATTR
23241+ .listxattr = aufs_listxattr,
23242+#endif
23243+
23244+ .update_time = aufs_update_time,
23245+ .atomic_open = aufs_atomic_open,
23246+ .tmpfile = aufs_tmpfile
23247+ },
23248+ [AuIop_OTHER] = { /* attribute, permission and time operations only */
23249+ .permission = aufs_permission,
23250+#ifdef CONFIG_FS_POSIX_ACL
23251+ .get_inode_acl = aufs_get_inode_acl,
23252+ .get_acl = aufs_get_acl,
23253+ .set_acl = aufs_set_acl,
23254+#endif
23255+
23256+ .setattr = aufs_setattr,
23257+ .getattr = aufs_getattr,
23258+
23259+#ifdef CONFIG_AUFS_XATTR
23260+ .listxattr = aufs_listxattr,
23261+#endif
23262+
23263+ .update_time = aufs_update_time
23264+ }
23265+};
23266diff -urN /usr/share/empty/fs/aufs/i_op_del.c linux/fs/aufs/i_op_del.c
23267--- /usr/share/empty/fs/aufs/i_op_del.c 1970-01-01 01:00:00.000000000 +0100
23268+++ linux/fs/aufs/i_op_del.c 2022-11-05 23:02:18.965889284 +0100
23269@@ -0,0 +1,522 @@
23270+// SPDX-License-Identifier: GPL-2.0
23271+/*
23272+ * Copyright (C) 2005-2022 Junjiro R. Okajima
23273+ *
23274+ * This program is free software; you can redistribute it and/or modify
23275+ * it under the terms of the GNU General Public License as published by
23276+ * the Free Software Foundation; either version 2 of the License, or
23277+ * (at your option) any later version.
23278+ *
23279+ * This program is distributed in the hope that it will be useful,
23280+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
23281+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23282+ * GNU General Public License for more details.
23283+ *
23284+ * You should have received a copy of the GNU General Public License
23285+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
23286+ */
23287+
23288+/*
23289+ * inode operations (del entry)
23290+ */
23291+
23292+#include <linux/iversion.h>
23293+#include "aufs.h"
23294+
23295+/*
23296+ * decide if a new whiteout for @dentry is necessary or not.
23297+ * when it is necessary, prepare the parent dir for the upper branch whose
23298+ * branch index is @bcpup for creation. the actual creation of the whiteout will
23299+ * be done by the caller. a negative *@bcpup lets this function choose the branch.
23300+ * return value:
23301+ * 0: wh is unnecessary
23302+ * plus: wh is necessary
23303+ * minus: error
23304+ */
23305+int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
23306+{ /* NOTE: @isdir is not referenced in this body */
23307+ int need_wh, err;
23308+ aufs_bindex_t btop;
23309+ struct super_block *sb;
23310+
23311+ sb = dentry->d_sb;
23312+ btop = au_dbtop(dentry);
23313+ if (*bcpup < 0) { /* no branch requested: start from btop */
23314+ *bcpup = btop;
23315+ if (au_test_ro(sb, btop, d_inode(dentry))) {
23316+ err = AuWbrCopyup(au_sbi(sb), dentry);
23317+ *bcpup = err; /* also stored when negative; the error is returned just below */
23318+ if (unlikely(err < 0))
23319+ goto out;
23320+ }
23321+ } else
23322+ AuDebugOn(btop < *bcpup
23323+ || au_test_ro(sb, *bcpup, d_inode(dentry)));
23324+ AuDbg("bcpup %d, btop %d\n", *bcpup, btop);
23325+
23326+ if (*bcpup != btop) { /* operating on another branch: prepare its dirs; a whiteout is needed */
23327+ err = au_cpup_dirs(dentry, *bcpup);
23328+ if (unlikely(err))
23329+ goto out;
23330+ need_wh = 1;
23331+ } else {
23332+ struct au_dinfo *dinfo, *tmp;
23333+
23334+ need_wh = -ENOMEM; /* returned as is when au_di_alloc() fails */
23335+ dinfo = au_di(dentry);
23336+ tmp = au_di_alloc(sb, AuLsc_DI_TMP);
23337+ if (tmp) { /* look up from btop + 1 with a temporary dinfo swapped in, then swap back */
23338+ au_di_cp(tmp, dinfo);
23339+ au_di_swap(tmp, dinfo);
23340+ /* returns the number of positive dentries */
23341+ need_wh = au_lkup_dentry(dentry, btop + 1,
23342+ /* AuLkup_IGNORE_PERM */ 0);
23343+ au_di_swap(tmp, dinfo);
23344+ au_rw_write_unlock(&tmp->di_rwsem);
23345+ au_di_free(tmp);
23346+ }
23347+ }
23348+ AuDbg("need_wh %d\n", need_wh);
23349+ err = need_wh;
23350+
23351+out:
23352+ return err;
23353+}
23354+
23355+/*
23356+ * simple tests for the del-entry operations.
23357+ * following the checks in vfs, plus the parent-child relationship.
23358+ */
23359+int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
23360+ struct dentry *h_parent, int isdir)
23361+{ /* returns 0 when the entry on branch @bindex may be deleted, otherwise a negative error */
23362+ int err;
23363+ umode_t h_mode;
23364+ struct dentry *h_dentry, *h_latest;
23365+ struct inode *h_inode;
23366+ struct path h_ppath;
23367+ struct super_block *sb;
23368+ struct au_branch *br;
23369+ struct user_namespace *h_userns;
23370+
23371+ h_dentry = au_h_dptr(dentry, bindex);
23372+ if (d_really_is_positive(dentry)) {
23373+ err = -ENOENT;
23374+ if (unlikely(d_is_negative(h_dentry)))
23375+ goto out;
23376+ h_inode = d_inode(h_dentry);
23377+ if (unlikely(!h_inode->i_nlink))
23378+ goto out;
23379+
23380+ h_mode = h_inode->i_mode;
23381+ if (!isdir) { /* the file type must agree with @isdir */
23382+ err = -EISDIR;
23383+ if (unlikely(S_ISDIR(h_mode)))
23384+ goto out;
23385+ } else if (unlikely(!S_ISDIR(h_mode))) {
23386+ err = -ENOTDIR;
23387+ goto out;
23388+ }
23389+ } else {
23390+ /* rename(2) case */
23391+ err = -EIO;
23392+ if (unlikely(d_is_positive(h_dentry)))
23393+ goto out;
23394+ }
23395+
23396+ err = -ENOENT;
23397+ /* expected parent dir is locked */
23398+ if (unlikely(h_parent != h_dentry->d_parent))
23399+ goto out;
23400+ err = 0; /* NOTE: overwritten by the -EACCES assignment below before any use */
23401+
23402+ /*
23403+ * rmdir a dir may break the consistency on some filesystem.
23404+ * let's try heavy test.
23405+ */
23406+ err = -EACCES;
23407+ sb = dentry->d_sb;
23408+ br = au_sbr(sb, bindex);
23409+ h_userns = au_br_userns(br);
23410+ if (unlikely(!au_opt_test(au_mntflags(sb), DIRPERM1)
23411+ && au_test_h_perm(h_userns, d_inode(h_parent),
23412+ MAY_EXEC | MAY_WRITE)))
23413+ goto out;
23414+
23415+ h_ppath.dentry = h_parent;
23416+ h_ppath.mnt = au_br_mnt(br);
23417+ h_latest = au_sio_lkup_one(h_userns, &dentry->d_name, &h_ppath);
23418+ err = -EIO;
23419+ if (IS_ERR(h_latest))
23420+ goto out;
23421+ if (h_latest == h_dentry) /* a fresh lookup must return the very same h_dentry */
23422+ err = 0;
23423+ dput(h_latest);
23424+
23425+out:
23426+ return err;
23427+}
23428+
23429+/*
23430+ * decide the branch where we operate for @dentry. the branch index is stored
23431+ * in *@rbcpup. after deciding it, 'pin' it and store the timestamps of the
23432+ * parent dir in @dt for reverting.
23433+ * when a new whiteout is necessary, create it.
23434+ */
23435+static struct dentry*
23436+lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup,
23437+ struct au_dtime *dt, struct au_pin *pin)
23438+{ /* returns ERR_PTR on failure (unpinned), NULL when no whiteout is needed, else the whiteout dentry */
23439+ struct dentry *wh_dentry;
23440+ struct super_block *sb;
23441+ struct path h_path;
23442+ int err, need_wh;
23443+ unsigned int udba;
23444+ aufs_bindex_t bcpup;
23445+
23446+ need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup);
23447+ wh_dentry = ERR_PTR(need_wh);
23448+ if (unlikely(need_wh < 0))
23449+ goto out;
23450+
23451+ sb = dentry->d_sb;
23452+ udba = au_opt_udba(sb);
23453+ bcpup = *rbcpup;
23454+ err = au_pin(pin, dentry, bcpup, udba,
23455+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
23456+ wh_dentry = ERR_PTR(err);
23457+ if (unlikely(err))
23458+ goto out;
23459+
23460+ h_path.dentry = au_pinned_h_parent(pin);
23461+ if (udba != AuOpt_UDBA_NONE
23462+ && au_dbtop(dentry) == bcpup) { /* checked only with udba enabled and the entry on the chosen branch */
23463+ err = au_may_del(dentry, bcpup, h_path.dentry, isdir);
23464+ wh_dentry = ERR_PTR(err);
23465+ if (unlikely(err))
23466+ goto out_unpin;
23467+ }
23468+
23469+ h_path.mnt = au_sbr_mnt(sb, bcpup);
23470+ au_dtime_store(dt, au_pinned_parent(pin), &h_path);
23471+ wh_dentry = NULL;
23472+ if (!need_wh)
23473+ goto out; /* success, no need to create whiteout */
23474+
23475+ wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry);
23476+ if (IS_ERR(wh_dentry))
23477+ goto out_unpin;
23478+
23479+ /* returns with the parent locked and wh_dentry dget-ed */
23480+ goto out; /* success */
23481+
23482+out_unpin:
23483+ au_unpin(pin);
23484+out:
23485+ return wh_dentry;
23486+}
23487+
23488+/*
23489+ * when removing a dir, rename it to a unique temporary whiteout-ed name first
23490+ * in order to be revertible and save time for removing many child whiteouts
23491+ * under the dir.
23492+ * returns 1 when there are too many child whiteouts and the caller should remove
23493+ * them asynchronously. returns 0 when the number of children is small enough to
23494+ * remove now or the branch fs is a remote fs.
23495+ * otherwise returns an error.
23496+ */
23497+static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex,
23498+ struct au_nhash *whlist, struct inode *dir)
23499+{
23500+ int rmdir_later, err, dirwh;
23501+ struct dentry *h_dentry;
23502+ struct super_block *sb;
23503+ struct inode *inode;
23504+
23505+ sb = dentry->d_sb;
23506+ SiMustAnyLock(sb);
23507+ h_dentry = au_h_dptr(dentry, bindex);
23508+ err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex));
23509+ if (unlikely(err))
23510+ goto out;
23511+
23512+ /* stop monitoring */
23513+ inode = d_inode(dentry);
23514+ au_hn_free(au_hi(inode, bindex));
23515+
23516+ if (!au_test_fs_remote(h_dentry->d_sb)) { /* deferral is considered only for non-remote branch filesystems */
23517+ dirwh = au_sbi(sb)->si_dirwh;
23518+ rmdir_later = (dirwh <= 1);
23519+ if (!rmdir_later)
23520+ rmdir_later = au_nhash_test_longer_wh(whlist, bindex,
23521+ dirwh);
23522+ if (rmdir_later)
23523+ return rmdir_later; /* direct return: skips the AuTraceErr() at out */
23524+ }
23525+
23526+ err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist);
23527+ if (unlikely(err)) {
23528+ AuIOErr("rmdir %pd, b%d failed, %d. ignored\n",
23529+ h_dentry, bindex, err);
23530+ err = 0; /* a failed removal is logged and then reported as success */
23531+ }
23532+
23533+out:
23534+ AuTraceErr(err);
23535+ return err;
23536+}
23537+
23538+/*
23539+ * final procedure for deleting an entry.
23540+ * maintain dentry and iattr.
23541+ */
23542+static void epilog(struct inode *dir, struct dentry *dentry,
23543+ aufs_bindex_t bindex)
23544+{
23545+ struct inode *inode;
23546+
23547+ inode = d_inode(dentry);
23548+ d_drop(dentry); /* unhash the deleted entry */
23549+ inode->i_ctime = dir->i_ctime; /* copied before au_dir_ts() runs on @dir */
23550+
23551+ au_dir_ts(dir, bindex);
23552+ inode_inc_iversion(dir);
23553+}
23554+
23555+/*
23556+ * when an error happened, remove the created whiteout and revert everything.
23557+ */
23558+static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex,
23559+ aufs_bindex_t bwh, struct dentry *wh_dentry,
23560+ struct dentry *dentry, struct au_dtime *dt)
23561+{ /* returns 0 when the revert worked, -EIO otherwise; @err is used only in the log message */
23562+ int rerr;
23563+ struct path h_path = {
23564+ .dentry = wh_dentry,
23565+ .mnt = au_sbr_mnt(dir->i_sb, bindex)
23566+ };
23567+
23568+ rerr = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, dentry);
23569+ if (!rerr) { /* whiteout removed: restore the saved bwh and the parent dir's timestamps */
23570+ au_set_dbwh(dentry, bwh);
23571+ au_dtime_revert(dt);
23572+ return 0;
23573+ }
23574+
23575+ AuIOErr("%pd reverting whiteout failed(%d, %d)\n", dentry, err, rerr);
23576+ return -EIO;
23577+}
23578+
23579+/* ---------------------------------------------------------------------- */
23580+
23581+int aufs_unlink(struct inode *dir, struct dentry *dentry)
23582+{ /* ->unlink(): remove the entry on its top branch and/or hide it with a whiteout */
23583+ int err;
23584+ aufs_bindex_t bwh, bindex, btop;
23585+ struct inode *inode, *h_dir, *delegated;
23586+ struct dentry *parent, *wh_dentry;
23587+ /* to reduce stack size */
23588+ struct {
23589+ struct au_dtime dt;
23590+ struct au_pin pin;
23591+ struct path h_path;
23592+ } *a;
23593+
23594+ IMustLock(dir);
23595+
23596+ err = -ENOMEM;
23597+ a = kmalloc(sizeof(*a), GFP_NOFS);
23598+ if (unlikely(!a))
23599+ goto out;
23600+
23601+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
23602+ if (unlikely(err))
23603+ goto out_free;
23604+ err = au_d_hashed_positive(dentry);
23605+ if (unlikely(err))
23606+ goto out_unlock;
23607+ inode = d_inode(dentry);
23608+ IMustLock(inode);
23609+ err = -EISDIR;
23610+ if (unlikely(d_is_dir(dentry)))
23611+ goto out_unlock; /* possible? */
23612+
23613+ btop = au_dbtop(dentry);
23614+ bwh = au_dbwh(dentry); /* saved so do_revert() can restore it */
23615+ bindex = -1; /* negative: au_wr_dir_need_wh() chooses the branch */
23616+ parent = dentry->d_parent; /* dir inode is locked */
23617+ di_write_lock_parent(parent);
23618+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &a->dt,
23619+ &a->pin);
23620+ err = PTR_ERR(wh_dentry);
23621+ if (IS_ERR(wh_dentry))
23622+ goto out_parent;
23623+
23624+ a->h_path.mnt = au_sbr_mnt(dentry->d_sb, btop);
23625+ a->h_path.dentry = au_h_dptr(dentry, btop);
23626+ dget(a->h_path.dentry); /* dropped at out_unpin */
23627+ if (bindex == btop) { /* the entry is on the chosen branch: unlink it there */
23628+ h_dir = au_pinned_h_dir(&a->pin);
23629+ delegated = NULL;
23630+ err = vfsub_unlink(h_dir, &a->h_path, &delegated, /*force*/0);
23631+ if (unlikely(err == -EWOULDBLOCK)) {
23632+ pr_warn("cannot retry for NFSv4 delegation"
23633+ " for an internal unlink\n");
23634+ iput(delegated);
23635+ }
23636+ } else { /* another branch was chosen, so the whiteout created above hides the entry */
23637+ /* dir inode is locked */
23638+ h_dir = d_inode(wh_dentry->d_parent);
23639+ IMustLock(h_dir);
23640+ err = 0;
23641+ }
23642+
23643+ if (!err) {
23644+ vfsub_drop_nlink(inode);
23645+ epilog(dir, dentry, bindex);
23646+
23647+ /* update target timestamps */
23648+ if (bindex == btop) {
23649+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL);
23650+ /*ignore*/
23651+ inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
23652+ } else
23653+ /* todo: this timestamp may be reverted later */
23654+ inode->i_ctime = h_dir->i_ctime;
23655+ goto out_unpin; /* success */
23656+ }
23657+
23658+ /* revert */
23659+ if (wh_dentry) { /* a whiteout was created: try to remove it again */
23660+ int rerr;
23661+
23662+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
23663+ &a->dt);
23664+ if (rerr)
23665+ err = rerr; /* a failed revert (-EIO) replaces the original error */
23666+ }
23667+
23668+out_unpin:
23669+ au_unpin(&a->pin);
23670+ dput(wh_dentry); /* wh_dentry is NULL here when no whiteout was needed */
23671+ dput(a->h_path.dentry);
23672+out_parent:
23673+ di_write_unlock(parent);
23674+out_unlock:
23675+ aufs_read_unlock(dentry, AuLock_DW);
23676+out_free:
23677+ au_kfree_rcu(a);
23678+out:
23679+ return err;
23680+}
23681+/* rmdir(2) for a dir which passes au_alive_dir() and au_test_empty(); fails with -ENOTDIR for a non-dir */
23682+int aufs_rmdir(struct inode *dir, struct dentry *dentry)
23683+{
23684+ int err, rmdir_later;
23685+ aufs_bindex_t bwh, bindex, btop;
23686+ struct inode *inode;
23687+ struct dentry *parent, *wh_dentry, *h_dentry;
23688+ struct au_whtmp_rmdir *args;
23689+ /* to reduce stack size */
23690+ struct {
23691+ struct au_dtime dt;
23692+ struct au_pin pin;
23693+ } *a;
23694+
23695+ IMustLock(dir);
23696+
23697+ err = -ENOMEM;
23698+ a = kmalloc(sizeof(*a), GFP_NOFS);
23699+ if (unlikely(!a))
23700+ goto out;
23701+
23702+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
23703+ if (unlikely(err))
23704+ goto out_free;
23705+ err = au_alive_dir(dentry);
23706+ if (unlikely(err))
23707+ goto out_unlock;
23708+ inode = d_inode(dentry);
23709+ IMustLock(inode);
23710+ err = -ENOTDIR;
23711+ if (unlikely(!d_is_dir(dentry)))
23712+ goto out_unlock; /* possible? */
23713+
23714+ err = -ENOMEM;
23715+ args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS);
23716+ if (unlikely(!args))
23717+ goto out_unlock;
23718+
23719+ parent = dentry->d_parent; /* dir inode is locked */
23720+ di_write_lock_parent(parent);
23721+ err = au_test_empty(dentry, &args->whlist);
23722+ if (unlikely(err))
23723+ goto out_parent;
23724+
23725+ btop = au_dbtop(dentry);
23726+ bwh = au_dbwh(dentry); /* saved so that do_revert() can restore it */
23727+ bindex = -1; /* passed by address to lock_hdir_create_wh() below */
23728+ wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &a->dt,
23729+ &a->pin);
23730+ err = PTR_ERR(wh_dentry);
23731+ if (IS_ERR(wh_dentry))
23732+ goto out_parent;
23733+
23734+ h_dentry = au_h_dptr(dentry, btop);
23735+ dget(h_dentry); /* dropped at out_unpin */
23736+ rmdir_later = 0;
23737+ if (bindex == btop) {
23738+ err = renwh_and_rmdir(dentry, btop, &args->whlist, dir);
23739+ if (err > 0) { /* a positive value is kept as rmdir_later, not as an error */
23740+ rmdir_later = err;
23741+ err = 0;
23742+ }
23743+ } else {
23744+ /* stop monitoring */
23745+ au_hn_free(au_hi(inode, btop));
23746+
23747+ /* dir inode is locked */
23748+ IMustLock(d_inode(wh_dentry->d_parent));
23749+ err = 0;
23750+ }
23751+
23752+ if (!err) {
23753+ vfsub_dead_dir(inode);
23754+ au_set_dbdiropq(dentry, -1);
23755+ epilog(dir, dentry, bindex);
23756+
23757+ if (rmdir_later) {
23758+ au_whtmp_kick_rmdir(dir, btop, h_dentry, args);
23759+ args = NULL; /* keeps out_parent from freeing args after the kick */
23760+ }
23761+
23762+ goto out_unpin; /* success */
23763+ }
23764+
23765+ /* revert */
23766+ AuLabel(revert);
23767+ if (wh_dentry) {
23768+ int rerr;
23769+
23770+ rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
23771+ &a->dt);
23772+ if (rerr)
23773+ err = rerr; /* a failed revert replaces the original error */
23774+ }
23775+
23776+out_unpin:
23777+ au_unpin(&a->pin);
23778+ dput(wh_dentry);
23779+ dput(h_dentry);
23780+out_parent:
23781+ di_write_unlock(parent);
23782+ if (args)
23783+ au_whtmp_rmdir_free(args);
23784+out_unlock:
23785+ aufs_read_unlock(dentry, AuLock_DW);
23786+out_free:
23787+ au_kfree_rcu(a);
23788+out:
23789+ AuTraceErr(err);
23790+ return err;
23791+}
23792diff -urN /usr/share/empty/fs/aufs/i_op_ren.c linux/fs/aufs/i_op_ren.c
23793--- /usr/share/empty/fs/aufs/i_op_ren.c 1970-01-01 01:00:00.000000000 +0100
23794+++ linux/fs/aufs/i_op_ren.c 2022-11-05 23:02:18.965889284 +0100
23795@@ -0,0 +1,1257 @@
23796+// SPDX-License-Identifier: GPL-2.0
23797+/*
23798+ * Copyright (C) 2005-2022 Junjiro R. Okajima
23799+ *
23800+ * This program is free software; you can redistribute it and/or modify
23801+ * it under the terms of the GNU General Public License as published by
23802+ * the Free Software Foundation; either version 2 of the License, or
23803+ * (at your option) any later version.
23804+ *
23805+ * This program is distributed in the hope that it will be useful,
23806+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
23807+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23808+ * GNU General Public License for more details.
23809+ *
23810+ * You should have received a copy of the GNU General Public License
23811+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
23812+ */
23813+
23814+/*
23815+ * inode operation (rename entry)
23816+ * todo: this is crazy monster
23817+ */
23818+
23819+#include <linux/iversion.h>
23820+#include "aufs.h"
23821+
23822+enum { AuSRC, AuDST, AuSrcDst }; /* indexes of au_ren_args.sd[]; AuSrcDst is the array size */
23823+enum { AuPARENT, AuCHILD, AuParentChild }; /* indexes of sd[].dt[]; AuParentChild is the array size */
23824+
23825+#define AuRen_ISDIR_SRC 1
23826+#define AuRen_ISDIR_DST (1 << 1)
23827+#define AuRen_ISSAMEDIR (1 << 2)
23828+#define AuRen_WHSRC (1 << 3) /* do_rename() creates a whiteout for the source */
23829+#define AuRen_WHDST (1 << 4) /* do_rename() looks up a whiteout for the destination */
23830+#define AuRen_MNT_WRITE (1 << 5) /* set in au_ren_lock() after vfsub_mnt_want_write() succeeded */
23831+#define AuRen_DT_DSTDIR (1 << 6) /* set in au_ren_dt() when the dtime of the destination entry is stored */
23832+#define AuRen_DIROPQ_SRC (1 << 7) /* set after au_ren_do_diropq(AuSRC) succeeded */
23833+#define AuRen_DIROPQ_DST (1 << 8) /* set after au_ren_do_diropq(AuDST) succeeded */
23834+#define AuRen_DIRREN (1 << 9) /* set in may_rename_srcdir() when the DIRREN mount option is on */
23835+#define AuRen_DROPPED_SRC (1 << 10)
23836+#define AuRen_DROPPED_DST (1 << 11)
23837+#define au_ftest_ren(flags, name) ((flags) & AuRen_##name)
23838+#define au_fset_ren(flags, name) \
23839+ do { (flags) |= AuRen_##name; } while (0)
23840+#define au_fclr_ren(flags, name) \
23841+ do { (flags) &= ~AuRen_##name; } while (0)
23842+
23843+#ifndef CONFIG_AUFS_DIRREN
23844+#undef AuRen_DIRREN
23845+#define AuRen_DIRREN 0 /* au_ftest_ren(.., DIRREN) then always evaluates to 0 */
23846+#endif
23847+/* arguments and working state passed to the rename helpers in this file */
23848+struct au_ren_args {
23849+ struct {
23850+ struct dentry *dentry, *h_dentry, *parent, *h_parent,
23851+ *wh_dentry;
23852+ struct inode *dir, *inode;
23853+ struct au_hinode *hdir, *hinode;
23854+ struct au_dtime dt[AuParentChild];
23855+ aufs_bindex_t btop, bdiropq;
23856+ } sd[AuSrcDst]; /* [AuSRC] and [AuDST], reached via the accessor macros below */
23857+
23858+#define src_dentry sd[AuSRC].dentry
23859+#define src_dir sd[AuSRC].dir
23860+#define src_inode sd[AuSRC].inode
23861+#define src_h_dentry sd[AuSRC].h_dentry
23862+#define src_parent sd[AuSRC].parent
23863+#define src_h_parent sd[AuSRC].h_parent
23864+#define src_wh_dentry sd[AuSRC].wh_dentry
23865+#define src_hdir sd[AuSRC].hdir
23866+#define src_hinode sd[AuSRC].hinode
23867+#define src_h_dir sd[AuSRC].hdir->hi_inode
23868+#define src_dt sd[AuSRC].dt
23869+#define src_btop sd[AuSRC].btop
23870+#define src_bdiropq sd[AuSRC].bdiropq
23871+
23872+#define dst_dentry sd[AuDST].dentry
23873+#define dst_dir sd[AuDST].dir
23874+#define dst_inode sd[AuDST].inode
23875+#define dst_h_dentry sd[AuDST].h_dentry
23876+#define dst_parent sd[AuDST].parent
23877+#define dst_h_parent sd[AuDST].h_parent
23878+#define dst_wh_dentry sd[AuDST].wh_dentry
23879+#define dst_hdir sd[AuDST].hdir
23880+#define dst_hinode sd[AuDST].hinode
23881+#define dst_h_dir sd[AuDST].hdir->hi_inode
23882+#define dst_dt sd[AuDST].dt
23883+#define dst_btop sd[AuDST].btop
23884+#define dst_bdiropq sd[AuDST].bdiropq
23885+
23886+ struct dentry *h_trap; /* return value of vfsub_lock_rename() */
23887+ struct au_branch *br;
23888+ struct path h_path;
23889+ struct au_nhash whlist; /* allocated in au_ren_may_dir() */
23890+ aufs_bindex_t btgt, src_bwh; /* btgt: set by au_ren_wbr(); src_bwh: au_dbwh() of the source, saved in do_rename() */
23891+
23892+ struct {
23893+ unsigned short auren_flags; /* AuRen_* bits */
23894+ unsigned char flags; /* syscall parameter */
23895+ unsigned char exchange;
23896+ } __packed;
23897+
23898+ struct au_whtmp_rmdir *thargs; /* allocated in do_rename() */
23899+ struct dentry *h_dst; /* dget()-ed dst_h_dentry, taken in do_rename() */
23900+ struct au_hinode *h_root; /* locked in au_ren_lock(), unlocked in au_ren_unlock() */
23901+};
23902+
23903+/* ---------------------------------------------------------------------- */
23904+
23905+/*
23906+ * functions for reverting.
23907+ * when an error happens in a single rename system call, we should revert
23908+ * everything as if nothing happened.
23909+ * we don't need to revert the copy-up/down of the parent dir since it is
23910+ * harmless.
23911+ */
23912+/* expects int variables named err and rerr in the calling scope, and overwrites err with -EIO */
23913+#define RevertFailure(fmt, ...) do { \
23914+ AuIOErr("revert failure: " fmt " (%d, %d)\n", \
23915+ ##__VA_ARGS__, err, rerr); \
23916+ err = -EIO; \
23917+} while (0)
23918+/* undo au_ren_do_diropq() for a->sd[idx]: remove the diropq on a->btgt and restore the saved bdiropq. err is passed by value, so RevertFailure() changes only the local copy */
23919+static void au_ren_do_rev_diropq(int err, struct au_ren_args *a, int idx)
23920+{
23921+ int rerr;
23922+ struct dentry *d;
23923+#define src_or_dst(member) a->sd[idx].member
23924+
23925+ d = src_or_dst(dentry); /* {src,dst}_dentry */
23926+ au_hn_inode_lock_nested(src_or_dst(hinode), AuLsc_I_CHILD);
23927+ rerr = au_diropq_remove(d, a->btgt);
23928+ au_hn_inode_unlock(src_or_dst(hinode));
23929+ au_set_dbdiropq(d, src_or_dst(bdiropq)); /* restored whether or not the removal succeeded */
23930+ if (rerr)
23931+ RevertFailure("remove diropq %pd", d);
23932+
23933+#undef src_or_dst /* must name the macro defined above so that it does not outlive this function */
23934+}
23935+/* revert the diropq of each side whose DIROPQ_SRC/DIROPQ_DST flag is set */
23936+static void au_ren_rev_diropq(int err, struct au_ren_args *a)
23937+{
23938+ if (au_ftest_ren(a->auren_flags, DIROPQ_SRC))
23939+ au_ren_do_rev_diropq(err, a, AuSRC);
23940+ if (au_ftest_ren(a->auren_flags, DIROPQ_DST))
23941+ au_ren_do_rev_diropq(err, a, AuDST);
23942+}
23943+/* rename the lower entry of src_dentry on a->btgt back to the source name under src_h_parent */
23944+static void au_ren_rev_rename(int err, struct au_ren_args *a)
23945+{
23946+ int rerr;
23947+ struct inode *delegated;
23948+ struct path h_ppath = {
23949+ .dentry = a->src_h_parent,
23950+ .mnt = a->h_path.mnt
23951+ };
23952+
23953+ a->h_path.dentry = vfsub_lkup_one(&a->src_dentry->d_name, &h_ppath);
23954+ rerr = PTR_ERR(a->h_path.dentry);
23955+ if (IS_ERR(a->h_path.dentry)) {
23956+ RevertFailure("lkup one %pd", a->src_dentry);
23957+ return;
23958+ }
23959+
23960+ delegated = NULL;
23961+ rerr = vfsub_rename(a->dst_h_dir,
23962+ au_h_dptr(a->src_dentry, a->btgt),
23963+ a->src_h_dir, &a->h_path, &delegated, a->flags);
23964+ if (unlikely(rerr == -EWOULDBLOCK)) {
23965+ pr_warn("cannot retry for NFSv4 delegation"
23966+ " for an internal rename\n");
23967+ iput(delegated);
23968+ }
23969+ d_drop(a->h_path.dentry); /* the looked-up dentry is dropped whether or not the rename succeeded */
23970+ dput(a->h_path.dentry);
23971+ /* au_set_h_dptr(a->src_dentry, a->btgt, NULL); */
23972+ if (rerr)
23973+ RevertFailure("rename %pd", a->src_dentry);
23974+}
23975+/* rename a->h_dst back to the destination name in dst_h_parent; nothing is done when that name is already positive */
23976+static void au_ren_rev_whtmp(int err, struct au_ren_args *a)
23977+{
23978+ int rerr;
23979+ struct inode *delegated;
23980+ struct path h_ppath = {
23981+ .dentry = a->dst_h_parent,
23982+ .mnt = a->h_path.mnt
23983+ };
23984+
23985+ a->h_path.dentry = vfsub_lkup_one(&a->dst_dentry->d_name, &h_ppath);
23986+ rerr = PTR_ERR(a->h_path.dentry);
23987+ if (IS_ERR(a->h_path.dentry)) {
23988+ RevertFailure("lkup one %pd", a->dst_dentry);
23989+ return;
23990+ }
23991+ if (d_is_positive(a->h_path.dentry)) {
23992+ d_drop(a->h_path.dentry);
23993+ dput(a->h_path.dentry);
23994+ return;
23995+ }
23996+
23997+ delegated = NULL;
23998+ rerr = vfsub_rename(a->dst_h_dir, a->h_dst, a->dst_h_dir, &a->h_path,
23999+ &delegated, a->flags);
24000+ if (unlikely(rerr == -EWOULDBLOCK)) {
24001+ pr_warn("cannot retry for NFSv4 delegation"
24002+ " for an internal rename\n");
24003+ iput(delegated);
24004+ }
24005+ d_drop(a->h_path.dentry);
24006+ dput(a->h_path.dentry);
24007+ if (!rerr)
24008+ au_set_h_dptr(a->dst_dentry, a->btgt, dget(a->h_dst)); /* extra reference taken for the dentry info */
24009+ else
24010+ RevertFailure("rename %pd", a->h_dst);
24011+}
24012+/* unlink the whiteout src_wh_dentry and restore the saved src_bwh of the source dentry */
24013+static void au_ren_rev_whsrc(int err, struct au_ren_args *a)
24014+{
24015+ int rerr;
24016+
24017+ a->h_path.dentry = a->src_wh_dentry;
24018+ rerr = au_wh_unlink_dentry(a->src_h_dir, &a->h_path, a->src_dentry);
24019+ au_set_dbwh(a->src_dentry, a->src_bwh); /* restored whether or not the unlink succeeded */
24020+ if (rerr)
24021+ RevertFailure("unlink %pd", a->src_wh_dentry);
24022+}
24023+#undef RevertFailure
24024+
24025+/* ---------------------------------------------------------------------- */
24026+
24027+/*
24028+ * when we have to copyup the renaming entry, do it with the rename-target name
24029+ * in order to minimize the cost (the later actual rename is unnecessary).
24030+ * otherwise rename it on the target branch (the only case handled here, anything else hits BUG()).
24031+ */
24032+static int au_ren_or_cpup(struct au_ren_args *a)
24033+{
24034+ int err;
24035+ struct dentry *d;
24036+ struct inode *delegated;
24037+
24038+ d = a->src_dentry;
24039+ if (au_dbtop(d) == a->btgt) {
24040+ a->h_path.dentry = a->dst_h_dentry;
24041+ AuDebugOn(au_dbtop(d) != a->btgt);
24042+ delegated = NULL;
24043+ err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt),
24044+ a->dst_h_dir, &a->h_path, &delegated,
24045+ a->flags);
24046+ if (unlikely(err == -EWOULDBLOCK)) {
24047+ pr_warn("cannot retry for NFSv4 delegation"
24048+ " for an internal rename\n");
24049+ iput(delegated);
24050+ }
24051+ } else
24052+ BUG(); /* the source must already be on a->btgt */
24053+
24054+ if (!err && a->h_dst)
24055+ /* it will be set to dinfo later */
24056+ dget(a->h_dst);
24057+
24058+ return err;
24059+}
24060+
24061+/* cf. aufs_rmdir(); removes the whtmp dir a->h_dst now, or hands it to au_whtmp_kick_rmdir(); always returns 0 */
24062+static int au_ren_del_whtmp(struct au_ren_args *a)
24063+{
24064+ int err;
24065+ struct inode *dir;
24066+
24067+ dir = a->dst_dir;
24068+ SiMustAnyLock(dir->i_sb);
24069+ if (!au_nhash_test_longer_wh(&a->whlist, a->btgt,
24070+ au_sbi(dir->i_sb)->si_dirwh)
24071+ || au_test_fs_remote(a->h_dst->d_sb)) {
24072+ err = au_whtmp_rmdir(dir, a->btgt, a->h_dst, &a->whlist);
24073+ if (unlikely(err))
24074+ pr_warn("failed removing whtmp dir %pd (%d), "
24075+ "ignored.\n", a->h_dst, err);
24076+ } else {
24077+ au_nhash_wh_free(&a->thargs->whlist);
24078+ a->thargs->whlist = a->whlist; /* the list is moved into thargs */
24079+ a->whlist.nh_num = 0;
24080+ au_whtmp_kick_rmdir(dir, a->btgt, a->h_dst, a->thargs);
24081+ dput(a->h_dst);
24082+ a->thargs = NULL; /* do_rename() frees thargs only when it is still set */
24083+ }
24084+
24085+ return 0;
24086+}
24087+
24088+/* make it 'opaque' dir. */
24089+static int au_ren_do_diropq(struct au_ren_args *a, int idx)
24090+{
24091+ int err;
24092+ struct dentry *d, *diropq;
24093+#define src_or_dst(member) a->sd[idx].member
24094+
24095+ err = 0;
24096+ d = src_or_dst(dentry); /* {src,dst}_dentry */
24097+ src_or_dst(bdiropq) = au_dbdiropq(d);
24098+ src_or_dst(hinode) = au_hi(src_or_dst(inode), a->btgt);
24099+ au_hn_inode_lock_nested(src_or_dst(hinode), AuLsc_I_CHILD);
24100+ diropq = au_diropq_create(d, a->btgt);
24101+ au_hn_inode_unlock(src_or_dst(hinode));
24102+ if (IS_ERR(diropq))
24103+ err = PTR_ERR(diropq);
24104+ else
24105+ dput(diropq);
24106+
24107+#undef src_or_dst_
24108+ return err;
24109+}
24110+/* make the source dir opaque on a->btgt, and the destination dir too on exchange, when the conditions below hold; sets DIROPQ_SRC/DIROPQ_DST accordingly */
24111+static int au_ren_diropq(struct au_ren_args *a)
24112+{
24113+ int err;
24114+ unsigned char always;
24115+ struct dentry *d;
24116+
24117+ err = 0;
24118+ d = a->dst_dentry; /* already renamed on the branch */
24119+ always = !!au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ);
24120+ if (au_ftest_ren(a->auren_flags, ISDIR_SRC)
24121+ && !au_ftest_ren(a->auren_flags, DIRREN)
24122+ && a->btgt != au_dbdiropq(a->src_dentry)
24123+ && (a->dst_wh_dentry
24124+ || a->btgt <= au_dbdiropq(d)
24125+ /* hide the lower to keep xino */
24126+ /* the lowers may not be a dir, but we hide them anyway */
24127+ || a->btgt < au_dbbot(d)
24128+ || always)) {
24129+ AuDbg("here\n");
24130+ err = au_ren_do_diropq(a, AuSRC);
24131+ if (unlikely(err))
24132+ goto out;
24133+ au_fset_ren(a->auren_flags, DIROPQ_SRC);
24134+ }
24135+ if (!a->exchange)
24136+ goto out; /* success */
24137+
24138+ d = a->src_dentry; /* already renamed on the branch */
24139+ if (au_ftest_ren(a->auren_flags, ISDIR_DST)
24140+ && a->btgt != au_dbdiropq(a->dst_dentry)
24141+ && (a->btgt < au_dbdiropq(d)
24142+ || a->btgt < au_dbbot(d)
24143+ || always)) {
24144+ AuDbgDentry(a->src_dentry);
24145+ AuDbgDentry(a->dst_dentry);
24146+ err = au_ren_do_diropq(a, AuDST);
24147+ if (unlikely(err))
24148+ goto out_rev_src;
24149+ au_fset_ren(a->auren_flags, DIROPQ_DST);
24150+ }
24151+ goto out; /* success */
24152+
24153+out_rev_src:
24154+ AuDbg("err %d, reverting src\n", err);
24155+ au_ren_rev_diropq(err, a); /* DIROPQ_DST is not set here, so only a set DIROPQ_SRC is reverted */
24156+out:
24157+ return err;
24158+}
24159+/* the rename itself; each failing step jumps to a label which reverts the steps done before it. returns 0 or a negative errno */
24160+static int do_rename(struct au_ren_args *a)
24161+{
24162+ int err;
24163+ struct dentry *d, *h_d;
24164+
24165+ if (!a->exchange) {
24166+ /* prepare workqueue args for asynchronous rmdir */
24167+ h_d = a->dst_h_dentry;
24168+ if (au_ftest_ren(a->auren_flags, ISDIR_DST)
24169+ /* && !au_ftest_ren(a->auren_flags, DIRREN) */
24170+ && d_is_positive(h_d)) {
24171+ err = -ENOMEM;
24172+ a->thargs = au_whtmp_rmdir_alloc(a->src_dentry->d_sb,
24173+ GFP_NOFS);
24174+ if (unlikely(!a->thargs))
24175+ goto out;
24176+ a->h_dst = dget(h_d); /* dropped at out_thargs, or by au_ren_del_whtmp() */
24177+ }
24178+
24179+ /* create whiteout for src_dentry */
24180+ if (au_ftest_ren(a->auren_flags, WHSRC)) {
24181+ a->src_bwh = au_dbwh(a->src_dentry);
24182+ AuDebugOn(a->src_bwh >= 0);
24183+ a->src_wh_dentry = au_wh_create(a->src_dentry, a->btgt,
24184+ a->src_h_parent);
24185+ err = PTR_ERR(a->src_wh_dentry);
24186+ if (IS_ERR(a->src_wh_dentry))
24187+ goto out_thargs;
24188+ }
24189+
24190+ /* lookup whiteout for dentry */
24191+ if (au_ftest_ren(a->auren_flags, WHDST)) {
24192+ h_d = au_wh_lkup(a->dst_h_parent,
24193+ &a->dst_dentry->d_name, a->br);
24194+ err = PTR_ERR(h_d);
24195+ if (IS_ERR(h_d))
24196+ goto out_whsrc;
24197+ if (d_is_negative(h_d))
24198+ dput(h_d);
24199+ else
24200+ a->dst_wh_dentry = h_d;
24201+ }
24202+
24203+ /* rename dentry to tmpwh */
24204+ if (a->thargs) {
24205+ err = au_whtmp_ren(a->dst_h_dentry, a->br);
24206+ if (unlikely(err))
24207+ goto out_whdst;
24208+
24209+ d = a->dst_dentry;
24210+ au_set_h_dptr(d, a->btgt, NULL);
24211+ err = au_lkup_neg(d, a->btgt, /*wh*/0);
24212+ if (unlikely(err))
24213+ goto out_whtmp;
24214+ a->dst_h_dentry = au_h_dptr(d, a->btgt);
24215+ }
24216+ }
24217+
24218+ BUG_ON(d_is_positive(a->dst_h_dentry) && a->src_btop != a->btgt);
24219+#if 0 /* debugging */
24220+ BUG_ON(!au_ftest_ren(a->auren_flags, DIRREN)
24221+ && d_is_positive(a->dst_h_dentry)
24222+ && a->src_btop != a->btgt);
24223+#endif
24224+
24225+ /* rename by vfs_rename or cpup */
24226+ err = au_ren_or_cpup(a);
24227+ if (unlikely(err))
24228+ /* leave the copied-up one */
24229+ goto out_whtmp;
24230+
24231+ /* make dir opaque */
24232+ err = au_ren_diropq(a);
24233+ if (unlikely(err))
24234+ goto out_rename;
24235+
24236+ /* update target timestamps */
24237+ if (a->exchange) {
24238+ AuDebugOn(au_dbtop(a->dst_dentry) != a->btgt);
24239+ a->h_path.dentry = au_h_dptr(a->dst_dentry, a->btgt);
24240+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
24241+ a->dst_inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
24242+ }
24243+ AuDebugOn(au_dbtop(a->src_dentry) != a->btgt);
24244+ a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt);
24245+ vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
24246+ a->src_inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
24247+
24248+ if (!a->exchange) {
24249+ /* remove whiteout for dentry */
24250+ if (a->dst_wh_dentry) {
24251+ a->h_path.dentry = a->dst_wh_dentry;
24252+ err = au_wh_unlink_dentry(a->dst_h_dir, &a->h_path,
24253+ a->dst_dentry);
24254+ if (unlikely(err))
24255+ goto out_diropq;
24256+ }
24257+
24258+ /* remove whtmp */
24259+ if (a->thargs)
24260+ au_ren_del_whtmp(a); /* ignore this error */
24261+
24262+ au_fhsm_wrote(a->src_dentry->d_sb, a->btgt, /*force*/0);
24263+ }
24264+ err = 0;
24265+ goto out_success;
24266+
24267+out_diropq:
24268+ au_ren_rev_diropq(err, a);
24269+out_rename:
24270+ au_ren_rev_rename(err, a);
24271+ dput(a->h_dst); /* pairs with the dget() in au_ren_or_cpup() */
24272+out_whtmp:
24273+ if (a->thargs)
24274+ au_ren_rev_whtmp(err, a);
24275+out_whdst:
24276+ dput(a->dst_wh_dentry);
24277+ a->dst_wh_dentry = NULL; /* so that the dput() at out_success does not drop it twice */
24278+out_whsrc:
24279+ if (a->src_wh_dentry)
24280+ au_ren_rev_whsrc(err, a);
24281+out_success:
24282+ dput(a->src_wh_dentry);
24283+ dput(a->dst_wh_dentry);
24284+out_thargs:
24285+ if (a->thargs) {
24286+ dput(a->h_dst);
24287+ au_whtmp_rmdir_free(a->thargs);
24288+ a->thargs = NULL;
24289+ }
24290+out:
24291+ return err;
24292+}
24293+
24294+/* ---------------------------------------------------------------------- */
24295+
24296+/*
24297+ * test if @dentry dir can be the rename destination or not.
24298+ * success (0) means it is a logically empty dir, as judged by au_test_empty().
24299+ */
24300+static int may_rename_dstdir(struct dentry *dentry, struct au_nhash *whlist)
24301+{
24302+ return au_test_empty(dentry, whlist);
24303+}
24304+
24305+/*
24306+ * test if @a->src_dentry dir can be the rename source or not.
24307+ * if it can, return 0. -ENOTEMPTY becomes 0 with DIRREN, -EXDEV without it.
24308+ * success means,
24309+ * - it is a logically empty dir.
24310+ * - or, it exists on writable branch and has no children including whiteouts
24311+ * on the lower branch unless DIRREN is on.
24312+ */
24313+static int may_rename_srcdir(struct au_ren_args *a)
24314+{
24315+ int err;
24316+ unsigned int rdhash;
24317+ aufs_bindex_t btop, btgt;
24318+ struct dentry *dentry;
24319+ struct super_block *sb;
24320+ struct au_sbinfo *sbinfo;
24321+
24322+ dentry = a->src_dentry;
24323+ sb = dentry->d_sb;
24324+ sbinfo = au_sbi(sb);
24325+ if (au_opt_test(sbinfo->si_mntflags, DIRREN))
24326+ au_fset_ren(a->auren_flags, DIRREN);
24327+
24328+ btgt = a->btgt;
24329+ btop = au_dbtop(dentry);
24330+ if (btop != btgt) {
24331+ struct au_nhash whlist; /* temporary, freed right after the test */
24332+
24333+ SiMustAnyLock(sb);
24334+ rdhash = sbinfo->si_rdhash;
24335+ if (!rdhash)
24336+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL,
24337+ dentry));
24338+ err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
24339+ if (unlikely(err))
24340+ goto out;
24341+ err = au_test_empty(dentry, &whlist);
24342+ au_nhash_wh_free(&whlist);
24343+ goto out;
24344+ }
24345+
24346+ if (btop == au_dbtaildir(dentry))
24347+ return 0; /* success */
24348+
24349+ err = au_test_empty_lower(dentry);
24350+
24351+out:
24352+ if (err == -ENOTEMPTY) {
24353+ if (au_ftest_ren(a->auren_flags, DIRREN)) {
24354+ err = 0;
24355+ } else {
24356+ AuWarn1("renaming dir who has child(ren) on multiple "
24357+ "branches, is not supported\n");
24358+ err = -EXDEV;
24359+ }
24360+ }
24361+ return err;
24362+}
24363+
24364+/* side effect: sets a->whlist (for an existing destination dir) and {src,dst}_h_dentry */
24365+static int au_ren_may_dir(struct au_ren_args *a)
24366+{
24367+ int err;
24368+ unsigned int rdhash;
24369+ struct dentry *d;
24370+
24371+ d = a->dst_dentry;
24372+ SiMustAnyLock(d->d_sb);
24373+
24374+ err = 0;
24375+ if (au_ftest_ren(a->auren_flags, ISDIR_DST) && a->dst_inode) {
24376+ rdhash = au_sbi(d->d_sb)->si_rdhash;
24377+ if (!rdhash)
24378+ rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, d));
24379+ err = au_nhash_alloc(&a->whlist, rdhash, GFP_NOFS);
24380+ if (unlikely(err))
24381+ goto out;
24382+
24383+ if (!a->exchange) {
24384+ au_set_dbtop(d, a->dst_btop); /* dbtop is switched to dst_btop only for the test */
24385+ err = may_rename_dstdir(d, &a->whlist);
24386+ au_set_dbtop(d, a->btgt);
24387+ } else
24388+ err = may_rename_srcdir(a);
24389+ }
24390+ a->dst_h_dentry = au_h_dptr(d, au_dbtop(d)); /* assigned before the error check, so it is set on failure too */
24391+ if (unlikely(err))
24392+ goto out;
24393+
24394+ d = a->src_dentry;
24395+ a->src_h_dentry = au_h_dptr(d, au_dbtop(d));
24396+ if (au_ftest_ren(a->auren_flags, ISDIR_SRC)) {
24397+ err = may_rename_srcdir(a);
24398+ if (unlikely(err)) {
24399+ au_nhash_wh_free(&a->whlist);
24400+ a->whlist.nh_num = 0;
24401+ }
24402+ }
24403+out:
24404+ return err;
24405+}
24406+
24407+/* ---------------------------------------------------------------------- */
24408+
24409+/*
24410+ * simple tests for rename.
24411+ * following the checks in vfs, plus the parent-child relationship.
24412+ */
24413+static int au_may_ren(struct au_ren_args *a)
24414+{
24415+ int err, isdir;
24416+ struct inode *h_inode;
24417+
24418+ if (a->src_btop == a->btgt) {
24419+ err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent,
24420+ au_ftest_ren(a->auren_flags, ISDIR_SRC));
24421+ if (unlikely(err))
24422+ goto out;
24423+ err = -EINVAL;
24424+ if (unlikely(a->src_h_dentry == a->h_trap))
24425+ goto out;
24426+ }
24427+
24428+ err = 0;
24429+ if (a->dst_btop != a->btgt)
24430+ goto out; /* the destination is not on the target branch, nothing more to test */
24431+
24432+ err = -ENOTEMPTY;
24433+ if (unlikely(a->dst_h_dentry == a->h_trap))
24434+ goto out;
24435+
24436+ err = -EIO; /* kept when the aufs dentry and the lower dentry disagree below */
24437+ isdir = !!au_ftest_ren(a->auren_flags, ISDIR_DST);
24438+ if (d_really_is_negative(a->dst_dentry)) {
24439+ if (d_is_negative(a->dst_h_dentry))
24440+ err = au_may_add(a->dst_dentry, a->btgt,
24441+ a->dst_h_parent, isdir);
24442+ } else {
24443+ if (unlikely(d_is_negative(a->dst_h_dentry)))
24444+ goto out;
24445+ h_inode = d_inode(a->dst_h_dentry);
24446+ if (h_inode->i_nlink)
24447+ err = au_may_del(a->dst_dentry, a->btgt,
24448+ a->dst_h_parent, isdir);
24449+ }
24450+
24451+out:
24452+ if (unlikely(err == -ENOENT || err == -EEXIST))
24453+ err = -EIO; /* -ENOENT and -EEXIST are reported as -EIO */
24454+ AuTraceErr(err);
24455+ return err;
24456+}
24457+
24458+/* ---------------------------------------------------------------------- */
24459+
24460+/*
24461+ * locking order
24462+ * (VFS)
24463+ * - src_dir and dir by lock_rename()
24464+ * - inode if exists
24465+ * (aufs)
24466+ * - lock all
24467+ * + src_dentry and dentry by aufs_read_and_write_lock2() which calls,
24468+ * + si_read_lock
24469+ * + di_write_lock2_child()
24470+ * + di_write_lock_child()
24471+ * + ii_write_lock_child()
24472+ * + di_write_lock_child2()
24473+ * + ii_write_lock_child2()
24474+ * + src_parent and parent
24475+ * + di_write_lock_parent()
24476+ * + ii_write_lock_parent()
24477+ * + di_write_lock_parent2()
24478+ * + ii_write_lock_parent2()
24479+ * + lower src_dir and dir by vfsub_lock_rename()
24480+ * + verify every relationship between child and parent. if any
24481+ * of them fails, unlock all and return -EBUSY.
24482+ */
24483+static void au_ren_unlock(struct au_ren_args *a)
24484+{
24485+ vfsub_unlock_rename(a->src_h_parent, a->src_hdir,
24486+ a->dst_h_parent, a->dst_hdir); /* counterpart of vfsub_lock_rename() in au_ren_lock() */
24487+ if (au_ftest_ren(a->auren_flags, DIRREN)
24488+ && a->h_root)
24489+ au_hn_inode_unlock(a->h_root);
24490+ if (au_ftest_ren(a->auren_flags, MNT_WRITE))
24491+ vfsub_mnt_drop_write(au_br_mnt(a->br)); /* write access was taken in au_ren_lock() */
24492+}
24493+/* take write access to the branch mount and the lower rename locks on a->btgt, then verify the lower entries; a failure after the locks are taken is undone by au_ren_unlock() */
24494+static int au_ren_lock(struct au_ren_args *a)
24495+{
24496+ int err;
24497+ unsigned int udba;
24498+
24499+ err = 0;
24500+ a->src_h_parent = au_h_dptr(a->src_parent, a->btgt);
24501+ a->src_hdir = au_hi(a->src_dir, a->btgt);
24502+ a->dst_h_parent = au_h_dptr(a->dst_parent, a->btgt);
24503+ a->dst_hdir = au_hi(a->dst_dir, a->btgt);
24504+
24505+ err = vfsub_mnt_want_write(au_br_mnt(a->br));
24506+ if (unlikely(err))
24507+ goto out; /* nothing has been taken yet */
24508+ au_fset_ren(a->auren_flags, MNT_WRITE);
24509+ if (au_ftest_ren(a->auren_flags, DIRREN)) {
24510+ struct dentry *root;
24511+ struct inode *dir;
24512+
24513+ /*
24514+ * sbinfo is already locked, so this ii_read_lock is
24515+ * unnecessary. but our debugging feature checks it.
24516+ */
24517+ root = a->src_inode->i_sb->s_root;
24518+ if (root != a->src_parent && root != a->dst_parent) {
24519+ dir = d_inode(root);
24520+ ii_read_lock_parent3(dir);
24521+ a->h_root = au_hi(dir, a->btgt);
24522+ ii_read_unlock(dir);
24523+ au_hn_inode_lock_nested(a->h_root, AuLsc_I_PARENT3);
24524+ }
24525+ }
24526+ a->h_trap = vfsub_lock_rename(a->src_h_parent, a->src_hdir,
24527+ a->dst_h_parent, a->dst_hdir);
24528+ udba = au_opt_udba(a->src_dentry->d_sb);
24529+ if (unlikely(a->src_hdir->hi_inode != d_inode(a->src_h_parent)
24530+ || a->dst_hdir->hi_inode != d_inode(a->dst_h_parent)))
24531+ err = au_busy_or_stale();
24532+ if (!err && au_dbtop(a->src_dentry) == a->btgt)
24533+ err = au_h_verify(a->src_h_dentry, udba,
24534+ d_inode(a->src_h_parent), a->src_h_parent,
24535+ a->br);
24536+ if (!err && au_dbtop(a->dst_dentry) == a->btgt)
24537+ err = au_h_verify(a->dst_h_dentry, udba,
24538+ d_inode(a->dst_h_parent), a->dst_h_parent,
24539+ a->br);
24540+ if (!err)
24541+ goto out; /* success */
24542+
24543+ err = au_busy_or_stale(); /* every verification failure is reported through au_busy_or_stale() */
24544+ au_ren_unlock(a);
24545+
24546+out:
24547+ return err;
24548+}
24549+
24550+/* ---------------------------------------------------------------------- */
24551+/* bump the iversion and refresh nlink/timestamps of dst_dir, and of src_dir on exchange or when ISSAMEDIR is not set */
24552+static void au_ren_refresh_dir(struct au_ren_args *a)
24553+{
24554+ struct inode *dir;
24555+
24556+ dir = a->dst_dir;
24557+ inode_inc_iversion(dir);
24558+ if (au_ftest_ren(a->auren_flags, ISDIR_SRC)) {
24559+ /* is this updating defined in POSIX? */
24560+ au_cpup_attr_timesizes(a->src_inode);
24561+ au_cpup_attr_nlink(dir, /*force*/1);
24562+ }
24563+ au_dir_ts(dir, a->btgt);
24564+
24565+ if (a->exchange) {
24566+ dir = a->src_dir;
24567+ inode_inc_iversion(dir);
24568+ if (au_ftest_ren(a->auren_flags, ISDIR_DST)) {
24569+ /* is this updating defined in POSIX? */
24570+ au_cpup_attr_timesizes(a->dst_inode);
24571+ au_cpup_attr_nlink(dir, /*force*/1);
24572+ }
24573+ au_dir_ts(dir, a->btgt);
24574+ }
24575+
24576+ if (au_ftest_ren(a->auren_flags, ISSAMEDIR))
24577+ return;
24578+
24579+ dir = a->src_dir; /* on exchange this dir was already handled once above */
24580+ inode_inc_iversion(dir);
24581+ if (au_ftest_ren(a->auren_flags, ISDIR_SRC))
24582+ au_cpup_attr_nlink(dir, /*force*/1);
24583+ au_dir_ts(dir, a->btgt);
24584+}
24585+/* refresh the aufs-side dentry and inode info of dst_dentry and src_dentry around a->btgt */
24586+static void au_ren_refresh(struct au_ren_args *a)
24587+{
24588+ aufs_bindex_t bbot, bindex;
24589+ struct dentry *d, *h_d;
24590+ struct inode *i, *h_i;
24591+ struct super_block *sb;
24592+
24593+ d = a->dst_dentry;
24594+ d_drop(d);
24595+ if (a->h_dst)
24596+ /* already dget-ed by au_ren_or_cpup() */
24597+ au_set_h_dptr(d, a->btgt, a->h_dst);
24598+
24599+ i = a->dst_inode;
24600+ if (i) {
24601+ if (!a->exchange) {
24602+ if (!au_ftest_ren(a->auren_flags, ISDIR_DST))
24603+ vfsub_drop_nlink(i);
24604+ else {
24605+ vfsub_dead_dir(i);
24606+ au_cpup_attr_timesizes(i);
24607+ }
24608+ au_update_dbrange(d, /*do_put_zero*/1);
24609+ } else
24610+ au_cpup_attr_nlink(i, /*force*/1);
24611+ } else {
24612+ bbot = a->btgt; /* first clear the branches before a->btgt, then the ones after it */
24613+ for (bindex = au_dbtop(d); bindex < bbot; bindex++)
24614+ au_set_h_dptr(d, bindex, NULL);
24615+ bbot = au_dbbot(d);
24616+ for (bindex = a->btgt + 1; bindex <= bbot; bindex++)
24617+ au_set_h_dptr(d, bindex, NULL);
24618+ au_update_dbrange(d, /*do_put_zero*/0);
24619+ }
24620+
24621+ if (a->exchange
24622+ || au_ftest_ren(a->auren_flags, DIRREN)) {
24623+ d_drop(a->src_dentry);
24624+ if (au_ftest_ren(a->auren_flags, DIRREN))
24625+ au_set_dbwh(a->src_dentry, -1);
24626+ return;
24627+ }
24628+
24629+ d = a->src_dentry;
24630+ au_set_dbwh(d, -1);
24631+ bbot = au_dbbot(d);
24632+ for (bindex = a->btgt + 1; bindex <= bbot; bindex++) {
24633+ h_d = au_h_dptr(d, bindex);
24634+ if (h_d)
24635+ au_set_h_dptr(d, bindex, NULL);
24636+ }
24637+ au_set_dbbot(d, a->btgt);
24638+
24639+ sb = d->d_sb;
24640+ i = a->src_inode;
24641+ if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i))
24642+ return; /* success */
24643+
24644+ bbot = au_ibbot(i);
24645+ for (bindex = a->btgt + 1; bindex <= bbot; bindex++) {
24646+ h_i = au_h_iptr(i, bindex);
24647+ if (h_i) {
24648+ au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0);
24649+ /* ignore this error */
24650+ au_set_h_iptr(i, bindex, NULL, 0);
24651+ }
24652+ }
24653+ au_set_ibbot(i, a->btgt);
24654+}
24655+
24656+/* ---------------------------------------------------------------------- */
24657+
24658+/* mainly for link(2) and rename(2); returns btgt, or -1 when that branch is read-only or when the parent's diropq or the dentry's whiteout sits at a smaller branch index than btgt */
24659+int au_wbr(struct dentry *dentry, aufs_bindex_t btgt)
24660+{
24661+ aufs_bindex_t bdiropq, bwh;
24662+ struct dentry *parent;
24663+ struct au_branch *br;
24664+
24665+ parent = dentry->d_parent;
24666+ IMustLock(d_inode(parent)); /* dir is locked */
24667+
24668+ bdiropq = au_dbdiropq(parent);
24669+ bwh = au_dbwh(dentry);
24670+ br = au_sbr(dentry->d_sb, btgt);
24671+ if (au_br_rdonly(br)
24672+ || (0 <= bdiropq && bdiropq < btgt)
24673+ || (0 <= bwh && bwh < btgt))
24674+ btgt = -1;
24675+
24676+ AuDbg("btgt %d\n", btgt);
24677+ return btgt;
24678+}
24679+
24680+/* sets src_btop, dst_btop and btgt; returns the value of au_wr_dir(), which is also what is stored in btgt */
24681+static int au_ren_wbr(struct au_ren_args *a)
24682+{
24683+ int err;
24684+ struct au_wr_dir_args wr_dir_args = {
24685+ /* .force_btgt = -1, */
24686+ .flags = AuWrDir_ADD_ENTRY
24687+ };
24688+
24689+ a->src_btop = au_dbtop(a->src_dentry);
24690+ a->dst_btop = au_dbtop(a->dst_dentry);
24691+ if (au_ftest_ren(a->auren_flags, ISDIR_SRC)
24692+ || au_ftest_ren(a->auren_flags, ISDIR_DST))
24693+ au_fset_wrdir(wr_dir_args.flags, ISDIR);
24694+ wr_dir_args.force_btgt = a->src_btop;
24695+ if (a->dst_inode && a->dst_btop < a->src_btop)
24696+ wr_dir_args.force_btgt = a->dst_btop; /* an existing destination on a smaller branch index wins */
24697+ wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt);
24698+ err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args);
24699+ a->btgt = err;
24700+ if (a->exchange)
24701+ au_update_dbtop(a->dst_dentry);
24702+
24703+ return err;
24704+}
24705+
24706+static void au_ren_dt(struct au_ren_args *a)
24707+{
24708+ a->h_path.dentry = a->src_h_parent;
24709+ au_dtime_store(a->src_dt + AuPARENT, a->src_parent, &a->h_path);
24710+ if (!au_ftest_ren(a->auren_flags, ISSAMEDIR)) {
24711+ a->h_path.dentry = a->dst_h_parent;
24712+ au_dtime_store(a->dst_dt + AuPARENT, a->dst_parent, &a->h_path);
24713+ }
24714+
24715+ au_fclr_ren(a->auren_flags, DT_DSTDIR);
24716+ if (!au_ftest_ren(a->auren_flags, ISDIR_SRC)
24717+ && !a->exchange)
24718+ return;
24719+
24720+ a->h_path.dentry = a->src_h_dentry;
24721+ au_dtime_store(a->src_dt + AuCHILD, a->src_dentry, &a->h_path);
24722+ if (d_is_positive(a->dst_h_dentry)) {
24723+ au_fset_ren(a->auren_flags, DT_DSTDIR);
24724+ a->h_path.dentry = a->dst_h_dentry;
24725+ au_dtime_store(a->dst_dt + AuCHILD, a->dst_dentry, &a->h_path);
24726+ }
24727+}
24728+
24729+static void au_ren_rev_dt(int err, struct au_ren_args *a)
24730+{
24731+ struct dentry *h_d;
24732+ struct inode *h_inode;
24733+
24734+ au_dtime_revert(a->src_dt + AuPARENT);
24735+ if (!au_ftest_ren(a->auren_flags, ISSAMEDIR))
24736+ au_dtime_revert(a->dst_dt + AuPARENT);
24737+
24738+ if (au_ftest_ren(a->auren_flags, ISDIR_SRC) && err != -EIO) {
24739+ h_d = a->src_dt[AuCHILD].dt_h_path.dentry;
24740+ h_inode = d_inode(h_d);
24741+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
24742+ au_dtime_revert(a->src_dt + AuCHILD);
24743+ inode_unlock(h_inode);
24744+
24745+ if (au_ftest_ren(a->auren_flags, DT_DSTDIR)) {
24746+ h_d = a->dst_dt[AuCHILD].dt_h_path.dentry;
24747+ h_inode = d_inode(h_d);
24748+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
24749+ au_dtime_revert(a->dst_dt + AuCHILD);
24750+ inode_unlock(h_inode);
24751+ }
24752+ }
24753+}
24754+
24755+/* ---------------------------------------------------------------------- */
24756+
24757+int aufs_rename(struct user_namespace *userns,
24758+ struct inode *_src_dir, struct dentry *_src_dentry,
24759+ struct inode *_dst_dir, struct dentry *_dst_dentry,
24760+ unsigned int _flags)
24761+{
24762+ int err, lock_flags;
24763+ void *rev;
24764+ /* reduce stack space */
24765+ struct au_ren_args *a;
24766+ struct au_pin pin;
24767+
24768+ AuDbg("%pd, %pd, 0x%x\n", _src_dentry, _dst_dentry, _flags);
24769+ IMustLock(_src_dir);
24770+ IMustLock(_dst_dir);
24771+
24772+ err = -EINVAL;
24773+ if (unlikely(_flags & RENAME_WHITEOUT))
24774+ goto out;
24775+
24776+ err = -ENOMEM;
24777+ BUILD_BUG_ON(sizeof(*a) > PAGE_SIZE);
24778+ a = kzalloc(sizeof(*a), GFP_NOFS);
24779+ if (unlikely(!a))
24780+ goto out;
24781+
24782+ a->flags = _flags;
24783+ BUILD_BUG_ON(sizeof(a->exchange) == sizeof(u8)
24784+ && RENAME_EXCHANGE > U8_MAX);
24785+ a->exchange = _flags & RENAME_EXCHANGE;
24786+ a->src_dir = _src_dir;
24787+ a->src_dentry = _src_dentry;
24788+ a->src_inode = NULL;
24789+ if (d_really_is_positive(a->src_dentry))
24790+ a->src_inode = d_inode(a->src_dentry);
24791+ a->src_parent = a->src_dentry->d_parent; /* dir inode is locked */
24792+ a->dst_dir = _dst_dir;
24793+ a->dst_dentry = _dst_dentry;
24794+ a->dst_inode = NULL;
24795+ if (d_really_is_positive(a->dst_dentry))
24796+ a->dst_inode = d_inode(a->dst_dentry);
24797+ a->dst_parent = a->dst_dentry->d_parent; /* dir inode is locked */
24798+ if (a->dst_inode) {
24799+ /*
24800+ * if EXCHANGE && src is non-dir && dst is dir,
24801+ * dst is not locked.
24802+ */
24803+ /* IMustLock(a->dst_inode); */
24804+ au_igrab(a->dst_inode);
24805+ }
24806+
24807+ err = -ENOTDIR;
24808+ lock_flags = AuLock_FLUSH | AuLock_NOPLM | AuLock_GEN;
24809+ if (d_is_dir(a->src_dentry)) {
24810+ au_fset_ren(a->auren_flags, ISDIR_SRC);
24811+ if (unlikely(!a->exchange
24812+ && d_really_is_positive(a->dst_dentry)
24813+ && !d_is_dir(a->dst_dentry)))
24814+ goto out_free;
24815+ lock_flags |= AuLock_DIRS;
24816+ }
24817+ if (a->dst_inode && d_is_dir(a->dst_dentry)) {
24818+ au_fset_ren(a->auren_flags, ISDIR_DST);
24819+ if (unlikely(!a->exchange
24820+ && d_really_is_positive(a->src_dentry)
24821+ && !d_is_dir(a->src_dentry)))
24822+ goto out_free;
24823+ lock_flags |= AuLock_DIRS;
24824+ }
24825+ err = aufs_read_and_write_lock2(a->dst_dentry, a->src_dentry,
24826+ lock_flags);
24827+ if (unlikely(err))
24828+ goto out_free;
24829+
24830+ err = au_d_hashed_positive(a->src_dentry);
24831+ if (unlikely(err))
24832+ goto out_unlock;
24833+ err = -ENOENT;
24834+ if (a->dst_inode) {
24835+ /*
24836+ * If it is a dir, VFS unhash it before this
24837+ * function. It means we cannot rely upon d_unhashed().
24838+ */
24839+ if (unlikely(!a->dst_inode->i_nlink))
24840+ goto out_unlock;
24841+ if (!au_ftest_ren(a->auren_flags, ISDIR_DST)) {
24842+ err = au_d_hashed_positive(a->dst_dentry);
24843+ if (unlikely(err && !a->exchange))
24844+ goto out_unlock;
24845+ } else if (unlikely(IS_DEADDIR(a->dst_inode)))
24846+ goto out_unlock;
24847+ } else if (unlikely(d_unhashed(a->dst_dentry)))
24848+ goto out_unlock;
24849+
24850+ /*
24851+ * is it possible?
24852+ * yes, it happened (in linux-3.3-rcN) but I don't know why.
24853+ * there may exist a problem somewhere else.
24854+ */
24855+ err = -EINVAL;
24856+ if (unlikely(d_inode(a->dst_parent) == d_inode(a->src_dentry)))
24857+ goto out_unlock;
24858+
24859+ au_fset_ren(a->auren_flags, ISSAMEDIR); /* temporary */
24860+ di_write_lock_parent(a->dst_parent);
24861+
24862+ /* which branch we process */
24863+ err = au_ren_wbr(a);
24864+ if (unlikely(err < 0))
24865+ goto out_parent;
24866+ a->br = au_sbr(a->dst_dentry->d_sb, a->btgt);
24867+ a->h_path.mnt = au_br_mnt(a->br);
24868+
24869+ /* are they available to be renamed */
24870+ err = au_ren_may_dir(a);
24871+ if (unlikely(err))
24872+ goto out_children;
24873+
24874+ /* prepare the writable parent dir on the same branch */
24875+ if (a->dst_btop == a->btgt) {
24876+ au_fset_ren(a->auren_flags, WHDST);
24877+ } else {
24878+ err = au_cpup_dirs(a->dst_dentry, a->btgt);
24879+ if (unlikely(err))
24880+ goto out_children;
24881+ }
24882+
24883+ err = 0;
24884+ if (!a->exchange) {
24885+ if (a->src_dir != a->dst_dir) {
24886+ /*
24887+ * this temporary unlock is safe,
24888+ * because both dir->i_mutex are locked.
24889+ */
24890+ di_write_unlock(a->dst_parent);
24891+ di_write_lock_parent(a->src_parent);
24892+ err = au_wr_dir_need_wh(a->src_dentry,
24893+ au_ftest_ren(a->auren_flags,
24894+ ISDIR_SRC),
24895+ &a->btgt);
24896+ di_write_unlock(a->src_parent);
24897+ di_write_lock2_parent(a->src_parent, a->dst_parent,
24898+ /*isdir*/1);
24899+ au_fclr_ren(a->auren_flags, ISSAMEDIR);
24900+ } else
24901+ err = au_wr_dir_need_wh(a->src_dentry,
24902+ au_ftest_ren(a->auren_flags,
24903+ ISDIR_SRC),
24904+ &a->btgt);
24905+ }
24906+ if (unlikely(err < 0))
24907+ goto out_children;
24908+ if (err)
24909+ au_fset_ren(a->auren_flags, WHSRC);
24910+
24911+ /* cpup src */
24912+ if (a->src_btop != a->btgt) {
24913+ err = au_pin(&pin, a->src_dentry, a->btgt,
24914+ au_opt_udba(a->src_dentry->d_sb),
24915+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
24916+ if (!err) {
24917+ struct au_cp_generic cpg = {
24918+ .dentry = a->src_dentry,
24919+ .bdst = a->btgt,
24920+ .bsrc = a->src_btop,
24921+ .len = -1,
24922+ .pin = &pin,
24923+ .flags = AuCpup_DTIME | AuCpup_HOPEN
24924+ };
24925+ AuDebugOn(au_dbtop(a->src_dentry) != a->src_btop);
24926+ err = au_sio_cpup_simple(&cpg);
24927+ au_unpin(&pin);
24928+ }
24929+ if (unlikely(err))
24930+ goto out_children;
24931+ a->src_btop = a->btgt;
24932+ a->src_h_dentry = au_h_dptr(a->src_dentry, a->btgt);
24933+ if (!a->exchange)
24934+ au_fset_ren(a->auren_flags, WHSRC);
24935+ }
24936+
24937+ /* cpup dst */
24938+ if (a->exchange && a->dst_inode
24939+ && a->dst_btop != a->btgt) {
24940+ err = au_pin(&pin, a->dst_dentry, a->btgt,
24941+ au_opt_udba(a->dst_dentry->d_sb),
24942+ AuPin_DI_LOCKED | AuPin_MNT_WRITE);
24943+ if (!err) {
24944+ struct au_cp_generic cpg = {
24945+ .dentry = a->dst_dentry,
24946+ .bdst = a->btgt,
24947+ .bsrc = a->dst_btop,
24948+ .len = -1,
24949+ .pin = &pin,
24950+ .flags = AuCpup_DTIME | AuCpup_HOPEN
24951+ };
24952+ err = au_sio_cpup_simple(&cpg);
24953+ au_unpin(&pin);
24954+ }
24955+ if (unlikely(err))
24956+ goto out_children;
24957+ a->dst_btop = a->btgt;
24958+ a->dst_h_dentry = au_h_dptr(a->dst_dentry, a->btgt);
24959+ }
24960+
24961+ /* lock them all */
24962+ err = au_ren_lock(a);
24963+ if (unlikely(err))
24964+ /* leave the copied-up one */
24965+ goto out_children;
24966+
24967+ if (!a->exchange) {
24968+ if (!au_opt_test(au_mntflags(a->dst_dir->i_sb), UDBA_NONE))
24969+ err = au_may_ren(a);
24970+ else if (unlikely(a->dst_dentry->d_name.len > AUFS_MAX_NAMELEN))
24971+ err = -ENAMETOOLONG;
24972+ if (unlikely(err))
24973+ goto out_hdir;
24974+ }
24975+
24976+ /* store timestamps to be revertible */
24977+ au_ren_dt(a);
24978+
24979+ /* store dirren info */
24980+ if (au_ftest_ren(a->auren_flags, DIRREN)) {
24981+ err = au_dr_rename(a->src_dentry, a->btgt,
24982+ &a->dst_dentry->d_name, &rev);
24983+ AuTraceErr(err);
24984+ if (unlikely(err))
24985+ goto out_dt;
24986+ }
24987+
24988+ /* here we go */
24989+ err = do_rename(a);
24990+ if (unlikely(err))
24991+ goto out_dirren;
24992+
24993+ if (au_ftest_ren(a->auren_flags, DIRREN))
24994+ au_dr_rename_fin(a->src_dentry, a->btgt, rev);
24995+
24996+ /* update dir attributes */
24997+ au_ren_refresh_dir(a);
24998+
24999+ /* dput/iput all lower dentries */
25000+ au_ren_refresh(a);
25001+
25002+ goto out_hdir; /* success */
25003+
25004+out_dirren:
25005+ if (au_ftest_ren(a->auren_flags, DIRREN))
25006+ au_dr_rename_rev(a->src_dentry, a->btgt, rev);
25007+out_dt:
25008+ au_ren_rev_dt(err, a);
25009+out_hdir:
25010+ au_ren_unlock(a);
25011+out_children:
25012+ au_nhash_wh_free(&a->whlist);
25013+ if (err && a->dst_inode && a->dst_btop != a->btgt) {
25014+ AuDbg("btop %d, btgt %d\n", a->dst_btop, a->btgt);
25015+ au_set_h_dptr(a->dst_dentry, a->btgt, NULL);
25016+ au_set_dbtop(a->dst_dentry, a->dst_btop);
25017+ }
25018+out_parent:
25019+ if (!err) {
25020+ if (d_unhashed(a->src_dentry))
25021+ au_fset_ren(a->auren_flags, DROPPED_SRC);
25022+ if (d_unhashed(a->dst_dentry))
25023+ au_fset_ren(a->auren_flags, DROPPED_DST);
25024+ if (!a->exchange)
25025+ d_move(a->src_dentry, a->dst_dentry);
25026+ else {
25027+ d_exchange(a->src_dentry, a->dst_dentry);
25028+ if (au_ftest_ren(a->auren_flags, DROPPED_DST))
25029+ d_drop(a->dst_dentry);
25030+ }
25031+ if (au_ftest_ren(a->auren_flags, DROPPED_SRC))
25032+ d_drop(a->src_dentry);
25033+ } else {
25034+ au_update_dbtop(a->dst_dentry);
25035+ if (!a->dst_inode)
25036+ d_drop(a->dst_dentry);
25037+ }
25038+ if (au_ftest_ren(a->auren_flags, ISSAMEDIR))
25039+ di_write_unlock(a->dst_parent);
25040+ else
25041+ di_write_unlock2(a->src_parent, a->dst_parent);
25042+out_unlock:
25043+ aufs_read_and_write_unlock2(a->dst_dentry, a->src_dentry);
25044+out_free:
25045+ iput(a->dst_inode);
25046+ if (a->thargs)
25047+ au_whtmp_rmdir_free(a->thargs);
25048+ au_kfree_rcu(a);
25049+out:
25050+ AuTraceErr(err);
25051+ return err;
25052+}
25053diff -urN /usr/share/empty/fs/aufs/Kconfig linux/fs/aufs/Kconfig
25054--- /usr/share/empty/fs/aufs/Kconfig 1970-01-01 01:00:00.000000000 +0100
25055+++ linux/fs/aufs/Kconfig 2022-11-05 23:02:18.959222617 +0100
25056@@ -0,0 +1,199 @@
25057+# SPDX-License-Identifier: GPL-2.0
25058+config AUFS_FS
25059+ tristate "Aufs (Advanced multi layered unification filesystem) support"
25060+ help
25061+ Aufs is a stackable unification filesystem such as Unionfs,
25062+ which unifies several directories and provides a merged single
25063+ directory.
25064+ In the early days, aufs was entirely re-designed and
25065+ re-implemented Unionfs Version 1.x series. Introducing many
25066+ original ideas, approaches and improvements, it becomes totally
25067+ different from Unionfs while keeping the basic features.
25068+
25069+if AUFS_FS
25070+choice
25071+ prompt "Maximum number of branches"
25072+ default AUFS_BRANCH_MAX_127
25073+ help
25074+ Specifies the maximum number of branches (or member directories)
25075+ in a single aufs. The larger value consumes more system
25076+ resources and has a minor impact to performance.
25077+config AUFS_BRANCH_MAX_127
25078+ bool "127"
25079+ help
25080+ Specifies the maximum number of branches (or member directories)
25081+ in a single aufs. The larger value consumes more system
25082+ resources and has a minor impact to performance.
25083+config AUFS_BRANCH_MAX_511
25084+ bool "511"
25085+ help
25086+ Specifies the maximum number of branches (or member directories)
25087+ in a single aufs. The larger value consumes more system
25088+ resources and has a minor impact to performance.
25089+config AUFS_BRANCH_MAX_1023
25090+ bool "1023"
25091+ help
25092+ Specifies the maximum number of branches (or member directories)
25093+ in a single aufs. The larger value consumes more system
25094+ resources and has a minor impact to performance.
25095+config AUFS_BRANCH_MAX_32767
25096+ bool "32767"
25097+ help
25098+ Specifies the maximum number of branches (or member directories)
25099+ in a single aufs. The larger value consumes more system
25100+ resources and has a minor impact to performance.
25101+endchoice
25102+
25103+config AUFS_SBILIST
25104+ bool
25105+ depends on AUFS_MAGIC_SYSRQ || PROC_FS
25106+ default y
25107+ help
25108+ Automatic configuration for internal use.
25109+ When aufs supports Magic SysRq or /proc, enabled automatically.
25110+
25111+config AUFS_HNOTIFY
25112+ bool "Detect direct branch access (bypassing aufs)"
25113+ help
25114+ If you want to modify files on branches directly, eg. bypassing aufs,
25115+ and want aufs to detect the changes of them fully, then enable this
25116+ option and use 'udba=notify' mount option.
25117+ Currently there is only one available configuration, "fsnotify".
25118+ It will have a negative impact to the performance.
25119+ See detail in aufs.5.
25120+
25121+choice
25122+ prompt "method" if AUFS_HNOTIFY
25123+ default AUFS_HFSNOTIFY
25124+config AUFS_HFSNOTIFY
25125+ bool "fsnotify"
25126+ select FSNOTIFY
25127+endchoice
25128+
25129+config AUFS_EXPORT
25130+ bool "NFS-exportable aufs"
25131+ depends on EXPORTFS
25132+ help
25133+ If you want to export your mounted aufs via NFS, then enable this
25134+ option. There are several requirements for this configuration.
25135+ See detail in aufs.5.
25136+
25137+config AUFS_INO_T_64
25138+ bool
25139+ depends on AUFS_EXPORT
25140+ depends on 64BIT && !(ALPHA || S390)
25141+ default y
25142+ help
25143+ Automatic configuration for internal use.
25144+ /* typedef unsigned long/int __kernel_ino_t */
25145+ /* alpha and s390x are int */
25146+
25147+config AUFS_XATTR
25148+ bool "support for XATTR/EA (including Security Labels)"
25149+ help
25150+ If your branch fs supports XATTR/EA and you want to make them
25151+ available in aufs too, then enable this option and specify the
25152+ branch attributes for EA.
25153+ See detail in aufs.5.
25154+
25155+config AUFS_FHSM
25156+ bool "File-based Hierarchical Storage Management"
25157+ help
25158+ Hierarchical Storage Management (or HSM) is a well-known feature
25159+ in the storage world. Aufs provides this feature as file-based
25160+ with multiple branches.
25161+ These multiple branches are prioritized, ie. the topmost one
25162+ should be the fastest drive and be used heavily.
25163+
25164+config AUFS_RDU
25165+ bool "Readdir in userspace"
25166+ help
25167+ Aufs has two methods to provide a merged view for a directory,
25168+ by a user-space library and by kernel-space natively. The latter
25169+ is always enabled but sometimes large and slow.
25170+ If you enable this option, install the library in aufs2-util
25171+ package, and set some environment variables for your readdir(3),
25172+ then the work will be handled in user-space which generally
25173+ shows better performance in most cases.
25174+ See detail in aufs.5.
25175+
25176+config AUFS_DIRREN
25177+ bool "Workaround for rename(2)-ing a directory"
25178+ help
25179+ By default, aufs returns EXDEV error in renaming a dir who has
25180+ his child on the lower branch, since it is a bad idea to issue
25181+ rename(2) internally for every lower branch. But user may not
25182+ accept this behaviour. So here is a workaround to allow such
25183+ rename(2) and store some extra information on the writable
25184+ branch. Obviously this costs high (and I don't like it).
25185+ To use this feature, you need to enable this configuration AND
25186+ to specify the mount option `dirren.'
25187+ See details in aufs.5 and the design documents.
25188+
25189+config AUFS_SHWH
25190+ bool "Show whiteouts"
25191+ help
25192+ If you want to make the whiteouts in aufs visible, then enable
25193+ this option and specify 'shwh' mount option. Although it may
25194+ sound like philosophy or something, technically it
25195+ simply shows the name of whiteout while keeping its behaviour.
25196+
25197+config AUFS_BR_RAMFS
25198+ bool "Ramfs (initramfs/rootfs) as an aufs branch"
25199+ help
25200+ If you want to use ramfs as an aufs branch fs, then enable this
25201+ option. Generally tmpfs is recommended.
25202+ Aufs prohibited them to be a branch fs by default, because
25203+ initramfs becomes unusable after switch_root or something
25204+ generally. If you set initramfs as an aufs branch and boot your
25205+ system by switch_root, you will meet a problem easily since the
25206+ files in initramfs may be inaccessible.
25207+ Unless you are going to use ramfs as an aufs branch fs without
25208+ switch_root or something, leave it N.
25209+
25210+config AUFS_BR_FUSE
25211+ bool "Fuse fs as an aufs branch"
25212+ depends on FUSE_FS
25213+ select AUFS_POLL
25214+ help
25215+ If you want to use fuse-based userspace filesystem as an aufs
25216+ branch fs, then enable this option.
25217+ It implements the internal poll(2) operation which is
25218+ implemented by fuse only (currently).
25219+
25220+config AUFS_POLL
25221+ bool
25222+ help
25223+ Automatic configuration for internal use.
25224+
25225+config AUFS_BR_HFSPLUS
25226+ bool "Hfsplus as an aufs branch"
25227+ depends on HFSPLUS_FS
25228+ default y
25229+ help
25230+ If you want to use hfsplus fs as an aufs branch fs, then enable
25231+ this option. This option introduces a small overhead at
25232+ copying-up a file on hfsplus.
25233+
25234+config AUFS_BDEV_LOOP
25235+ bool
25236+ depends on BLK_DEV_LOOP
25237+ default y
25238+ help
25239+ Automatic configuration for internal use.
25240+ Convert =[ym] into =y.
25241+
25242+config AUFS_DEBUG
25243+ bool "Debug aufs"
25244+ help
25245+ Enable this to compile aufs internal debug code.
25246+ It will have a negative impact to the performance.
25247+
25248+config AUFS_MAGIC_SYSRQ
25249+ bool
25250+ depends on AUFS_DEBUG && MAGIC_SYSRQ
25251+ default y
25252+ help
25253+ Automatic configuration for internal use.
25254+ When aufs supports Magic SysRq, enabled automatically.
25255+endif
25256diff -urN /usr/share/empty/fs/aufs/lcnt.h linux/fs/aufs/lcnt.h
25257--- /usr/share/empty/fs/aufs/lcnt.h 1970-01-01 01:00:00.000000000 +0100
25258+++ linux/fs/aufs/lcnt.h 2022-11-05 23:02:18.965889284 +0100
25259@@ -0,0 +1,186 @@
25260+/* SPDX-License-Identifier: GPL-2.0 */
25261+/*
25262+ * Copyright (C) 2018-2022 Junjiro R. Okajima
25263+ *
25264+ * This program is free software; you can redistribute it and/or modify
25265+ * it under the terms of the GNU General Public License as published by
25266+ * the Free Software Foundation; either version 2 of the License, or
25267+ * (at your option) any later version.
25268+ *
25269+ * This program is distributed in the hope that it will be useful,
25270+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25271+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25272+ * GNU General Public License for more details.
25273+ *
25274+ * You should have received a copy of the GNU General Public License
25275+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
25276+ */
25277+
25278+/*
25279+ * simple long counter wrapper
25280+ */
25281+
25282+#ifndef __AUFS_LCNT_H__
25283+#define __AUFS_LCNT_H__
25284+
25285+#ifdef __KERNEL__
25286+
25287+#include "debug.h"
25288+
25289+#define AuLCntATOMIC 1
25290+#define AuLCntPCPUCNT 2
25291+/*
25292+ * why does percpu_refcount require extra synchronize_rcu()s in
25293+ * au_br_do_free()
25294+ */
25295+#define AuLCntPCPUREF 3
25296+
25297+/* #define AuLCntChosen AuLCntATOMIC */
25298+#define AuLCntChosen AuLCntPCPUCNT
25299+/* #define AuLCntChosen AuLCntPCPUREF */
25300+
25301+#if AuLCntChosen == AuLCntATOMIC
25302+#include <linux/atomic.h>
25303+
25304+typedef atomic_long_t au_lcnt_t;
25305+
25306+static inline int au_lcnt_init(au_lcnt_t *cnt, void *release __maybe_unused)
25307+{
25308+ atomic_long_set(cnt, 0);
25309+ return 0;
25310+}
25311+
25312+static inline void au_lcnt_wait_for_fin(au_lcnt_t *cnt __maybe_unused)
25313+{
25314+ /* empty */
25315+}
25316+
25317+static inline void au_lcnt_fin(au_lcnt_t *cnt __maybe_unused,
25318+ int do_sync __maybe_unused)
25319+{
25320+ /* empty */
25321+}
25322+
25323+static inline void au_lcnt_inc(au_lcnt_t *cnt)
25324+{
25325+ atomic_long_inc(cnt);
25326+}
25327+
25328+static inline void au_lcnt_dec(au_lcnt_t *cnt)
25329+{
25330+ atomic_long_dec(cnt);
25331+}
25332+
25333+static inline long au_lcnt_read(au_lcnt_t *cnt, int do_rev __maybe_unused)
25334+{
25335+ return atomic_long_read(cnt);
25336+}
25337+#endif
25338+
25339+#if AuLCntChosen == AuLCntPCPUCNT
25340+#include <linux/percpu_counter.h>
25341+
25342+typedef struct percpu_counter au_lcnt_t;
25343+
25344+static inline int au_lcnt_init(au_lcnt_t *cnt, void *release __maybe_unused)
25345+{
25346+ return percpu_counter_init(cnt, 0, GFP_NOFS);
25347+}
25348+
25349+static inline void au_lcnt_wait_for_fin(au_lcnt_t *cnt __maybe_unused)
25350+{
25351+ /* empty */
25352+}
25353+
25354+static inline void au_lcnt_fin(au_lcnt_t *cnt, int do_sync __maybe_unused)
25355+{
25356+ percpu_counter_destroy(cnt);
25357+}
25358+
25359+static inline void au_lcnt_inc(au_lcnt_t *cnt)
25360+{
25361+ percpu_counter_inc(cnt);
25362+}
25363+
25364+static inline void au_lcnt_dec(au_lcnt_t *cnt)
25365+{
25366+ percpu_counter_dec(cnt);
25367+}
25368+
25369+static inline long au_lcnt_read(au_lcnt_t *cnt, int do_rev __maybe_unused)
25370+{
25371+ s64 n;
25372+
25373+ n = percpu_counter_sum(cnt);
25374+ BUG_ON(n < 0);
25375+ if (LONG_MAX != LLONG_MAX
25376+ && n > LONG_MAX)
25377+ AuWarn1("%s\n", "wrap-around");
25378+
25379+ return n;
25380+}
25381+#endif
25382+
25383+#if AuLCntChosen == AuLCntPCPUREF
25384+#include <linux/percpu-refcount.h>
25385+
25386+typedef struct percpu_ref au_lcnt_t;
25387+
25388+static inline int au_lcnt_init(au_lcnt_t *cnt, percpu_ref_func_t *release)
25389+{
25390+ if (!release)
25391+ release = percpu_ref_exit;
25392+ return percpu_ref_init(cnt, release, /*percpu mode*/0, GFP_NOFS);
25393+}
25394+
25395+static inline void au_lcnt_wait_for_fin(au_lcnt_t *cnt __maybe_unused)
25396+{
25397+ synchronize_rcu();
25398+}
25399+
25400+static inline void au_lcnt_fin(au_lcnt_t *cnt, int do_sync)
25401+{
25402+ percpu_ref_kill(cnt);
25403+ if (do_sync)
25404+ au_lcnt_wait_for_fin(cnt);
25405+}
25406+
25407+static inline void au_lcnt_inc(au_lcnt_t *cnt)
25408+{
25409+ percpu_ref_get(cnt);
25410+}
25411+
25412+static inline void au_lcnt_dec(au_lcnt_t *cnt)
25413+{
25414+ percpu_ref_put(cnt);
25415+}
25416+
25417+/*
25418+ * avoid calling this func as much as possible.
25419+ */
25420+static inline long au_lcnt_read(au_lcnt_t *cnt, int do_rev)
25421+{
25422+ long l;
25423+
25424+ percpu_ref_switch_to_atomic_sync(cnt);
25425+ l = atomic_long_read(&cnt->count);
25426+ if (do_rev)
25427+ percpu_ref_switch_to_percpu(cnt);
25428+
25429+ /* percpu_ref is initialized by 1 instead of 0 */
25430+ return l - 1;
25431+}
25432+#endif
25433+
25434+#ifdef CONFIG_AUFS_DEBUG
25435+#define AuLCntZero(val) do { \
25436+ long l = val; \
25437+ if (l) \
25438+ AuDbg("%s = %ld\n", #val, l); \
25439+} while (0)
25440+#else
25441+#define AuLCntZero(val) do {} while (0)
25442+#endif
25443+
25444+#endif /* __KERNEL__ */
25445+#endif /* __AUFS_LCNT_H__ */
25446diff -urN /usr/share/empty/fs/aufs/loop.c linux/fs/aufs/loop.c
25447--- /usr/share/empty/fs/aufs/loop.c 1970-01-01 01:00:00.000000000 +0100
25448+++ linux/fs/aufs/loop.c 2022-11-05 23:02:18.965889284 +0100
25449@@ -0,0 +1,148 @@
25450+// SPDX-License-Identifier: GPL-2.0
25451+/*
25452+ * Copyright (C) 2005-2022 Junjiro R. Okajima
25453+ *
25454+ * This program is free software; you can redistribute it and/or modify
25455+ * it under the terms of the GNU General Public License as published by
25456+ * the Free Software Foundation; either version 2 of the License, or
25457+ * (at your option) any later version.
25458+ *
25459+ * This program is distributed in the hope that it will be useful,
25460+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25461+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25462+ * GNU General Public License for more details.
25463+ *
25464+ * You should have received a copy of the GNU General Public License
25465+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
25466+ */
25467+
25468+/*
25469+ * support for loopback block device as a branch
25470+ */
25471+
25472+#include "aufs.h"
25473+
25474+/* added into drivers/block/loop.c */
25475+static struct file *(*backing_file_func)(struct super_block *sb);
25476+
25477+/*
25478+ * test if two lower dentries have overlapping branches.
25479+ */
25480+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding)
25481+{
25482+ struct super_block *h_sb;
25483+ struct file *backing_file;
25484+
25485+ if (unlikely(!backing_file_func)) {
25486+ /* don't load "loop" module here */
25487+ backing_file_func = symbol_get(loop_backing_file);
25488+ if (unlikely(!backing_file_func))
25489+ /* "loop" module is not loaded */
25490+ return 0;
25491+ }
25492+
25493+ h_sb = h_adding->d_sb;
25494+ backing_file = backing_file_func(h_sb);
25495+ if (!backing_file)
25496+ return 0;
25497+
25498+ h_adding = backing_file->f_path.dentry;
25499+ /*
25500+ * h_adding can be local NFS.
25501+ * in this case aufs cannot detect the loop.
25502+ */
25503+ if (unlikely(h_adding->d_sb == sb))
25504+ return 1;
25505+ return !!au_test_subdir(h_adding, sb->s_root);
25506+}
25507+
25508+/* true if a kernel thread named 'loop[0-9].*' accesses a file */
25509+int au_test_loopback_kthread(void)
25510+{
25511+ int ret;
25512+ struct task_struct *tsk = current;
25513+ char c, comm[sizeof(tsk->comm)];
25514+
25515+ ret = 0;
25516+ if (tsk->flags & PF_KTHREAD) {
25517+ get_task_comm(comm, tsk);
25518+ c = comm[4];
25519+ ret = ('0' <= c && c <= '9'
25520+ && !strncmp(comm, "loop", 4));
25521+ }
25522+
25523+ return ret;
25524+}
25525+
25526+/* ---------------------------------------------------------------------- */
25527+
25528+#define au_warn_loopback_step 16
25529+static int au_warn_loopback_nelem = au_warn_loopback_step;
25530+static unsigned long *au_warn_loopback_array;
25531+
25532+void au_warn_loopback(struct super_block *h_sb)
25533+{
25534+ int i, new_nelem;
25535+ unsigned long *a, magic;
25536+ static DEFINE_SPINLOCK(spin); /* serializes access to the array and its nelem */
25537+
25538+ magic = h_sb->s_magic; /* the warning is printed once per s_magic value */
25539+ spin_lock(&spin);
25540+ a = au_warn_loopback_array; /* a zero entry marks the first unused slot */
25541+ for (i = 0; i < au_warn_loopback_nelem && a[i]; i++)
25542+ if (a[i] == magic) {
25543+ spin_unlock(&spin);
25544+ return;
25545+ }
25546+
25547+ /* h_sb is new to us, record it in the first free slot and print it */
25548+ if (i < au_warn_loopback_nelem) {
25549+ a[i] = magic;
25550+ goto pr;
25551+ }
25552+
25553+ /* no free slot is left, expand the array by au_warn_loopback_step */
25554+ new_nelem = au_warn_loopback_nelem + au_warn_loopback_step;
25555+ a = au_kzrealloc(au_warn_loopback_array,
25556+ au_warn_loopback_nelem * sizeof(unsigned long),
25557+ new_nelem * sizeof(unsigned long), GFP_ATOMIC,
25558+ /*may_shrink*/0);
25559+ if (a) {
25560+ au_warn_loopback_nelem = new_nelem;
25561+ au_warn_loopback_array = a;
25562+ a[i] = magic; /* i is the old nelem, ie. the first new slot */
25563+ goto pr;
25564+ }
25565+
25566+ spin_unlock(&spin);
25567+ AuWarn1("realloc failed, ignored\n");
25568+ return;
25569+
25570+pr:
25571+ spin_unlock(&spin);
25572+ pr_warn("you may want to try another patch for loopback file "
25573+ "on %s(0x%lx) branch\n", au_sbtype(h_sb), magic);
25574+}
25575+
25576+int au_loopback_init(void)
25577+{
25578+ int err;
25579+ struct super_block *sb __maybe_unused;
25580+
25581+ BUILD_BUG_ON(sizeof(sb->s_magic) != sizeof(*au_warn_loopback_array));
25582+
25583+ err = 0;
25584+ au_warn_loopback_array = kcalloc(au_warn_loopback_step,
25585+ sizeof(unsigned long), GFP_NOFS);
25586+ if (unlikely(!au_warn_loopback_array))
25587+ err = -ENOMEM;
25588+
25589+ return err;
25590+}
25591+
25592+void au_loopback_fin(void)
25593+{
25594+ if (backing_file_func)
25595+ symbol_put(loop_backing_file);
25596+ au_kfree_try_rcu(au_warn_loopback_array);
25597+}
25598diff -urN /usr/share/empty/fs/aufs/loop.h linux/fs/aufs/loop.h
25599--- /usr/share/empty/fs/aufs/loop.h 1970-01-01 01:00:00.000000000 +0100
25600+++ linux/fs/aufs/loop.h 2022-11-05 23:02:18.965889284 +0100
25601@@ -0,0 +1,55 @@
25602+/* SPDX-License-Identifier: GPL-2.0 */
25603+/*
25604+ * Copyright (C) 2005-2022 Junjiro R. Okajima
25605+ *
25606+ * This program is free software; you can redistribute it and/or modify
25607+ * it under the terms of the GNU General Public License as published by
25608+ * the Free Software Foundation; either version 2 of the License, or
25609+ * (at your option) any later version.
25610+ *
25611+ * This program is distributed in the hope that it will be useful,
25612+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25613+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25614+ * GNU General Public License for more details.
25615+ *
25616+ * You should have received a copy of the GNU General Public License
25617+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
25618+ */
25619+
25620+/*
25621+ * support for loopback mount as a branch
25622+ */
25623+
25624+#ifndef __AUFS_LOOP_H__
25625+#define __AUFS_LOOP_H__
25626+
25627+#ifdef __KERNEL__
25628+
25629+struct dentry;
25630+struct super_block;
25631+
25632+#ifdef CONFIG_AUFS_BDEV_LOOP
25633+/* drivers/block/loop.c */
25634+struct file *loop_backing_file(struct super_block *sb);
25635+
25636+/* loop.c */
25637+int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding);
25638+int au_test_loopback_kthread(void);
25639+void au_warn_loopback(struct super_block *h_sb);
25640+
25641+int au_loopback_init(void);
25642+void au_loopback_fin(void);
25643+#else
25644+AuStub(struct file *, loop_backing_file, return NULL, struct super_block *sb)
25645+
25646+AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
25647+ struct dentry *h_adding)
25648+AuStubInt0(au_test_loopback_kthread, void)
25649+AuStubVoid(au_warn_loopback, struct super_block *h_sb)
25650+
25651+AuStubInt0(au_loopback_init, void)
25652+AuStubVoid(au_loopback_fin, void)
25653+#endif /* CONFIG_AUFS_BDEV_LOOP */
25654+
25655+#endif /* __KERNEL__ */
25656+#endif /* __AUFS_LOOP_H__ */
25657diff -urN /usr/share/empty/fs/aufs/magic.mk linux/fs/aufs/magic.mk
25658--- /usr/share/empty/fs/aufs/magic.mk 1970-01-01 01:00:00.000000000 +0100
25659+++ linux/fs/aufs/magic.mk 2022-11-05 23:02:18.965889284 +0100
25660@@ -0,0 +1,31 @@
25661+# SPDX-License-Identifier: GPL-2.0
25662+
25663+# defined in ${srctree}/fs/fuse/inode.c
25664+# tristate (ifdef holds for both =y and =m)
25665+ifdef CONFIG_FUSE_FS
25666+ccflags-y += -DFUSE_SUPER_MAGIC=0x65735546
25667+endif
25668+
25669+# defined in ${srctree}/fs/xfs/xfs_sb.h
25670+# tristate (ifdef holds for both =y and =m)
25671+ifdef CONFIG_XFS_FS
25672+ccflags-y += -DXFS_SB_MAGIC=0x58465342
25673+endif
25674+
25675+# defined in ${srctree}/fs/configfs/mount.c
25676+# tristate (ifdef holds for both =y and =m)
25677+ifdef CONFIG_CONFIGFS_FS
25678+ccflags-y += -DCONFIGFS_MAGIC=0x62656570
25679+endif
25680+
25681+# defined in ${srctree}/fs/ubifs/ubifs.h
25682+# tristate (ifdef holds for both =y and =m)
25683+ifdef CONFIG_UBIFS_FS
25684+ccflags-y += -DUBIFS_SUPER_MAGIC=0x24051905
25685+endif
25686+
25687+# defined in ${srctree}/fs/hfsplus/hfsplus_raw.h
25688+# tristate (ifdef holds for both =y and =m)
25689+ifdef CONFIG_HFSPLUS_FS
25690+ccflags-y += -DHFSPLUS_SUPER_MAGIC=0x482b
25691+endif
25692diff -urN /usr/share/empty/fs/aufs/Makefile linux/fs/aufs/Makefile
25693--- /usr/share/empty/fs/aufs/Makefile 1970-01-01 01:00:00.000000000 +0100
25694+++ linux/fs/aufs/Makefile 2022-11-05 23:02:18.959222617 +0100
25695@@ -0,0 +1,46 @@
25696+# SPDX-License-Identifier: GPL-2.0
25697+
25698+include ${src}/magic.mk
25699+ifeq (${CONFIG_AUFS_FS},m)
25700+include ${src}/conf.mk
25701+endif
25702+-include ${src}/priv_def.mk
25703+
25704+# cf. include/linux/kernel.h
25705+# enable pr_debug
25706+ccflags-y += -DDEBUG
25707+# sparse requires the full pathname; ${M} is set for external module builds
25708+ifdef M
25709+ccflags-y += -include ${M}/../../include/uapi/linux/aufs_type.h
25710+else
25711+ccflags-y += -include ${srctree}/include/uapi/linux/aufs_type.h
25712+endif
25713+
25714+obj-$(CONFIG_AUFS_FS) += aufs.o
25715+aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o fsctx.o \
25716+ wkq.o vfsub.o dcsub.o \
25717+ cpup.o whout.o wbr_policy.o \
25718+ dinfo.o dentry.o \
25719+ dynop.o \
25720+ finfo.o file.o f_op.o \
25721+ dir.o vdir.o \
25722+ iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \
25723+ mvdown.o ioctl.o
25724+
25725+# all are boolean: each object is either linked into aufs.o or left out
25726+aufs-$(CONFIG_PROC_FS) += procfs.o plink.o
25727+aufs-$(CONFIG_SYSFS) += sysfs.o
25728+aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o
25729+aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o
25730+aufs-$(CONFIG_AUFS_HNOTIFY) += hnotify.o
25731+aufs-$(CONFIG_AUFS_HFSNOTIFY) += hfsnotify.o
25732+aufs-$(CONFIG_AUFS_EXPORT) += export.o
25733+aufs-$(CONFIG_AUFS_XATTR) += xattr.o
25734+aufs-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
25735+aufs-$(CONFIG_AUFS_DIRREN) += dirren.o
25736+aufs-$(CONFIG_AUFS_FHSM) += fhsm.o
25737+aufs-$(CONFIG_AUFS_POLL) += poll.o
25738+aufs-$(CONFIG_AUFS_RDU) += rdu.o
25739+aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o
25740+aufs-$(CONFIG_AUFS_DEBUG) += debug.o
25741+aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o
25742diff -urN /usr/share/empty/fs/aufs/module.c linux/fs/aufs/module.c
25743--- /usr/share/empty/fs/aufs/module.c 1970-01-01 01:00:00.000000000 +0100
25744+++ linux/fs/aufs/module.c 2022-11-05 23:02:18.965889284 +0100
25745@@ -0,0 +1,273 @@
25746+// SPDX-License-Identifier: GPL-2.0
25747+/*
25748+ * Copyright (C) 2005-2022 Junjiro R. Okajima
25749+ *
25750+ * This program is free software; you can redistribute it and/or modify
25751+ * it under the terms of the GNU General Public License as published by
25752+ * the Free Software Foundation; either version 2 of the License, or
25753+ * (at your option) any later version.
25754+ *
25755+ * This program is distributed in the hope that it will be useful,
25756+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
25757+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25758+ * GNU General Public License for more details.
25759+ *
25760+ * You should have received a copy of the GNU General Public License
25761+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
25762+ */
25763+
25764+/*
25765+ * module global variables and operations
25766+ */
25767+
25768+#include <linux/module.h>
25769+#include <linux/seq_file.h>
25770+#include "aufs.h"
25771+
25772+/* shrinkable realloc; returns the resulting buffer, or NULL when an allocation fails */
25773+void *au_krealloc(void *p, unsigned int new_sz, gfp_t gfp, int may_shrink)
25774+{
25775+ size_t sz;
25776+ int diff;
25777+
25778+ sz = 0;
25779+ diff = -1;
25780+ if (p) {
25781+#if 0 /* unused */
25782+ if (!new_sz) {
25783+ au_kfree_rcu(p);
25784+ p = NULL;
25785+ goto out;
25786+ }
25787+#else
25788+ AuDebugOn(!new_sz);
25789+#endif
25790+ sz = ksize(p);
25791+ diff = au_kmidx_sub(sz, new_sz);
25792+ }
25793+ if (sz && !diff)
25794+ goto out; /* au_kmidx_sub() sees no difference: keep the current buffer */
25795+
25796+ if (sz < new_sz)
25797+ /* expand or SLOB */
25798+ p = krealloc(p, new_sz, gfp);
25799+ else if (new_sz < sz && may_shrink) {
25800+ /* shrink: allocate a smaller buffer and move the data over */
25801+ void *q;
25802+
25803+ q = kmalloc(new_sz, gfp);
25804+ if (q) {
25805+ if (p) {
25806+ memcpy(q, p, new_sz); /* only the bytes that fit in the new buffer */
25807+ au_kfree_try_rcu(p);
25808+ }
25809+ p = q;
25810+ } else
25811+ p = NULL; /* allocation failed; the old buffer is not freed here */
25812+ }
25813+
25814+out:
25815+ return p;
25816+}
25817+
25818+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp,
25819+ int may_shrink)
25820+{
25821+ p = au_krealloc(p, new_sz, gfp, may_shrink);
25822+ if (p && new_sz > nused)
25823+ memset(p + nused, 0, new_sz - nused); /* zero everything past the first nused bytes */
25824+ return p;
25825+}
25826+
25827+/* ---------------------------------------------------------------------- */
25828+/*
25829+ * aufs caches
25830+ */
25831+struct kmem_cache *au_cache[AuCache_Last];
25832+
25833+static void au_cache_fin(void)
25834+{
25835+ int i;
25836+
25837+ /*
25838+ * Make sure all delayed rcu free inodes are flushed before we
25839+ * destroy the caches.
25840+ */
25841+ rcu_barrier();
25842+
25843+ /* excluding AuCache_HNOTIFY, which the check below requires to be the last index */
25844+ BUILD_BUG_ON(AuCache_HNOTIFY + 1 != AuCache_Last);
25845+ for (i = 0; i < AuCache_HNOTIFY; i++) {
25846+ kmem_cache_destroy(au_cache[i]);
25847+ au_cache[i] = NULL;
25848+ }
25849+}
25850+
25851+static int __init au_cache_init(void)
25852+{
25853+ au_cache[AuCache_DINFO] = AuCacheCtor(au_dinfo, au_di_init_once);
25854+ if (au_cache[AuCache_DINFO]) /* each cache is created only if the previous one exists */
25855+ /* SLAB_DESTROY_BY_RCU */
25856+ au_cache[AuCache_ICNTNR] = AuCacheCtor(au_icntnr,
25857+ au_icntnr_init_once);
25858+ if (au_cache[AuCache_ICNTNR])
25859+ au_cache[AuCache_FINFO] = AuCacheCtor(au_finfo,
25860+ au_fi_init_once);
25861+ if (au_cache[AuCache_FINFO])
25862+ au_cache[AuCache_VDIR] = AuCache(au_vdir);
25863+ if (au_cache[AuCache_VDIR])
25864+ au_cache[AuCache_DEHSTR] = AuCache(au_vdir_dehstr);
25865+ if (au_cache[AuCache_DEHSTR])
25866+ return 0; /* the whole chain succeeded */
25867+
25868+ au_cache_fin(); /* destroy the caches created so far */
25869+ return -ENOMEM;
25870+}
25871+
25872+/* ---------------------------------------------------------------------- */
25873+
25874+int au_dir_roflags; /* set once in aufs_init() via au_file_roflags() */
25875+
25876+#ifdef CONFIG_AUFS_SBILIST
25877+/*
25878+ * iterate_supers_type() doesn't protect us from
25879+ * remounting (branch management)
25880+ */
25881+struct hlist_bl_head au_sbilist;
25882+#endif
25883+
25884+/*
25885+ * module metadata and parameters.
25886+ */
25887+MODULE_LICENSE("GPL");
25888+/* MODULE_LICENSE("GPL v2"); */
25889+MODULE_AUTHOR("Junjiro R. Okajima <aufs-users@lists.sourceforge.net>");
25890+MODULE_DESCRIPTION(AUFS_NAME
25891+ " -- Advanced multi layered unification filesystem");
25892+MODULE_VERSION(AUFS_VERSION);
25893+MODULE_ALIAS_FS(AUFS_NAME);
25894+
25895+/* this module parameter has no meaning when SYSFS is disabled */
25896+int sysaufs_brs = 1;
25897+MODULE_PARM_DESC(brs, "use <sysfs>/fs/aufs/si_*/brN");
25898+module_param_named(brs, sysaufs_brs, int, 0444);
25899+
25900+/* this module parameter has no meaning when USER_NS is disabled */
25901+bool au_userns; /* when true, aufs_init() adds FS_USERNS_MOUNT to aufs_fs_type */
25902+MODULE_PARM_DESC(allow_userns, "allow unprivileged to mount under userns");
25903+module_param_named(allow_userns, au_userns, bool, 0444);
25904+
25905+/* ---------------------------------------------------------------------- */
25906+
25907+static char au_esc_chars[0x20 + 3]; /* 0x01-0x20, backslash, del, and NUL; filled in aufs_init() */
25908+
25909+int au_seq_path(struct seq_file *seq, struct path *path)
25910+{
25911+ int err;
25912+
25913+ err = seq_path(seq, path, au_esc_chars);
25914+ if (err >= 0)
25915+ err = 0;
25916+ else
25917+ err = -ENOMEM; /* any negative seq_path() result is reported as -ENOMEM */
25918+
25919+ return err;
25920+}
25921+
25922+/* ---------------------------------------------------------------------- */
25923+
25924+static int __init aufs_init(void)
25925+{
25926+ int err, i;
25927+ char *p;
25928+
25929+ p = au_esc_chars;
25930+ for (i = 1; i <= ' '; i++) /* 0x01 through 0x20 (space) */
25931+ *p++ = i;
25932+ *p++ = '\\';
25933+ *p++ = '\x7f';
25934+ *p = 0;
25935+
25936+ au_dir_roflags = au_file_roflags(O_DIRECTORY | O_LARGEFILE);
25937+
25938+ memcpy(aufs_iop_nogetattr, aufs_iop, sizeof(aufs_iop));
25939+ for (i = 0; i < AuIop_Last; i++)
25940+ aufs_iop_nogetattr[i].getattr = NULL; /* same ops as aufs_iop, minus ->getattr */
25941+
25942+ memset(au_cache, 0, sizeof(au_cache)); /* including hnotify */
25943+
25944+ au_sbilist_init();
25945+ sysaufs_brs_init();
25946+ au_debug_init();
25947+ au_dy_init();
25948+ err = sysaufs_init();
25949+ if (unlikely(err))
25950+ goto out;
25951+ err = dbgaufs_init();
25952+ if (unlikely(err))
25953+ goto out_sysaufs;
25954+ err = au_procfs_init();
25955+ if (unlikely(err))
25956+ goto out_dbgaufs;
25957+ err = au_wkq_init();
25958+ if (unlikely(err))
25959+ goto out_procfs;
25960+ err = au_loopback_init();
25961+ if (unlikely(err))
25962+ goto out_wkq;
25963+ err = au_hnotify_init();
25964+ if (unlikely(err))
25965+ goto out_loopback;
25966+ err = au_sysrq_init();
25967+ if (unlikely(err))
25968+ goto out_hin;
25969+ err = au_cache_init();
25970+ if (unlikely(err))
25971+ goto out_sysrq; /* au_cache_init() already called au_cache_fin() on failure */
25972+
25973+ aufs_fs_type.fs_flags |= au_userns ? FS_USERNS_MOUNT : 0;
25974+ err = register_filesystem(&aufs_fs_type);
25975+ if (unlikely(err))
25976+ goto out_cache;
25977+
25978+ /* since we define pr_fmt, call printk directly */
25979+ printk(KERN_INFO AUFS_NAME " " AUFS_VERSION "\n");
25980+ goto out; /* success */
25981+
25982+out_cache: /* error unwinding: reverse order of the initialization above */
25983+ au_cache_fin();
25984+out_sysrq:
25985+ au_sysrq_fin();
25986+out_hin:
25987+ au_hnotify_fin();
25988+out_loopback:
25989+ au_loopback_fin();
25990+out_wkq:
25991+ au_wkq_fin();
25992+out_procfs:
25993+ au_procfs_fin();
25994+out_dbgaufs:
25995+ dbgaufs_fin();
25996+out_sysaufs:
25997+ sysaufs_fin();
25998+ au_dy_fin();
25999+out:
26000+ return err;
26001+}
26002+
26003+static void __exit aufs_exit(void)
26004+{
26005+ unregister_filesystem(&aufs_fs_type); /* then the same teardown sequence as aufs_init()'s error path */
26006+ au_cache_fin();
26007+ au_sysrq_fin();
26008+ au_hnotify_fin();
26009+ au_loopback_fin();
26010+ au_wkq_fin();
26011+ au_procfs_fin();
26012+ dbgaufs_fin();
26013+ sysaufs_fin();
26014+ au_dy_fin();
26015+}
26016+
26017+module_init(aufs_init);
26018+module_exit(aufs_exit);
26019diff -urN /usr/share/empty/fs/aufs/module.h linux/fs/aufs/module.h
26020--- /usr/share/empty/fs/aufs/module.h 1970-01-01 01:00:00.000000000 +0100
26021+++ linux/fs/aufs/module.h 2022-11-05 23:02:18.969222617 +0100
26022@@ -0,0 +1,180 @@
26023+/* SPDX-License-Identifier: GPL-2.0 */
26024+/*
26025+ * Copyright (C) 2005-2022 Junjiro R. Okajima
26026+ *
26027+ * This program is free software; you can redistribute it and/or modify
26028+ * it under the terms of the GNU General Public License as published by
26029+ * the Free Software Foundation; either version 2 of the License, or
26030+ * (at your option) any later version.
26031+ *
26032+ * This program is distributed in the hope that it will be useful,
26033+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26034+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26035+ * GNU General Public License for more details.
26036+ *
26037+ * You should have received a copy of the GNU General Public License
26038+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
26039+ */
26040+
26041+/*
26042+ * module initialization and module-global
26043+ */
26044+
26045+#ifndef __AUFS_MODULE_H__
26046+#define __AUFS_MODULE_H__
26047+
26048+#ifdef __KERNEL__
26049+
26050+#include <linux/slab.h>
26051+#include "debug.h"
26052+#include "dentry.h"
26053+#include "dir.h"
26054+#include "file.h"
26055+#include "inode.h"
26056+
26057+struct path;
26058+struct seq_file;
26059+
26060+/* module parameters */
26061+extern int sysaufs_brs;
26062+extern bool au_userns;
26063+
26064+/* ---------------------------------------------------------------------- */
26065+
26066+extern int au_dir_roflags;
26067+
26068+void *au_krealloc(void *p, unsigned int new_sz, gfp_t gfp, int may_shrink);
26069+void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp,
26070+ int may_shrink);
26071+
26072+/*
26073+ * Comparing the size of the object with sizeof(struct rcu_head)
26074+ * case 1: object is always larger
26075+ * --> au_kfree_rcu() or au_kfree_do_rcu()
26076+ * case 2: object is always smaller
26077+ * --> au_kfree_small()
26078+ * case 3: object can be any size
26079+ * --> au_kfree_try_rcu()
26080+ */
26081+
26082+static inline void au_kfree_do_rcu(const void *p)
26083+{
26084+ struct {
26085+ struct rcu_head rcu;
26086+ } *a = (void *)p; /* overlay an rcu_head on the start of the object */
26087+
26088+ kfree_rcu(a, rcu);
26089+}
26090+
26091+#define au_kfree_rcu(_p) do { \
26092+ typeof(_p) p = (_p); /* evaluate the argument once */ \
26093+ BUILD_BUG_ON(sizeof(*p) < sizeof(struct rcu_head)); /* object must be able to hold an rcu_head */ \
26094+ if (p) \
26095+ au_kfree_do_rcu(p); \
26096+ } while (0)
26097+
26098+#define au_kfree_do_sz_test(sz) ((sz) >= sizeof(struct rcu_head)) /* can sz bytes hold an rcu_head? */
26099+#define au_kfree_sz_test(p) ((p) && au_kfree_do_sz_test(ksize(p))) /* non-NULL and ksize() is large enough */
26100+
26101+static inline void au_kfree_try_rcu(const void *p)
26102+{
26103+ if (!p)
26104+ return;
26105+ if (au_kfree_sz_test(p)) /* ksize(p) can hold an rcu_head */
26106+ au_kfree_do_rcu(p);
26107+ else
26108+ kfree(p); /* too small for an rcu_head: free immediately */
26109+}
26110+
26111+static inline void au_kfree_small(const void *p)
26112+{
26113+ if (!p)
26114+ return;
26115+ AuDebugOn(au_kfree_sz_test(p)); /* this helper is for objects smaller than an rcu_head */
26116+ kfree(p);
26117+}
26118+
26119+static inline int au_kmidx_sub(size_t sz, size_t new_sz)
26120+{
26121+#ifndef CONFIG_SLOB
26122+ return __kmalloc_index(sz, false) - __kmalloc_index(new_sz, false); /* 0 when both sizes map to the same index */
26123+#else
26124+ return -1; /* SLOB is untested; always report a difference */
26125+#endif
26126+}
26127+
26128+int au_seq_path(struct seq_file *seq, struct path *path);
26129+
26130+#ifdef CONFIG_PROC_FS
26131+/* procfs.c */
26132+int __init au_procfs_init(void);
26133+void au_procfs_fin(void);
26134+#else
26135+AuStubInt0(au_procfs_init, void);
26136+AuStubVoid(au_procfs_fin, void);
26137+#endif
26138+
26139+/* ---------------------------------------------------------------------- */
26140+
26141+/* kmem cache: indices into au_cache[] */
26142+enum {
26143+ AuCache_DINFO,
26144+ AuCache_ICNTNR,
26145+ AuCache_FINFO,
26146+ AuCache_VDIR,
26147+ AuCache_DEHSTR,
26148+ AuCache_HNOTIFY, /* must be last */
26149+ AuCache_Last
26150+};
26151+
26152+extern struct kmem_cache *au_cache[AuCache_Last];
26153+
26154+#define AuCacheFlags (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD)
26155+#define AuCache(type) KMEM_CACHE(type, AuCacheFlags)
26156+#define AuCacheCtor(type, ctor) \
26157+ kmem_cache_create(#type, sizeof(struct type), \
26158+ __alignof__(struct type), AuCacheFlags, ctor)
26159+
26160+#define AuCacheFuncAlloc(name, index) \
26161+ static inline struct au_##name *au_cache_alloc_##name(void) \
26162+ { return kmem_cache_alloc(au_cache[AuCache_##index], GFP_NOFS); }
26163+
26164+#define AuCacheFuncs(name, index) \
26165+ static inline void au_cache_free_##name##_norcu(struct au_##name *p) \
26166+ { kmem_cache_free(au_cache[AuCache_##index], p); } \
26167+ \
26168+ static inline void au_cache_free_##name##_rcu_cb(struct rcu_head *rcu) \
26169+ { void *p = rcu; \
26170+ p -= offsetof(struct au_##name, rcu); /* step back from the rcu member to the object */ \
26171+ kmem_cache_free(au_cache[AuCache_##index], p); } \
26172+ \
26173+ static inline void au_cache_free_##name##_rcu(struct au_##name *p) \
26174+ { BUILD_BUG_ON(sizeof(struct au_##name) < sizeof(struct rcu_head)); \
26175+ call_rcu(&p->rcu, au_cache_free_##name##_rcu_cb); } \
26176+ \
26177+ static inline void au_cache_free_##name(struct au_##name *p) \
26178+ { /* au_cache_free_##name##_norcu(p); */ \
26179+ au_cache_free_##name##_rcu(p); } /* the default free path is the call_rcu() one */
26179+
26180+AuCacheFuncs(dinfo, DINFO);
26181+AuCacheFuncAlloc(dinfo, DINFO);
26182+
26183+AuCacheFuncs(icntnr, ICNTNR);
26184+static inline struct au_icntnr *au_cache_alloc_icntnr(struct super_block *sb)
26185+{ return alloc_inode_sb(sb, au_cache[AuCache_ICNTNR], GFP_NOFS); } /* uses alloc_inode_sb() rather than AuCacheFuncAlloc() */
26186+
26187+AuCacheFuncs(finfo, FINFO);
26188+AuCacheFuncAlloc(finfo, FINFO);
26189+
26190+AuCacheFuncs(vdir, VDIR);
26191+AuCacheFuncAlloc(vdir, VDIR);
26192+
26193+AuCacheFuncs(vdir_dehstr, DEHSTR);
26194+AuCacheFuncAlloc(vdir_dehstr, DEHSTR);
26195+
26196+#ifdef CONFIG_AUFS_HNOTIFY
26197+AuCacheFuncs(hnotify, HNOTIFY);
26198+AuCacheFuncAlloc(hnotify, HNOTIFY);
26199+#endif
26200+
26201+#endif /* __KERNEL__ */
26202+#endif /* __AUFS_MODULE_H__ */
26203diff -urN /usr/share/empty/fs/aufs/mvdown.c linux/fs/aufs/mvdown.c
26204--- /usr/share/empty/fs/aufs/mvdown.c 1970-01-01 01:00:00.000000000 +0100
26205+++ linux/fs/aufs/mvdown.c 2022-11-05 23:02:18.969222617 +0100
26206@@ -0,0 +1,706 @@
26207+// SPDX-License-Identifier: GPL-2.0
26208+/*
26209+ * Copyright (C) 2011-2022 Junjiro R. Okajima
26210+ *
26211+ * This program is free software; you can redistribute it and/or modify
26212+ * it under the terms of the GNU General Public License as published by
26213+ * the Free Software Foundation; either version 2 of the License, or
26214+ * (at your option) any later version.
26215+ *
26216+ * This program is distributed in the hope that it will be useful,
26217+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26218+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26219+ * GNU General Public License for more details.
26220+ *
26221+ * You should have received a copy of the GNU General Public License
26222+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
26223+ */
26224+
26225+/*
26226+ * move-down, opposite of copy-up
26227+ */
26228+
26229+#include "aufs.h"
26230+
26231+struct au_mvd_args {
26232+ struct {
26233+ struct super_block *h_sb;
26234+ struct dentry *h_parent;
26235+ struct au_hinode *hdir;
26236+ struct inode *h_dir, *h_inode;
26237+ struct au_pin pin;
26238+ } info[AUFS_MVDOWN_NARRAY]; /* indexed by AUFS_MVDOWN_UPPER (src) and AUFS_MVDOWN_LOWER (dst) */
26239+
26240+ struct aufs_mvdown mvdown;
26241+ struct dentry *dentry, *parent;
26242+ struct inode *inode, *dir;
26243+ struct super_block *sb;
26244+ aufs_bindex_t bopq, bwh, bfound; /* branch indices: parent's diropq, whiteout, lower positive entry; -1 if none */
26245+ unsigned char rename_lock; /* non-zero when au_do_lock() used vfsub_lock_rename() */
26246+};
26247+
26248+#define mvd_errno mvdown.au_errno /* the mvd_* names are shorthands for members of struct au_mvd_args */
26249+#define mvd_bsrc mvdown.stbr[AUFS_MVDOWN_UPPER].bindex
26250+#define mvd_src_brid mvdown.stbr[AUFS_MVDOWN_UPPER].brid
26251+#define mvd_bdst mvdown.stbr[AUFS_MVDOWN_LOWER].bindex
26252+#define mvd_dst_brid mvdown.stbr[AUFS_MVDOWN_LOWER].brid
26253+
26254+#define mvd_h_src_sb info[AUFS_MVDOWN_UPPER].h_sb
26255+#define mvd_h_src_parent info[AUFS_MVDOWN_UPPER].h_parent
26256+#define mvd_hdir_src info[AUFS_MVDOWN_UPPER].hdir
26257+#define mvd_h_src_dir info[AUFS_MVDOWN_UPPER].h_dir
26258+#define mvd_h_src_inode info[AUFS_MVDOWN_UPPER].h_inode
26259+#define mvd_pin_src info[AUFS_MVDOWN_UPPER].pin
26260+
26261+#define mvd_h_dst_sb info[AUFS_MVDOWN_LOWER].h_sb
26262+#define mvd_h_dst_parent info[AUFS_MVDOWN_LOWER].h_parent
26263+#define mvd_hdir_dst info[AUFS_MVDOWN_LOWER].hdir
26264+#define mvd_h_dst_dir info[AUFS_MVDOWN_LOWER].h_dir
26265+#define mvd_h_dst_inode info[AUFS_MVDOWN_LOWER].h_inode
26266+#define mvd_pin_dst info[AUFS_MVDOWN_LOWER].pin
26267+
26268+#define AU_MVD_PR(flag, ...) do { \
26269+ if (flag) /* print the error only when flag is non-zero */ \
26270+ pr_err(__VA_ARGS__); \
26271+ } while (0)
26272+
26273+static int find_lower_writable(struct au_mvd_args *a)
26274+{
26275+ struct super_block *sb;
26276+ aufs_bindex_t bindex, bbot;
26277+ struct au_branch *br;
26278+
26279+ sb = a->sb;
26280+ bindex = a->mvd_bsrc;
26281+ bbot = au_sbbot(sb);
26282+ if (a->mvdown.flags & AUFS_MVDOWN_FHSM_LOWER) /* first fhsm branch below bsrc whose sb is not read-only */
26283+ for (bindex++; bindex <= bbot; bindex++) {
26284+ br = au_sbr(sb, bindex);
26285+ if (au_br_fhsm(br->br_perm)
26286+ && !sb_rdonly(au_br_sb(br)))
26287+ return bindex;
26288+ }
26289+ else if (!(a->mvdown.flags & AUFS_MVDOWN_ROLOWER)) /* first branch below bsrc that passes !au_br_rdonly() */
26290+ for (bindex++; bindex <= bbot; bindex++) {
26291+ br = au_sbr(sb, bindex);
26292+ if (!au_br_rdonly(br))
26293+ return bindex;
26294+ }
26295+ else /* ROLOWER: first branch below bsrc whose sb is not read-only */
26296+ for (bindex++; bindex <= bbot; bindex++) {
26297+ br = au_sbr(sb, bindex);
26298+ if (!sb_rdonly(au_br_sb(br))) {
26299+ if (au_br_rdonly(br)) /* record that the chosen branch itself tests as read-only */
26300+ a->mvdown.flags
26301+ |= AUFS_MVDOWN_ROLOWER_R;
26302+ return bindex;
26303+ }
26304+ }
26305+
26306+ return -1; /* no candidate below bsrc */
26307+}
26308+
26309+/* make the parent dir on bdst if it is not there yet, and cache the src/dst parents in 'a' */
26310+static int au_do_mkdir(const unsigned char dmsg, struct au_mvd_args *a)
26311+{
26312+ int err;
26313+
26314+ err = 0;
26315+ a->mvd_hdir_src = au_hi(a->dir, a->mvd_bsrc);
26316+ a->mvd_hdir_dst = au_hi(a->dir, a->mvd_bdst);
26317+ a->mvd_h_src_parent = au_h_dptr(a->parent, a->mvd_bsrc);
26318+ a->mvd_h_dst_parent = NULL;
26319+ if (au_dbbot(a->parent) >= a->mvd_bdst)
26320+ a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);
26321+ if (!a->mvd_h_dst_parent) { /* no parent dentry on bdst yet */
26322+ err = au_cpdown_dirs(a->dentry, a->mvd_bdst);
26323+ if (unlikely(err)) {
26324+ AU_MVD_PR(dmsg, "cpdown_dirs failed\n");
26325+ goto out;
26326+ }
26327+ a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst); /* re-read after au_cpdown_dirs() */
26328+ }
26329+
26330+out:
26331+ AuTraceErr(err);
26332+ return err;
26333+}
26334+
26335+/* lock them all: pin the dst parent, then the src parent; undone by au_do_unlock() */
26336+static int au_do_lock(const unsigned char dmsg, struct au_mvd_args *a)
26337+{
26338+ int err;
26339+ struct dentry *h_trap;
26340+
26341+ a->mvd_h_src_sb = au_sbr_sb(a->sb, a->mvd_bsrc);
26342+ a->mvd_h_dst_sb = au_sbr_sb(a->sb, a->mvd_bdst);
26343+ err = au_pin(&a->mvd_pin_dst, a->dentry, a->mvd_bdst,
26344+ au_opt_udba(a->sb),
26345+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
26346+ AuTraceErr(err);
26347+ if (unlikely(err)) {
26348+ AU_MVD_PR(dmsg, "pin_dst failed\n");
26349+ goto out;
26350+ }
26351+
26352+ if (a->mvd_h_src_sb != a->mvd_h_dst_sb) { /* different superblocks: pin src on its own, no rename lock */
26353+ a->rename_lock = 0;
26354+ au_pin_init(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
26355+ AuLsc_DI_PARENT, AuLsc_I_PARENT3,
26356+ au_opt_udba(a->sb),
26357+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
26358+ err = au_do_pin(&a->mvd_pin_src);
26359+ AuTraceErr(err);
26360+ a->mvd_h_src_dir = d_inode(a->mvd_h_src_parent);
26361+ if (unlikely(err)) {
26362+ AU_MVD_PR(dmsg, "pin_src failed\n");
26363+ goto out_dst;
26364+ }
26365+ goto out; /* success */
26366+ }
26367+
26368+ a->rename_lock = 1; /* same superblock: lock both parents through vfsub_lock_rename() */
26369+ au_pin_hdir_unlock(&a->mvd_pin_dst);
26370+ err = au_pin(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
26371+ au_opt_udba(a->sb),
26372+ AuPin_MNT_WRITE | AuPin_DI_LOCKED);
26373+ AuTraceErr(err);
26374+ a->mvd_h_src_dir = d_inode(a->mvd_h_src_parent);
26375+ if (unlikely(err)) {
26376+ AU_MVD_PR(dmsg, "pin_src failed\n");
26377+ au_pin_hdir_lock(&a->mvd_pin_dst);
26378+ goto out_dst;
26379+ }
26380+ au_pin_hdir_unlock(&a->mvd_pin_src);
26381+ h_trap = vfsub_lock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
26382+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
26383+ if (h_trap) { /* err stays non-zero only if the trap is neither of the two parents */
26384+ err = (h_trap != a->mvd_h_src_parent);
26385+ if (err)
26386+ err = (h_trap != a->mvd_h_dst_parent);
26387+ }
26388+ BUG_ON(err); /* it should never happen */
26389+ if (unlikely(a->mvd_h_src_dir != au_pinned_h_dir(&a->mvd_pin_src))) { /* pinned dir no longer matches the src parent's inode */
26390+ err = -EBUSY;
26391+ AuTraceErr(err);
26392+ vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
26393+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
26394+ au_pin_hdir_lock(&a->mvd_pin_src);
26395+ au_unpin(&a->mvd_pin_src);
26396+ au_pin_hdir_lock(&a->mvd_pin_dst);
26397+ goto out_dst;
26398+ }
26399+ goto out; /* success */
26400+
26401+out_dst:
26402+ au_unpin(&a->mvd_pin_dst);
26403+out:
26404+ AuTraceErr(err);
26405+ return err;
26406+}
26407+
26408+static void au_do_unlock(const unsigned char dmsg, struct au_mvd_args *a)
26409+{
26410+ if (!a->rename_lock) /* src was pinned on its own in au_do_lock() */
26411+ au_unpin(&a->mvd_pin_src);
26412+ else { /* drop the rename lock, then re-take the pin locks before unpinning */
26413+ vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
26414+ a->mvd_h_dst_parent, a->mvd_hdir_dst);
26415+ au_pin_hdir_lock(&a->mvd_pin_src);
26416+ au_unpin(&a->mvd_pin_src);
26417+ au_pin_hdir_lock(&a->mvd_pin_dst);
26418+ }
26419+ au_unpin(&a->mvd_pin_dst);
26420+}
26421+
26422+/* copy-down the file from bsrc to bdst */
26423+static int au_do_cpdown(const unsigned char dmsg, struct au_mvd_args *a)
26424+{
26425+ int err;
26426+ struct au_cp_generic cpg = {
26427+ .dentry = a->dentry,
26428+ .bdst = a->mvd_bdst,
26429+ .bsrc = a->mvd_bsrc,
26430+ .len = -1,
26431+ .pin = &a->mvd_pin_dst,
26432+ .flags = AuCpup_DTIME | AuCpup_HOPEN
26433+ };
26434+
26435+ AuDbg("b%d, b%d\n", cpg.bsrc, cpg.bdst);
26436+ if (a->mvdown.flags & AUFS_MVDOWN_OWLOWER) /* map the mvdown flags onto cpup flags */
26437+ au_fset_cpup(cpg.flags, OVERWRITE);
26438+ if (a->mvdown.flags & AUFS_MVDOWN_ROLOWER)
26439+ au_fset_cpup(cpg.flags, RWDST);
26440+ err = au_sio_cpdown_simple(&cpg);
26441+ if (unlikely(err))
26442+ AU_MVD_PR(dmsg, "cpdown failed\n");
26443+
26444+ AuTraceErr(err);
26445+ return err;
26446+}
26447+
26448+/*
26449+ * unlink the whiteout on bdst, if it exists; it may have been created by UDBA
26450+ * while we were sleeping
26451+ */
26452+static int au_do_unlink_wh(const unsigned char dmsg, struct au_mvd_args *a)
26453+{
26454+ int err;
26455+ struct path h_path;
26456+ struct au_branch *br;
26457+ struct inode *delegated;
26458+
26459+ br = au_sbr(a->sb, a->mvd_bdst);
26460+ h_path.dentry = au_wh_lkup(a->mvd_h_dst_parent, &a->dentry->d_name, br);
26461+ err = PTR_ERR(h_path.dentry);
26462+ if (IS_ERR(h_path.dentry)) {
26463+ AU_MVD_PR(dmsg, "wh_lkup failed\n");
26464+ goto out;
26465+ }
26466+
26467+ err = 0;
26468+ if (d_is_positive(h_path.dentry)) { /* a whiteout exists on bdst */
26469+ h_path.mnt = au_br_mnt(br);
26470+ delegated = NULL;
26471+ err = vfsub_unlink(d_inode(a->mvd_h_dst_parent), &h_path,
26472+ &delegated, /*force*/0);
26473+ if (unlikely(err == -EWOULDBLOCK)) {
26474+ pr_warn("cannot retry for NFSv4 delegation"
26475+ " for an internal unlink\n");
26476+ iput(delegated);
26477+ }
26478+ if (unlikely(err))
26479+ AU_MVD_PR(dmsg, "wh_unlink failed\n");
26480+ }
26481+ dput(h_path.dentry);
26482+
26483+out:
26484+ AuTraceErr(err);
26485+ return err;
26486+}
26487+
26488+/*
26489+ * unlink the topmost h_dentry, i.e. the one on bsrc
26490+ */
26491+static int au_do_unlink(const unsigned char dmsg, struct au_mvd_args *a)
26492+{
26493+ int err;
26494+ struct path h_path;
26495+ struct inode *delegated;
26496+
26497+ h_path.mnt = au_sbr_mnt(a->sb, a->mvd_bsrc);
26498+ h_path.dentry = au_h_dptr(a->dentry, a->mvd_bsrc);
26499+ delegated = NULL;
26500+ err = vfsub_unlink(a->mvd_h_src_dir, &h_path, &delegated, /*force*/0);
26501+ if (unlikely(err == -EWOULDBLOCK)) {
26502+ pr_warn("cannot retry for NFSv4 delegation"
26503+ " for an internal unlink\n");
26504+ iput(delegated);
26505+ }
26506+ if (unlikely(err))
26507+ AU_MVD_PR(dmsg, "unlink failed\n");
26508+
26509+ AuTraceErr(err);
26510+ return err;
26511+}
26512+
26513+/* Since mvdown succeeded, we ignore an error of this function; failure is only reported via a flag */
26514+static void au_do_stfs(const unsigned char dmsg, struct au_mvd_args *a)
26515+{
26516+ int err;
26517+ struct au_branch *br;
26518+
26519+ a->mvdown.flags |= AUFS_MVDOWN_STFS_FAILED; /* assume failure until both statfs calls succeed */
26520+ br = au_sbr(a->sb, a->mvd_bsrc);
26521+ err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_UPPER].stfs);
26522+ if (!err) {
26523+ br = au_sbr(a->sb, a->mvd_bdst);
26524+ a->mvdown.stbr[AUFS_MVDOWN_LOWER].brid = br->br_id;
26525+ err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_LOWER].stfs);
26526+ }
26527+ if (!err)
26528+ a->mvdown.flags &= ~AUFS_MVDOWN_STFS_FAILED;
26529+ else
26530+ AU_MVD_PR(dmsg, "statfs failed (%d), ignored\n", err);
26531+}
26532+
26533+/*
26534+ * copy-down the file and unlink the bsrc file.
26535+ * - copy-down the file (with whtmp name and rename)
26536+ * - unlink the bdst whiteout if it exists
26537+ * - unlink the bsrc file, unless AUFS_MVDOWN_KUPPER is set
26538+ */
26539+static int au_do_mvdown(const unsigned char dmsg, struct au_mvd_args *a)
26540+{
26541+ int err;
26542+
26543+ err = au_do_mkdir(dmsg, a);
26544+ if (!err)
26545+ err = au_do_lock(dmsg, a);
26546+ if (unlikely(err))
26547+ goto out;
26548+
26549+ /*
26550+ * do not revert the activities we made on bdst since they should be
26551+ * harmless in aufs.
26552+ */
26553+
26554+ err = au_do_cpdown(dmsg, a);
26555+ if (!err)
26556+ err = au_do_unlink_wh(dmsg, a);
26557+ if (!err && !(a->mvdown.flags & AUFS_MVDOWN_KUPPER))
26558+ err = au_do_unlink(dmsg, a);
26559+ if (unlikely(err))
26560+ goto out_unlock;
26561+
26562+ AuDbg("%pd2, 0x%x, %d --> %d\n",
26563+ a->dentry, a->mvdown.flags, a->mvd_bsrc, a->mvd_bdst);
26564+ if (find_lower_writable(a) < 0) /* find_lower_writable() found no candidate */
26565+ a->mvdown.flags |= AUFS_MVDOWN_BOTTOM;
26566+
26567+ if (a->mvdown.flags & AUFS_MVDOWN_STFS)
26568+ au_do_stfs(dmsg, a);
26569+
26570+ /* maintain internal array */
26571+ if (!(a->mvdown.flags & AUFS_MVDOWN_KUPPER)) {
26572+ au_set_h_dptr(a->dentry, a->mvd_bsrc, NULL);
26573+ au_set_dbtop(a->dentry, a->mvd_bdst);
26574+ au_set_h_iptr(a->inode, a->mvd_bsrc, NULL, /*flags*/0);
26575+ au_set_ibtop(a->inode, a->mvd_bdst);
26576+ } else {
26577+ /* hide the lower */
26578+ au_set_h_dptr(a->dentry, a->mvd_bdst, NULL);
26579+ au_set_dbbot(a->dentry, a->mvd_bsrc);
26580+ au_set_h_iptr(a->inode, a->mvd_bdst, NULL, /*flags*/0);
26581+ au_set_ibbot(a->inode, a->mvd_bsrc);
26582+ }
26583+ if (au_dbbot(a->dentry) < a->mvd_bdst)
26584+ au_set_dbbot(a->dentry, a->mvd_bdst);
26585+ if (au_ibbot(a->inode) < a->mvd_bdst)
26586+ au_set_ibbot(a->inode, a->mvd_bdst);
26587+
26588+out_unlock:
26589+ au_do_unlock(dmsg, a);
26590+out:
26591+ AuTraceErr(err);
26592+ return err;
26593+}
26594+
26595+/* ---------------------------------------------------------------------- */
26596+
26597+/* make sure the file is idle; returns -EBUSY otherwise */
26598+static int au_mvd_args_busy(const unsigned char dmsg, struct au_mvd_args *a)
26599+{
26600+ int err, plinked;
26601+
26602+ err = 0;
26603+ plinked = !!au_opt_test(au_mntflags(a->sb), PLINK); /* is the PLINK option set for this mount? */
26604+ if (au_dbtop(a->dentry) == a->mvd_bsrc
26605+ && au_dcount(a->dentry) == 1
26606+ && atomic_read(&a->inode->i_count) == 1
26607+ /* && a->mvd_h_src_inode->i_nlink == 1 */
26608+ && (!plinked || !au_plink_test(a->inode))
26609+ && a->inode->i_nlink == 1)
26610+ goto out;
26611+
26612+ err = -EBUSY;
26613+ AU_MVD_PR(dmsg,
26614+ "b%d, d{b%d, c%d?}, i{c%d?, l%u}, hi{l%u}, p{%d, %d}\n",
26615+ a->mvd_bsrc, au_dbtop(a->dentry), au_dcount(a->dentry),
26616+ atomic_read(&a->inode->i_count), a->inode->i_nlink,
26617+ a->mvd_h_src_inode->i_nlink,
26618+ plinked, plinked ? au_plink_test(a->inode) : 0);
26619+
26620+out:
26621+ AuTraceErr(err);
26622+ return err;
26623+}
26624+
26625+/* make sure the parent dir is fine: alive, and not opaque above bdst */
26626+static int au_mvd_args_parent(const unsigned char dmsg,
26627+ struct au_mvd_args *a)
26628+{
26629+ int err;
26630+ aufs_bindex_t bindex;
26631+
26632+ err = 0;
26633+ if (unlikely(au_alive_dir(a->parent))) { /* NOTE(review): non-zero is treated as "dead" - confirm au_alive_dir()'s return convention */
26634+ err = -ENOENT;
26635+ AU_MVD_PR(dmsg, "parent dir is dead\n");
26636+ goto out;
26637+ }
26638+
26639+ a->bopq = au_dbdiropq(a->parent);
26640+ bindex = au_wbr_nonopq(a->dentry, a->mvd_bdst);
26641+ AuDbg("b%d\n", bindex);
26642+ if (unlikely((bindex >= 0 && bindex < a->mvd_bdst)
26643+ || (a->bopq != -1 && a->bopq < a->mvd_bdst))) {
26644+ err = -EINVAL;
26645+ a->mvd_errno = EAU_MVDOWN_OPAQUE;
26646+ AU_MVD_PR(dmsg, "ancestor is opaque b%d, b%d\n",
26647+ a->bopq, a->mvd_bdst);
26648+ }
26649+
26650+out:
26651+ AuTraceErr(err);
26652+ return err;
26653+}
26654+
26655+static int au_mvd_args_intermediate(const unsigned char dmsg,
26656+ struct au_mvd_args *a)
26657+{
26658+ int err;
26659+ struct au_dinfo *dinfo, *tmp;
26660+
26661+ /* lookup the next lower positive entry */
26662+ err = -ENOMEM;
26663+ tmp = au_di_alloc(a->sb, AuLsc_DI_TMP);
26664+ if (unlikely(!tmp))
26665+ goto out;
26666+
26667+ a->bfound = -1;
26668+ a->bwh = -1;
26669+ dinfo = au_di(a->dentry);
26670+ au_di_cp(tmp, dinfo);
26671+ au_di_swap(tmp, dinfo); /* look up on a temporary dinfo; swapped back below */
26672+
26673+ /* returns the number of positive dentries */
26674+ err = au_lkup_dentry(a->dentry, a->mvd_bsrc + 1,
26675+ /* AuLkup_IGNORE_PERM */ 0);
26676+ if (!err)
26677+ a->bwh = au_dbwh(a->dentry);
26678+ else if (err > 0)
26679+ a->bfound = au_dbtop(a->dentry);
26680+
26681+ au_di_swap(tmp, dinfo);
26682+ au_rw_write_unlock(&tmp->di_rwsem);
26683+ au_di_free(tmp);
26684+ if (unlikely(err < 0)) /* NOTE(review): the error is only logged; err is overwritten below - confirm this is intended */
26685+ AU_MVD_PR(dmsg, "failed look-up lower\n");
26686+
26687+ /*
26688+ * here, we have these cases.
26689+ * bfound == -1
26690+ * no positive dentry under bsrc. there are more sub-cases.
26691+ * bwh < 0
26692+ * there is no whiteout, we can safely move-down.
26693+ * bwh <= bsrc
26694+ * impossible
26695+ * bsrc < bwh && bwh < bdst
26696+ * there is a whiteout on RO branch. cannot proceed.
26697+ * bwh == bdst
26698+ * there is a whiteout on the RW target branch. it should
26699+ * be removed.
26700+ * bdst < bwh
26701+ * there is a whiteout on some unrelated branch.
26702+ * -1 < bfound && bfound <= bsrc
26703+ * impossible.
26704+ * bfound < bdst
26705+ * found, but it is on RO branch between bsrc and bdst. cannot
26706+ * proceed.
26707+ * bfound == bdst
26708+ * found, replace it if AUFS_MVDOWN_FORCE is set. otherwise return
26709+ * error.
26710+ * bdst < bfound
26711+ * found, after we create the file on bdst, it will be hidden.
26712+ */
26713+
26714+ AuDebugOn(a->bfound == -1
26715+ && a->bwh != -1
26716+ && a->bwh <= a->mvd_bsrc);
26717+ AuDebugOn(-1 < a->bfound
26718+ && a->bfound <= a->mvd_bsrc);
26719+
26720+ err = -EINVAL;
26721+ if (a->bfound == -1
26722+ && a->mvd_bsrc < a->bwh
26723+ && a->bwh != -1
26724+ && a->bwh < a->mvd_bdst) {
26725+ a->mvd_errno = EAU_MVDOWN_WHITEOUT;
26726+ AU_MVD_PR(dmsg, "bsrc %d, bdst %d, bfound %d, bwh %d\n",
26727+ a->mvd_bsrc, a->mvd_bdst, a->bfound, a->bwh);
26728+ goto out;
26729+ } else if (a->bfound != -1 && a->bfound < a->mvd_bdst) {
26730+ a->mvd_errno = EAU_MVDOWN_UPPER;
26731+ AU_MVD_PR(dmsg, "bdst %d, bfound %d\n",
26732+ a->mvd_bdst, a->bfound);
26733+ goto out;
26734+ }
26735+
26736+ err = 0; /* success */
26737+
26738+out:
26739+ AuTraceErr(err);
26740+ return err;
26741+}
26742+
26743+static int au_mvd_args_exist(const unsigned char dmsg, struct au_mvd_args *a)
26744+{ /* -EEXIST when an entry was found on bdst and AUFS_MVDOWN_OWLOWER is not set, else 0; dmsg is unused here */
26745+ int err;
26746+
26747+ err = 0;
26748+ if (!(a->mvdown.flags & AUFS_MVDOWN_OWLOWER)
26749+ && a->bfound == a->mvd_bdst) /* bfound is set by au_mvd_args_intermediate() */
26750+ err = -EEXIST;
26751+ AuTraceErr(err);
26752+ return err;
26753+}
26754+
26755+static int au_mvd_args(const unsigned char dmsg, struct au_mvd_args *a)
26756+{ /* validate the request: pick mvd_bsrc and mvd_bdst, then run the busy/parent/intermediate/exist checks */
26757+ int err;
26758+ struct au_branch *br;
26759+
26760+ err = -EISDIR; /* directories are rejected */
26761+ if (unlikely(S_ISDIR(a->inode->i_mode)))
26762+ goto out;
26763+
26764+ err = -EINVAL;
26765+ if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_UPPER))
26766+ a->mvd_bsrc = au_ibtop(a->inode); /* default source: the inode's top branch */
26767+ else {
26768+ a->mvd_bsrc = au_br_index(a->sb, a->mvd_src_brid); /* source given by branch id */
26769+ if (unlikely(a->mvd_bsrc < 0
26770+ || (a->mvd_bsrc < au_dbtop(a->dentry)
26771+ || au_dbbot(a->dentry) < a->mvd_bsrc
26772+ || !au_h_dptr(a->dentry, a->mvd_bsrc))
26773+ || (a->mvd_bsrc < au_ibtop(a->inode)
26774+ || au_ibbot(a->inode) < a->mvd_bsrc
26775+ || !au_h_iptr(a->inode, a->mvd_bsrc)))) { /* must be inside both the dentry and inode ranges and populated */
26776+ a->mvd_errno = EAU_MVDOWN_NOUPPER;
26777+ AU_MVD_PR(dmsg, "no upper\n");
26778+ goto out;
26779+ }
26780+ }
26781+ if (unlikely(a->mvd_bsrc == au_sbbot(a->sb))) { /* nothing below the last branch */
26782+ a->mvd_errno = EAU_MVDOWN_BOTTOM;
26783+ AU_MVD_PR(dmsg, "on the bottom\n");
26784+ goto out;
26785+ }
26786+ a->mvd_h_src_inode = au_h_iptr(a->inode, a->mvd_bsrc);
26787+ br = au_sbr(a->sb, a->mvd_bsrc);
26788+ err = au_br_rdonly(br); /* non-zero is handled as "source branch is read-only" below */
26789+ if (!(a->mvdown.flags & AUFS_MVDOWN_ROUPPER)) {
26790+ if (unlikely(err))
26791+ goto out;
26792+ } else if (!(vfsub_native_ro(a->mvd_h_src_inode)
26793+ || IS_APPEND(a->mvd_h_src_inode))) {
26794+ if (err)
26795+ a->mvdown.flags |= AUFS_MVDOWN_ROUPPER_R; /* report back that the upper branch was read-only */
26796+ /* go on */
26797+ } else /* NOTE(review): err is still au_br_rdonly()'s value; if it is 0 this returns 0 before mvd_bdst is chosen -- confirm */
26798+ goto out;
26799+
26800+ err = -EINVAL;
26801+ if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_LOWER)) {
26802+ a->mvd_bdst = find_lower_writable(a); /* default destination */
26803+ if (unlikely(a->mvd_bdst < 0)) {
26804+ a->mvd_errno = EAU_MVDOWN_BOTTOM;
26805+ AU_MVD_PR(dmsg, "no writable lower branch\n");
26806+ goto out;
26807+ }
26808+ } else {
26809+ a->mvd_bdst = au_br_index(a->sb, a->mvd_dst_brid); /* destination given by branch id */
26810+ if (unlikely(a->mvd_bdst < 0
26811+ || au_sbbot(a->sb) < a->mvd_bdst)) {
26812+ a->mvd_errno = EAU_MVDOWN_NOLOWERBR;
26813+ AU_MVD_PR(dmsg, "no lower brid\n");
26814+ goto out;
26815+ }
26816+ }
26817+
26818+ err = au_mvd_args_busy(dmsg, a); /* each check runs only if the previous one returned 0 */
26819+ if (!err)
26820+ err = au_mvd_args_parent(dmsg, a);
26821+ if (!err)
26822+ err = au_mvd_args_intermediate(dmsg, a);
26823+ if (!err)
26824+ err = au_mvd_args_exist(dmsg, a);
26825+ if (!err)
26826+ AuDbg("b%d, b%d\n", a->mvd_bsrc, a->mvd_bdst);
26827+
26828+out:
26829+ AuTraceErr(err);
26830+ return err;
26831+}
26832+
26833+int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *uarg)
26834+{ /* move-down entry point: copies the request in from uarg, runs it under the dir/inode/aufs locks, copies it back out */
26835+ int err, e;
26836+ unsigned char dmsg;
26837+ struct au_mvd_args *args;
26838+ struct inode *inode;
26839+
26840+ inode = d_inode(dentry);
26841+ err = -EPERM;
26842+ if (unlikely(!capable(CAP_SYS_ADMIN))) /* CAP_SYS_ADMIN only */
26843+ goto out;
26844+
26845+ err = -ENOMEM;
26846+ args = kmalloc(sizeof(*args), GFP_NOFS);
26847+ if (unlikely(!args))
26848+ goto out;
26849+
26850+ err = copy_from_user(&args->mvdown, uarg, sizeof(args->mvdown));
26851+ if (!err)
26852+ /* VERIFY_WRITE */
26853+ err = !access_ok(uarg, sizeof(*uarg));
26854+ if (unlikely(err)) { /* either failure becomes -EFAULT */
26855+ err = -EFAULT;
26856+ AuTraceErr(err);
26857+ goto out_free;
26858+ }
26859+ AuDbg("flags 0x%x\n", args->mvdown.flags);
26860+ args->mvdown.flags &= ~(AUFS_MVDOWN_ROLOWER_R | AUFS_MVDOWN_ROUPPER_R); /* drop the *_R bits passed in; ROUPPER_R is set again in au_mvd_args() when applicable */
26861+ args->mvdown.au_errno = 0;
26862+ args->dentry = dentry;
26863+ args->inode = inode;
26864+ args->sb = dentry->d_sb;
26865+
26866+ err = -ENOENT;
26867+ dmsg = !!(args->mvdown.flags & AUFS_MVDOWN_DMSG); /* enables the AU_MVD_PR() messages */
26868+ args->parent = dget_parent(dentry);
26869+ args->dir = d_inode(args->parent);
26870+ inode_lock_nested(args->dir, I_MUTEX_PARENT);
26871+ dput(args->parent); /* reference dropped right after locking the dir; the pointer is still used below */
26872+ if (unlikely(args->parent != dentry->d_parent)) { /* parent changed before the dir lock was taken */
26873+ AU_MVD_PR(dmsg, "parent dir is moved\n");
26874+ goto out_dir;
26875+ }
26876+
26877+ inode_lock_nested(inode, I_MUTEX_CHILD);
26878+ err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_NOPLMW);
26879+ if (unlikely(err))
26880+ goto out_inode;
26881+
26882+ di_write_lock_parent(args->parent);
26883+ err = au_mvd_args(dmsg, args); /* validation; fills mvd_bsrc/mvd_bdst */
26884+ if (unlikely(err))
26885+ goto out_parent;
26886+
26887+ err = au_do_mvdown(dmsg, args);
26888+ if (unlikely(err))
26889+ goto out_parent;
26890+
26891+ au_cpup_attr_timesizes(args->dir); /* success: refresh attributes of the dir and the file */
26892+ au_cpup_attr_timesizes(inode);
26893+ if (!(args->mvdown.flags & AUFS_MVDOWN_KUPPER))
26894+ au_cpup_igen(inode, au_h_iptr(inode, args->mvd_bdst));
26895+ /* au_digen_dec(dentry); */
26896+
26897+out_parent: /* unlock in reverse order of acquisition */
26898+ di_write_unlock(args->parent);
26899+ aufs_read_unlock(dentry, AuLock_DW);
26900+out_inode:
26901+ inode_unlock(inode);
26902+out_dir:
26903+ inode_unlock(args->dir);
26904+out_free: /* also reached from the early -EFAULT path */
26905+ e = copy_to_user(uarg, &args->mvdown, sizeof(args->mvdown));
26906+ if (unlikely(e))
26907+ err = -EFAULT; /* a failed copy-out overrides any earlier result */
26908+ au_kfree_rcu(args);
26909+out:
26910+ AuTraceErr(err);
26911+ return err;
26912+}
26913diff -urN /usr/share/empty/fs/aufs/opts.c linux/fs/aufs/opts.c
26914--- /usr/share/empty/fs/aufs/opts.c 1970-01-01 01:00:00.000000000 +0100
26915+++ linux/fs/aufs/opts.c 2022-11-05 23:02:18.969222617 +0100
26916@@ -0,0 +1,1032 @@
26917+// SPDX-License-Identifier: GPL-2.0
26918+/*
26919+ * Copyright (C) 2005-2022 Junjiro R. Okajima
26920+ *
26921+ * This program is free software; you can redistribute it and/or modify
26922+ * it under the terms of the GNU General Public License as published by
26923+ * the Free Software Foundation; either version 2 of the License, or
26924+ * (at your option) any later version.
26925+ *
26926+ * This program is distributed in the hope that it will be useful,
26927+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
26928+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
26929+ * GNU General Public License for more details.
26930+ *
26931+ * You should have received a copy of the GNU General Public License
26932+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
26933+ */
26934+
26935+/*
26936+ * mount options/flags
26937+ */
26938+
26939+#include <linux/types.h> /* a distribution requires */
26940+#include <linux/parser.h>
26941+#include "aufs.h"
26942+
26943+/* ---------------------------------------------------------------------- */
26944+
26945+static const char *au_parser_pattern(int val, match_table_t tbl)
26946+{ /* map a token value back to its pattern string in tbl */
26947+ struct match_token *p;
26948+
26949+ p = tbl;
26950+ while (p->pattern) { /* a NULL pattern ends the table */
26951+ if (p->token == val)
26952+ return p->pattern; /* first entry with this token wins */
26953+ p++;
26954+ }
26955+ BUG(); /* val is not in tbl */
26956+ return "??";
26957+}
26958+
26959+static const char *au_optstr(int *val, match_table_t tbl)
26960+{ /* pop one flag from *val: return the pattern of the first entry fully contained in *val and clear its bits */
26961+ struct match_token *p;
26962+ int v;
26963+
26964+ v = *val;
26965+ if (!v)
26966+ goto out; /* nothing left to print */
26967+ p = tbl;
26968+ while (p->pattern) {
26969+ if (p->token
26970+ && (v & p->token) == p->token) { /* entries with token 0 are skipped; all bits must be set */
26971+ *val &= ~p->token;
26972+ return p->pattern;
26973+ }
26974+ p++;
26975+ }
26976+
26977+out:
26978+ return NULL; /* *val was 0 or no entry matched */
26979+}
26980+
26981+/* ---------------------------------------------------------------------- */
26982+
26983+static match_table_t brperm = { /* branch permission keywords; used by au_br_perm_val() and au_optstr_br_perm() */
26984+ {AuBrPerm_RO, AUFS_BRPERM_RO},
26985+ {AuBrPerm_RR, AUFS_BRPERM_RR},
26986+ {AuBrPerm_RW, AUFS_BRPERM_RW},
26987+ {0, NULL} /* terminator: token 0, so au_br_perm_val() sees 0 as "no match" */
26988+};
26989+
26990+static match_table_t brattr = { /* branch attribute keywords, combined with '+' after the permission */
26991+ /* general */
26992+ {AuBrAttr_COO_REG, AUFS_BRATTR_COO_REG},
26993+ {AuBrAttr_COO_ALL, AUFS_BRATTR_COO_ALL},
26994+ /* 'unpin' attrib is meaningless since linux-3.18-rc1 */
26995+ {AuBrAttr_UNPIN, AUFS_BRATTR_UNPIN},
26996+#ifdef CONFIG_AUFS_FHSM
26997+ {AuBrAttr_FHSM, AUFS_BRATTR_FHSM},
26998+#endif
26999+#ifdef CONFIG_AUFS_XATTR
27000+ {AuBrAttr_ICEX, AUFS_BRATTR_ICEX},
27001+ {AuBrAttr_ICEX_SEC, AUFS_BRATTR_ICEX_SEC},
27002+ {AuBrAttr_ICEX_SYS, AUFS_BRATTR_ICEX_SYS},
27003+ {AuBrAttr_ICEX_TR, AUFS_BRATTR_ICEX_TR},
27004+ {AuBrAttr_ICEX_USR, AUFS_BRATTR_ICEX_USR},
27005+ {AuBrAttr_ICEX_OTH, AUFS_BRATTR_ICEX_OTH},
27006+#endif
27007+
27008+ /* ro/rr branch */
27009+ {AuBrRAttr_WH, AUFS_BRRATTR_WH},
27010+
27011+ /* rw branch */
27012+ {AuBrWAttr_MOO, AUFS_BRWATTR_MOO},
27013+ {AuBrWAttr_NoLinkWH, AUFS_BRWATTR_NLWH},
27014+
27015+ {0, NULL} /* terminator: token 0 means "unknown attribute" to br_attr_val() */
27016+};
27017+
27018+static int br_attr_val(char *str, match_table_t table, substring_t args[])
27019+{ /* parse a '+'-separated attribute list (str is modified) and return the OR of the matched tokens */
27020+ int attr, v;
27021+ char *p;
27022+
27023+ attr = 0;
27024+ do {
27025+ p = strchr(str, '+');
27026+ if (p)
27027+ *p = 0; /* cut the current word; only restored on the failure path below */
27028+ v = match_token(str, table, args);
27029+ if (v) {
27030+ if (v & AuBrAttr_CMOO_Mask)
27031+ attr &= ~AuBrAttr_CMOO_Mask; /* a later attribute from this mask replaces an earlier one */
27032+ attr |= v;
27033+ } else {
27034+ if (p)
27035+ *p = '+';
27036+ pr_warn("ignored branch attribute %s\n", str);
27037+ break; /* the rest of the list is not parsed */
27038+ }
27039+ if (p)
27040+ str = p + 1;
27041+ } while (p);
27042+
27043+ return attr;
27044+}
27045+
27046+static int au_do_optstr_br_attr(au_br_perm_str_t *str, int perm)
27047+{ /* write the brattr names of the bits set in perm into str->a, joined by '+'; returns the string length */
27048+ int sz;
27049+ const char *p;
27050+ char *q;
27051+
27052+ q = str->a;
27053+ *q = 0; /* empty string when no attribute matches */
27054+ p = au_optstr(&perm, brattr); /* first name, written without a leading '+' */
27055+ if (p) {
27056+ sz = strlen(p);
27057+ memcpy(q, p, sz + 1); /* + 1 copies the terminating NUL too */
27058+ q += sz;
27059+ } else
27060+ goto out;
27061+
27062+ do { /* remaining names, each prefixed by '+' */
27063+ p = au_optstr(&perm, brattr);
27064+ if (p) {
27065+ *q++ = '+';
27066+ sz = strlen(p);
27067+ memcpy(q, p, sz + 1);
27068+ q += sz;
27069+ }
27070+ } while (p);
27071+
27072+out:
27073+ return q - str->a;
27074+}
27075+
27076+int au_br_perm_val(char *perm)
27077+{ /* convert "perm[+attr...]" (modified in place) into branch permission/attribute flags */
27078+ int val, bad, sz;
27079+ char *p;
27080+ substring_t args[MAX_OPT_ARGS];
27081+ au_br_perm_str_t attr;
27082+
27083+ p = strchr(perm, '+');
27084+ if (p)
27085+ *p = 0; /* isolate the permission keyword */
27086+ val = match_token(perm, brperm, args);
27087+ if (!val) { /* unknown permission: warn and fall back to RO without attributes */
27088+ if (p)
27089+ *p = '+';
27090+ pr_warn("ignored branch permission %s\n", perm);
27091+ val = AuBrPerm_RO;
27092+ goto out;
27093+ }
27094+ if (!p)
27095+ goto out; /* no attribute list given */
27096+
27097+ val |= br_attr_val(p + 1, brattr, args);
27098+
27099+ bad = 0; /* collects attributes dropped from val, for the warning below */
27100+ switch (val & AuBrPerm_Mask) {
27101+ case AuBrPerm_RO:
27102+ case AuBrPerm_RR:
27103+ bad = val & AuBrWAttr_Mask; /* AuBrWAttr_* bits are removed for ro/rr */
27104+ val &= ~AuBrWAttr_Mask;
27105+ break;
27106+ case AuBrPerm_RW:
27107+ bad = val & AuBrRAttr_Mask; /* AuBrRAttr_* bits are removed for rw */
27108+ val &= ~AuBrRAttr_Mask;
27109+ break;
27110+ }
27111+
27112+ /*
27113+ * the 'unpin' attribute became meaningless in linux-3.18-rc1, but aufs
27114+ * does not treat it as an error; it only warns.
27115+ * this is a tiny guard for the user operation.
27116+ */
27117+ if (val & AuBrAttr_UNPIN) {
27118+ bad |= AuBrAttr_UNPIN;
27119+ val &= ~AuBrAttr_UNPIN;
27120+ }
27121+
27122+ if (unlikely(bad)) {
27123+ sz = au_do_optstr_br_attr(&attr, bad); /* sz is only used by the debug assertion */
27124+ AuDebugOn(!sz);
27125+ pr_warn("ignored branch attribute %s\n", attr.a);
27126+ }
27127+
27128+out:
27129+ return val;
27130+}
27131+
27132+void au_optstr_br_perm(au_br_perm_str_t *str, int perm)
27133+{ /* format perm as "perm[+attr...]" into str->a */
27134+ au_br_perm_str_t attr;
27135+ const char *p;
27136+ char *q;
27137+ int sz;
27138+
27139+ q = str->a;
27140+ p = au_optstr(&perm, brperm); /* also clears the permission bits from the local perm */
27141+ AuDebugOn(!p || !*p);
27142+ sz = strlen(p);
27143+ memcpy(q, p, sz + 1);
27144+ q += sz;
27145+
27146+ sz = au_do_optstr_br_attr(&attr, perm); /* remaining bits are formatted as attributes */
27147+ if (sz) {
27148+ *q++ = '+';
27149+ memcpy(q, attr.a, sz + 1);
27150+ }
27151+
27152+ AuDebugOn(strlen(str->a) >= sizeof(str->a)); /* checked after the copy, debug builds only */
27153+}
27154+
27155+/* ---------------------------------------------------------------------- */
27156+
27157+static match_table_t udbalevel = { /* keywords accepted by au_udba_val() */
27158+ {AuOpt_UDBA_REVAL, "reval"},
27159+ {AuOpt_UDBA_NONE, "none"},
27160+#ifdef CONFIG_AUFS_HNOTIFY
27161+ {AuOpt_UDBA_HNOTIFY, "notify"}, /* abstraction */
27162+#ifdef CONFIG_AUFS_HFSNOTIFY
27163+ {AuOpt_UDBA_HNOTIFY, "fsnotify"}, /* same token as "notify" */
27164+#endif
27165+#endif
27166+ {-1, NULL} /* terminator */
27167+};
27168+
27169+int au_udba_val(char *str)
27170+{ /* look str up in udbalevel; the result is whatever match_token() returns for that table */
27171+ substring_t args[MAX_OPT_ARGS]; /* required by match_token(); not used afterwards */
27172+
27173+ return match_token(str, udbalevel, args);
27174+}
27175+
27176+const char *au_optstr_udba(int udba)
27177+{ /* keyword for a udba token; au_parser_pattern() hits BUG() on a value not in udbalevel */
27178+ return au_parser_pattern(udba, udbalevel);
27179+}
27180+
27181+/* ---------------------------------------------------------------------- */
27182+
27183+static match_table_t au_wbr_create_policy = { /* create-policy keywords; each %d is parsed in au_wbr_create_val() */
27184+ {AuWbrCreate_TDP, "tdp"},
27185+ {AuWbrCreate_TDP, "top-down-parent"},
27186+ {AuWbrCreate_RR, "rr"},
27187+ {AuWbrCreate_RR, "round-robin"},
27188+ {AuWbrCreate_MFS, "mfs"},
27189+ {AuWbrCreate_MFS, "most-free-space"},
27190+ {AuWbrCreate_MFSV, "mfs:%d"},
27191+ {AuWbrCreate_MFSV, "most-free-space:%d"},
27192+
27193+ /* top-down regardless of the parent, and then mfs */
27194+ {AuWbrCreate_TDMFS, "tdmfs:%d"},
27195+ {AuWbrCreate_TDMFSV, "tdmfs:%d:%d"},
27196+
27197+ {AuWbrCreate_MFSRR, "mfsrr:%d"},
27198+ {AuWbrCreate_MFSRRV, "mfsrr:%d:%d"},
27199+ {AuWbrCreate_PMFS, "pmfs"},
27200+ {AuWbrCreate_PMFSV, "pmfs:%d"},
27201+ {AuWbrCreate_PMFSRR, "pmfsrr:%d"},
27202+ {AuWbrCreate_PMFSRRV, "pmfsrr:%d:%d"},
27203+
27204+ {-1, NULL} /* terminator */
27205+};
27206+
27207+static int au_wbr_mfs_wmark(substring_t *arg, char *str,
27208+ struct au_opt_wbr_create *create)
27209+{ /* parse arg as u64 into create->mfsrr_watermark; str is only used in the error message */
27210+ int err;
27211+ unsigned long long ull;
27212+
27213+ err = 0;
27214+ if (!match_u64(arg, &ull)) /* 0 from match_u64() is taken as success */
27215+ create->mfsrr_watermark = ull;
27216+ else {
27217+ pr_err("bad integer in %s\n", str);
27218+ err = -EINVAL;
27219+ }
27220+
27221+ return err;
27222+}
27223+
27224+static int au_wbr_mfs_sec(substring_t *arg, char *str,
27225+ struct au_opt_wbr_create *create)
27226+{ /* parse arg as an int within [0, AUFS_MFS_MAX_SEC] into create->mfs_second; -EINVAL otherwise */
27227+ int n, err;
27228+
27229+ err = 0;
27230+ if (!match_int(arg, &n) && 0 <= n && n <= AUFS_MFS_MAX_SEC) /* n is read only after a successful parse */
27231+ create->mfs_second = n;
27232+ else {
27233+ pr_err("bad integer in %s\n", str);
27234+ err = -EINVAL;
27235+ }
27236+
27237+ return err;
27238+}
27239+
27240+int au_wbr_create_val(char *str, struct au_opt_wbr_create *create)
27241+{ /* parse a create-policy string; returns the matched token or the error from an argument parser */
27242+ int err, e;
27243+ substring_t args[MAX_OPT_ARGS];
27244+
27245+ err = match_token(str, au_wbr_create_policy, args);
27246+ create->wbr_create = err; /* stored even if parsing an argument fails below */
27247+ switch (err) {
27248+ case AuWbrCreate_MFSRRV: /* "<policy>:<watermark>:<seconds>" */
27249+ case AuWbrCreate_TDMFSV:
27250+ case AuWbrCreate_PMFSRRV:
27251+ e = au_wbr_mfs_wmark(&args[0], str, create);
27252+ if (!e)
27253+ e = au_wbr_mfs_sec(&args[1], str, create);
27254+ if (unlikely(e))
27255+ err = e;
27256+ break;
27257+ case AuWbrCreate_MFSRR: /* "<policy>:<watermark>", default seconds */
27258+ case AuWbrCreate_TDMFS:
27259+ case AuWbrCreate_PMFSRR:
27260+ e = au_wbr_mfs_wmark(&args[0], str, create);
27261+ if (unlikely(e)) {
27262+ err = e;
27263+ break;
27264+ }
27265+ fallthrough;
27266+ case AuWbrCreate_MFS: /* no seconds given: use AUFS_MFS_DEF_SEC */
27267+ case AuWbrCreate_PMFS:
27268+ create->mfs_second = AUFS_MFS_DEF_SEC;
27269+ break;
27270+ case AuWbrCreate_MFSV: /* "<policy>:<seconds>" */
27271+ case AuWbrCreate_PMFSV:
27272+ e = au_wbr_mfs_sec(&args[0], str, create);
27273+ if (unlikely(e))
27274+ err = e;
27275+ break;
27276+ }
27277+
27278+ return err;
27279+}
27280+
27281+const char *au_optstr_wbr_create(int wbr_create)
27282+{ /* first keyword listed for this token in au_wbr_create_policy; BUG() in au_parser_pattern() if absent */
27283+ return au_parser_pattern(wbr_create, au_wbr_create_policy);
27284+}
27285+
27286+static match_table_t au_wbr_copyup_policy = { /* copy-up policy keywords: short and long form share a token */
27287+ {AuWbrCopyup_TDP, "tdp"},
27288+ {AuWbrCopyup_TDP, "top-down-parent"},
27289+ {AuWbrCopyup_BUP, "bup"},
27290+ {AuWbrCopyup_BUP, "bottom-up-parent"},
27291+ {AuWbrCopyup_BU, "bu"},
27292+ {AuWbrCopyup_BU, "bottom-up"},
27293+ {-1, NULL} /* terminator */
27294+};
27295+
27296+int au_wbr_copyup_val(char *str)
27297+{ /* look str up in au_wbr_copyup_policy; the result is whatever match_token() returns */
27298+ substring_t args[MAX_OPT_ARGS]; /* required by match_token(); not used afterwards */
27299+
27300+ return match_token(str, au_wbr_copyup_policy, args);
27301+}
27302+
27303+const char *au_optstr_wbr_copyup(int wbr_copyup)
27304+{ /* first keyword listed for this token in au_wbr_copyup_policy; BUG() in au_parser_pattern() if absent */
27305+ return au_parser_pattern(wbr_copyup, au_wbr_copyup_policy);
27306+}
27307+
27308+/* ---------------------------------------------------------------------- */
27309+
27310+int au_opt_add(struct au_opt *opt, char *opt_str, unsigned long sb_flags,
27311+ aufs_bindex_t bindex)
27312+{ /* fill opt->add from "path[=perm]" (opt_str is split in place); 0 on success, -EINVAL if the path lookup fails */
27313+ int err;
27314+ struct au_opt_add *add = &opt->add;
27315+ char *p;
27316+
27317+ add->bindex = bindex;
27318+ add->perm = AuBrPerm_RO; /* also the result for "path=" with an empty permission */
27319+ add->pathname = opt_str;
27320+ p = strchr(opt_str, '=');
27321+ if (p) {
27322+ *p++ = 0; /* terminate the pathname; p now points at the permission text */
27323+ if (*p)
27324+ add->perm = au_br_perm_val(p);
27325+ }
27326+
27327+ err = vfsub_kern_path(add->pathname, AuOpt_LkupDirFlags, &add->path);
27328+ if (!err) {
27329+ if (!p) { /* no '=' at all: choose a default permission */
27330+ add->perm = AuBrPerm_RO;
27331+ if (au_test_fs_rr(add->path.dentry->d_sb))
27332+ add->perm = AuBrPerm_RR;
27333+ else if (!bindex && !(sb_flags & SB_RDONLY))
27334+ add->perm = AuBrPerm_RW; /* only for index 0 when SB_RDONLY is not set */
27335+ }
27336+ opt->type = Opt_add;
27337+ goto out;
27338+ }
27339+ pr_err("lookup failed %s (%d)\n", add->pathname, err);
27340+ err = -EINVAL; /* the lookup error is only logged */
27341+
27342+out:
27343+ return err;
27344+}
27345+
27346+static int au_opt_wbr_create(struct super_block *sb,
27347+ struct au_opt_wbr_create *create)
27348+{ /* switch the superblock to a new create policy; returns 1, or the non-zero result of the old policy's ->fin() */
27349+ int err;
27350+ struct au_sbinfo *sbinfo;
27351+
27352+ SiMustWriteLock(sb);
27353+
27354+ err = 1; /* handled */
27355+ sbinfo = au_sbi(sb);
27356+ if (sbinfo->si_wbr_create_ops->fin) { /* let the current policy finish first */
27357+ err = sbinfo->si_wbr_create_ops->fin(sb);
27358+ if (!err)
27359+ err = 1;
27360+ }
27361+
27362+ sbinfo->si_wbr_create = create->wbr_create; /* done regardless of the ->fin() result above */
27363+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + create->wbr_create; /* token is used as the table index */
27364+ switch (create->wbr_create) {
27365+ case AuWbrCreate_MFSRRV:
27366+ case AuWbrCreate_MFSRR:
27367+ case AuWbrCreate_TDMFS:
27368+ case AuWbrCreate_TDMFSV:
27369+ case AuWbrCreate_PMFSRR:
27370+ case AuWbrCreate_PMFSRRV:
27371+ sbinfo->si_wbr_mfs.mfsrr_watermark = create->mfsrr_watermark;
27372+ fallthrough;
27373+ case AuWbrCreate_MFS:
27374+ case AuWbrCreate_MFSV:
27375+ case AuWbrCreate_PMFS:
27376+ case AuWbrCreate_PMFSV:
27377+ sbinfo->si_wbr_mfs.mfs_expire
27378+ = msecs_to_jiffies(create->mfs_second * MSEC_PER_SEC); /* seconds -> jiffies */
27379+ break;
27380+ }
27381+
27382+ if (sbinfo->si_wbr_create_ops->init)
27383+ sbinfo->si_wbr_create_ops->init(sb); /* ignore */
27384+
27385+ return err;
27386+}
27387+
27388+/*
27389+ * returns (a non-zero result from a helper is passed through unchanged),
27390+ * plus: processed without an error
27391+ * zero: unprocessed
27392+ */
27393+static int au_opt_simple(struct super_block *sb, struct au_opt *opt,
27394+ struct au_opts *opts)
27395+{ /* apply one option that only touches superblock flags/values; other option types fall to 'default' */
27396+ int err;
27397+ struct au_sbinfo *sbinfo;
27398+
27399+ SiMustWriteLock(sb);
27400+
27401+ err = 1; /* handled */
27402+ sbinfo = au_sbi(sb);
27403+ switch (opt->type) {
27404+ case Opt_udba:
27405+ sbinfo->si_mntflags &= ~AuOptMask_UDBA; /* replace the current udba level */
27406+ sbinfo->si_mntflags |= opt->udba;
27407+ opts->given_udba |= opt->udba;
27408+ break;
27409+
27410+ case Opt_plink:
27411+ if (opt->tf)
27412+ au_opt_set(sbinfo->si_mntflags, PLINK);
27413+ else {
27414+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
27415+ au_plink_put(sb, /*verbose*/1); /* only when PLINK was set before */
27416+ au_opt_clr(sbinfo->si_mntflags, PLINK);
27417+ }
27418+ break;
27419+ case Opt_list_plink:
27420+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
27421+ au_plink_list(sb);
27422+ break;
27423+
27424+ case Opt_dio:
27425+ if (opt->tf) {
27426+ au_opt_set(sbinfo->si_mntflags, DIO);
27427+ au_fset_opts(opts->flags, REFRESH_DYAOP);
27428+ } else {
27429+ au_opt_clr(sbinfo->si_mntflags, DIO);
27430+ au_fset_opts(opts->flags, REFRESH_DYAOP); /* set in both branches */
27431+ }
27432+ break;
27433+
27434+ case Opt_fhsm_sec:
27435+ au_fhsm_set(sbinfo, opt->fhsm_second);
27436+ break;
27437+
27438+ case Opt_diropq_a:
27439+ au_opt_set(sbinfo->si_mntflags, ALWAYS_DIROPQ);
27440+ break;
27441+ case Opt_diropq_w:
27442+ au_opt_clr(sbinfo->si_mntflags, ALWAYS_DIROPQ);
27443+ break;
27444+
27445+ case Opt_warn_perm:
27446+ if (opt->tf)
27447+ au_opt_set(sbinfo->si_mntflags, WARN_PERM);
27448+ else
27449+ au_opt_clr(sbinfo->si_mntflags, WARN_PERM);
27450+ break;
27451+
27452+ case Opt_verbose:
27453+ if (opt->tf)
27454+ au_opt_set(sbinfo->si_mntflags, VERBOSE);
27455+ else
27456+ au_opt_clr(sbinfo->si_mntflags, VERBOSE);
27457+ break;
27458+
27459+ case Opt_sum:
27460+ if (opt->tf)
27461+ au_opt_set(sbinfo->si_mntflags, SUM);
27462+ else {
27463+ au_opt_clr(sbinfo->si_mntflags, SUM); /* turning sum off clears SUM_W as well */
27464+ au_opt_clr(sbinfo->si_mntflags, SUM_W);
27465+ }
27466+ break;
27467+ case Opt_wsum:
27468+ au_opt_clr(sbinfo->si_mntflags, SUM); /* SUM_W replaces SUM */
27469+ au_opt_set(sbinfo->si_mntflags, SUM_W);
27470+ break;
27471+
27472+ case Opt_wbr_create:
27473+ err = au_opt_wbr_create(sb, &opt->wbr_create);
27474+ break;
27475+ case Opt_wbr_copyup:
27476+ sbinfo->si_wbr_copyup = opt->wbr_copyup;
27477+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + opt->wbr_copyup; /* token is used as the table index */
27478+ break;
27479+
27480+ case Opt_dirwh:
27481+ sbinfo->si_dirwh = opt->dirwh;
27482+ break;
27483+
27484+ case Opt_rdcache:
27485+ sbinfo->si_rdcache
27486+ = msecs_to_jiffies(opt->rdcache * MSEC_PER_SEC); /* seconds -> jiffies */
27487+ break;
27488+ case Opt_rdblk:
27489+ sbinfo->si_rdblk = opt->rdblk;
27490+ break;
27491+ case Opt_rdhash:
27492+ sbinfo->si_rdhash = opt->rdhash;
27493+ break;
27494+
27495+ case Opt_shwh:
27496+ if (opt->tf)
27497+ au_opt_set(sbinfo->si_mntflags, SHWH);
27498+ else
27499+ au_opt_clr(sbinfo->si_mntflags, SHWH);
27500+ break;
27501+
27502+ case Opt_dirperm1:
27503+ if (opt->tf)
27504+ au_opt_set(sbinfo->si_mntflags, DIRPERM1);
27505+ else
27506+ au_opt_clr(sbinfo->si_mntflags, DIRPERM1);
27507+ break;
27508+
27509+ case Opt_trunc_xino:
27510+ if (opt->tf)
27511+ au_opt_set(sbinfo->si_mntflags, TRUNC_XINO);
27512+ else
27513+ au_opt_clr(sbinfo->si_mntflags, TRUNC_XINO);
27514+ break;
27515+
27516+ case Opt_trunc_xino_path:
27517+ case Opt_itrunc_xino:
27518+ err = au_xino_trunc(sb, opt->xino_itrunc.bindex,
27519+ /*idx_begin*/0);
27520+ if (!err)
27521+ err = 1; /* 0 from the helper means "handled" here */
27522+ break;
27523+
27524+ case Opt_trunc_xib:
27525+ if (opt->tf)
27526+ au_fset_opts(opts->flags, TRUNC_XIB);
27527+ else
27528+ au_fclr_opts(opts->flags, TRUNC_XIB);
27529+ break;
27530+
27531+ case Opt_dirren:
27532+ err = 1;
27533+ if (opt->tf) {
27534+ if (!au_opt_test(sbinfo->si_mntflags, DIRREN)) {
27535+ err = au_dr_opt_set(sb);
27536+ if (!err)
27537+ err = 1;
27538+ }
27539+ if (err == 1) /* the flag changes only when the helper did not fail */
27540+ au_opt_set(sbinfo->si_mntflags, DIRREN);
27541+ } else {
27542+ if (au_opt_test(sbinfo->si_mntflags, DIRREN)) {
27543+ err = au_dr_opt_clr(sb, au_ftest_opts(opts->flags,
27544+ DR_FLUSHED));
27545+ if (!err)
27546+ err = 1;
27547+ }
27548+ if (err == 1)
27549+ au_opt_clr(sbinfo->si_mntflags, DIRREN);
27550+ }
27551+ break;
27552+
27553+ case Opt_acl:
27554+ if (opt->tf)
27555+ sb->s_flags |= SB_POSIXACL;
27556+ else
27557+ sb->s_flags &= ~SB_POSIXACL;
27558+ break;
27559+
27560+ default:
27561+ err = 0; /* unprocessed: not an option handled by this function */
27562+ break;
27563+ }
27564+
27565+ return err;
27566+}
27567+
27568+/*
27569+ * apply one branch add/del/mod option. returns tri-state.
27570+ * plus: processed without an error
27571+ * zero: unprocessed
27572+ * minus: error
27573+ */
27574+static int au_opt_br(struct super_block *sb, struct au_opt *opt,
27575+ struct au_opts *opts)
27576+{
27577+ int err, do_refresh; /* do_refresh is read only after au_br_mod() returned 0; presumably set there -- TODO confirm */
27578+
27579+ err = 0;
27580+ switch (opt->type) {
27581+ case Opt_append:
27582+ opt->add.bindex = au_sbbot(sb) + 1; /* one past the current bottom index */
27583+ if (opt->add.bindex < 0)
27584+ opt->add.bindex = 0;
27585+ goto add;
27586+ /* Always goto add, not fallthrough */
27587+ case Opt_prepend:
27588+ opt->add.bindex = 0;
27589+ fallthrough;
27590+ add: /* indented label */
27591+ case Opt_add:
27592+ err = au_br_add(sb, &opt->add,
27593+ au_ftest_opts(opts->flags, REMOUNT));
27594+ if (!err) {
27595+ err = 1; /* processed */
27596+ au_fset_opts(opts->flags, REFRESH);
27597+ }
27598+ break;
27599+
27600+ case Opt_del:
27601+ case Opt_idel:
27602+ err = au_br_del(sb, &opt->del,
27603+ au_ftest_opts(opts->flags, REMOUNT));
27604+ if (!err) {
27605+ err = 1;
27606+ au_fset_opts(opts->flags, TRUNC_XIB); /* a successful delete also requests TRUNC_XIB */
27607+ au_fset_opts(opts->flags, REFRESH);
27608+ }
27609+ break;
27610+
27611+ case Opt_mod:
27612+ case Opt_imod:
27613+ err = au_br_mod(sb, &opt->mod,
27614+ au_ftest_opts(opts->flags, REMOUNT),
27615+ &do_refresh);
27616+ if (!err) {
27617+ err = 1;
27618+ if (do_refresh)
27619+ au_fset_opts(opts->flags, REFRESH);
27620+ }
27621+ break;
27622+ }
27623+ return err;
27624+}
27625+
27626+static int au_opt_xino(struct super_block *sb, struct au_opt *opt,
27627+ struct au_opt_xino **opt_xino,
27628+ struct au_opts *opts)
27629+{ /* handle xino/noxino; returns 0 or au_xino_set()'s error, and records in *opt_xino that one was processed */
27630+ int err;
27631+
27632+ err = 0; /* also the result for every other option type */
27633+ switch (opt->type) {
27634+ case Opt_xino:
27635+ err = au_xino_set(sb, &opt->xino,
27636+ !!au_ftest_opts(opts->flags, REMOUNT));
27637+ if (!err)
27638+ *opt_xino = &opt->xino;
27639+ break;
27640+ case Opt_noxino:
27641+ au_xino_clr(sb);
27642+ *opt_xino = (void *)-1; /* non-NULL marker, not a valid pointer; au_opts_mount() only tests !opt_xino */
27643+ break;
27644+ }
27645+
27646+ return err;
27647+}
27648+
27649+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
27650+ unsigned int pending)
27651+{ /* check options against the branches: warn on odd combinations, run au_wh_init() on branches not skipped, update the NO_DREVAL/FHSM flags */
27652+ int err, fhsm;
27653+ aufs_bindex_t bindex, bbot;
27654+ unsigned char do_plink, skip, do_free, can_no_dreval;
27655+ struct au_branch *br;
27656+ struct au_wbr *wbr;
27657+ struct dentry *root, *dentry;
27658+ struct inode *dir, *h_dir;
27659+ struct au_sbinfo *sbinfo;
27660+ struct au_hinode *hdir;
27661+
27662+ SiMustAnyLock(sb);
27663+
27664+ sbinfo = au_sbi(sb);
27665+ AuDebugOn(!(sbinfo->si_mntflags & AuOptMask_UDBA));
27666+
27667+ if (!(sb_flags & SB_RDONLY)) { /* warnings only; nothing is changed here */
27668+ if (unlikely(!au_br_writable(au_sbr_perm(sb, 0))))
27669+ pr_warn("first branch should be rw\n");
27670+ if (unlikely(au_opt_test(sbinfo->si_mntflags, SHWH)))
27671+ pr_warn_once("shwh should be used with ro\n");
27672+ }
27673+
27674+ if (au_opt_test((sbinfo->si_mntflags | pending), UDBA_HNOTIFY)
27675+ && !au_opt_test(sbinfo->si_mntflags, XINO)) /* 'pending' is OR-ed in only for the udba tests */
27676+ pr_warn_once("udba=*notify requires xino\n");
27677+
27678+ if (au_opt_test(sbinfo->si_mntflags, DIRPERM1))
27679+ pr_warn_once("dirperm1 breaks the protection"
27680+ " by the permission bits on the lower branch\n");
27681+
27682+ err = 0;
27683+ fhsm = 0; /* counts branches for which au_br_fhsm() is true */
27684+ root = sb->s_root;
27685+ dir = d_inode(root);
27686+ do_plink = !!au_opt_test(sbinfo->si_mntflags, PLINK);
27687+ can_no_dreval = !!au_opt_test((sbinfo->si_mntflags | pending),
27688+ UDBA_NONE);
27689+ bbot = au_sbbot(sb);
27690+ for (bindex = 0; !err && bindex <= bbot; bindex++) { /* stops at the first au_wh_init() failure */
27691+ skip = 0;
27692+ h_dir = au_h_iptr(dir, bindex);
27693+ br = au_sbr(sb, bindex);
27694+
27695+ if ((br->br_perm & AuBrAttr_ICEX)
27696+ && !h_dir->i_op->listxattr) /* no listxattr on the branch root: drop the ICEX bits */
27697+ br->br_perm &= ~AuBrAttr_ICEX;
27698+#if 0 /* untested */
27699+ if ((br->br_perm & AuBrAttr_ICEX_SEC)
27700+ && (au_br_sb(br)->s_flags & SB_NOSEC))
27701+ br->br_perm &= ~AuBrAttr_ICEX_SEC;
27702+#endif
27703+
27704+ do_free = 0;
27705+ wbr = br->br_wbr;
27706+ if (wbr)
27707+ wbr_wh_read_lock(wbr);
27708+
27709+ if (!au_br_writable(br->br_perm)) { /* non-writable branch: an existing wbr is freed after au_wh_init() */
27710+ do_free = !!wbr;
27711+ skip = (!wbr
27712+ || (!wbr->wbr_whbase
27713+ && !wbr->wbr_plink
27714+ && !wbr->wbr_orph));
27715+ } else if (!au_br_wh_linkable(br->br_perm)) {
27716+ /* skip = (!br->br_whbase && !br->br_orph); */
27717+ skip = (!wbr || !wbr->wbr_whbase);
27718+ if (skip && wbr) {
27719+ if (do_plink) /* skip only if wbr_plink already agrees with the PLINK option */
27720+ skip = !!wbr->wbr_plink;
27721+ else
27722+ skip = !wbr->wbr_plink;
27723+ }
27724+ } else {
27725+ /* skip = (br->br_whbase && br->br_ohph); */
27726+ skip = (wbr && wbr->wbr_whbase);
27727+ if (skip) {
27728+ if (do_plink)
27729+ skip = !!wbr->wbr_plink;
27730+ else
27731+ skip = !wbr->wbr_plink;
27732+ }
27733+ }
27734+ if (wbr)
27735+ wbr_wh_read_unlock(wbr);
27736+
27737+ if (can_no_dreval) { /* cleared once any branch root has a (weak) revalidate op */
27738+ dentry = br->br_path.dentry;
27739+ spin_lock(&dentry->d_lock);
27740+ if (dentry->d_flags &
27741+ (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE))
27742+ can_no_dreval = 0;
27743+ spin_unlock(&dentry->d_lock);
27744+ }
27745+
27746+ if (au_br_fhsm(br->br_perm)) {
27747+ fhsm++;
27748+ AuDebugOn(!br->br_fhsm);
27749+ }
27750+
27751+ if (skip)
27752+ continue;
27753+
27754+ hdir = au_hi(dir, bindex);
27755+ au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
27756+ if (wbr)
27757+ wbr_wh_write_lock(wbr);
27758+ err = au_wh_init(br, sb);
27759+ if (wbr)
27760+ wbr_wh_write_unlock(wbr);
27761+ au_hn_inode_unlock(hdir);
27762+
27763+ if (!err && do_free) {
27764+ au_kfree_rcu(wbr);
27765+ br->br_wbr = NULL;
27766+ }
27767+ }
27768+
27769+ if (can_no_dreval)
27770+ au_fset_si(sbinfo, NO_DREVAL);
27771+ else
27772+ au_fclr_si(sbinfo, NO_DREVAL);
27773+
27774+ if (fhsm >= 2) { /* FHSM is enabled only with at least two such branches */
27775+ au_fset_si(sbinfo, FHSM);
27776+ for (bindex = bbot; bindex >= 0; bindex--) { /* the highest-index fhsm branch becomes the bottom */
27777+ br = au_sbr(sb, bindex);
27778+ if (au_br_fhsm(br->br_perm)) {
27779+ au_fhsm_set_bottom(sb, bindex);
27780+ break;
27781+ }
27782+ }
27783+ } else {
27784+ au_fclr_si(sbinfo, FHSM);
27785+ au_fhsm_set_bottom(sb, -1);
27786+ }
27787+
27788+ return err;
27789+}
27790+
27791+int au_opts_mount(struct super_block *sb, struct au_opts *opts)
27792+{ /* apply the parsed options in three passes (simple, branch, xino), verify, then restore xino and udba */
27793+ int err;
27794+ unsigned int tmp;
27795+ aufs_bindex_t bindex, bbot;
27796+ struct au_opt *opt;
27797+ struct au_opt_xino *opt_xino, xino;
27798+ struct au_sbinfo *sbinfo;
27799+ struct au_branch *br;
27800+ struct inode *dir;
27801+
27802+ SiMustWriteLock(sb);
27803+
27804+ err = 0;
27805+ opt_xino = NULL;
27806+ opt = opts->opt;
27807+ while (err >= 0 && opt->type != Opt_tail) /* pass 1: simple options; stops on a negative result */
27808+ err = au_opt_simple(sb, opt++, opts);
27809+ if (err > 0)
27810+ err = 0;
27811+ else if (unlikely(err < 0))
27812+ goto out;
27813+
27814+ /* disable xino and udba temporarily */
27815+ sbinfo = au_sbi(sb);
27816+ tmp = sbinfo->si_mntflags; /* saved so that both can be restored below */
27817+ au_opt_clr(sbinfo->si_mntflags, XINO);
27818+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_REVAL);
27819+
27820+ opt = opts->opt;
27821+ while (err >= 0 && opt->type != Opt_tail) /* pass 2: branch add/del/mod */
27822+ err = au_opt_br(sb, opt++, opts);
27823+ if (err > 0)
27824+ err = 0;
27825+ else if (unlikely(err < 0))
27826+ goto out;
27827+
27828+ bbot = au_sbbot(sb);
27829+ if (unlikely(bbot < 0)) {
27830+ err = -EINVAL;
27831+ pr_err("no branches\n");
27832+ goto out;
27833+ }
27834+
27835+ if (au_opt_test(tmp, XINO))
27836+ au_opt_set(sbinfo->si_mntflags, XINO);
27837+ opt = opts->opt;
27838+ while (!err && opt->type != Opt_tail) /* pass 3: xino/noxino */
27839+ err = au_opt_xino(sb, opt++, &opt_xino, opts);
27840+ if (unlikely(err))
27841+ goto out;
27842+
27843+ err = au_opts_verify(sb, sb->s_flags, tmp);
27844+ if (unlikely(err))
27845+ goto out;
27846+
27847+ /* restore xino */
27848+ if (au_opt_test(tmp, XINO) && !opt_xino) { /* xino was on and no xino/noxino option was processed */
27849+ xino.file = au_xino_def(sb);
27850+ err = PTR_ERR(xino.file);
27851+ if (IS_ERR(xino.file))
27852+ goto out;
27853+
27854+ err = au_xino_set(sb, &xino, /*remount*/0);
27855+ fput(xino.file);
27856+ if (unlikely(err))
27857+ goto out;
27858+ }
27859+
27860+ /* restore udba */
27861+ tmp &= AuOptMask_UDBA; /* from here on tmp holds only the saved udba bits */
27862+ sbinfo->si_mntflags &= ~AuOptMask_UDBA;
27863+ sbinfo->si_mntflags |= tmp;
27864+ bbot = au_sbbot(sb);
27865+ for (bindex = 0; bindex <= bbot; bindex++) {
27866+ br = au_sbr(sb, bindex);
27867+ err = au_hnotify_reset_br(tmp, br, br->br_perm);
27868+ if (unlikely(err))
27869+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
27870+ bindex, err);
27871+ /* go on even if err; NOTE(review): the last branch's err is still what 'out:' returns -- confirm */
27872+ }
27873+ if (au_opt_test(tmp, UDBA_HNOTIFY)) {
27874+ dir = d_inode(sb->s_root);
27875+ au_hn_reset(dir, au_hi_flags(dir, /*isdir*/1) & ~AuHi_XINO);
27876+ }
27877+
27878+out:
27879+ return err;
27880+}
27881+
27882+int au_opts_remount(struct super_block *sb, struct au_opts *opts)
27883+{
27884+ int err, rerr;
27885+ unsigned char no_dreval;
27886+ struct inode *dir;
27887+ struct au_opt_xino *opt_xino;
27888+ struct au_opt *opt;
27889+ struct au_sbinfo *sbinfo;
27890+
27891+ SiMustWriteLock(sb);
27892+
27893+ err = au_dr_opt_flush(sb);
27894+ if (unlikely(err))
27895+ goto out;
27896+ au_fset_opts(opts->flags, DR_FLUSHED);
27897+
27898+ dir = d_inode(sb->s_root);
27899+ sbinfo = au_sbi(sb);
27900+ opt_xino = NULL;
27901+ opt = opts->opt;
27902+ while (err >= 0 && opt->type != Opt_tail) {
27903+ err = au_opt_simple(sb, opt, opts);
27904+ if (!err)
27905+ err = au_opt_br(sb, opt, opts);
27906+ if (!err)
27907+ err = au_opt_xino(sb, opt, &opt_xino, opts);
27908+ opt++;
27909+ }
27910+ if (err > 0)
27911+ err = 0;
27912+ AuTraceErr(err);
27913+ /* go on even if err; the steps below keep the first error only */
27914+
27915+ no_dreval = !!au_ftest_si(sbinfo, NO_DREVAL);
27916+ rerr = au_opts_verify(sb, opts->sb_flags, /*pending*/0);
27917+ if (unlikely(rerr && !err))
27918+ err = rerr;
27919+
27920+ if (no_dreval != !!au_ftest_si(sbinfo, NO_DREVAL))
27921+ au_fset_opts(opts->flags, REFRESH_IDOP);
27922+
27923+ if (au_ftest_opts(opts->flags, TRUNC_XIB)) {
27924+ rerr = au_xib_trunc(sb);
27925+ if (unlikely(rerr && !err))
27926+ err = rerr;
27927+ }
27928+
27929+ /* only the REFRESH flag is set here, the refresh will be handled by the caller */
27930+ if (!au_ftest_opts(opts->flags, REFRESH)
27931+ && (opts->given_udba
27932+ || au_opt_test(sbinfo->si_mntflags, XINO)
27933+ || au_ftest_opts(opts->flags, REFRESH_IDOP)
27934+ ))
27935+ au_fset_opts(opts->flags, REFRESH);
27936+
27937+ AuDbg("status 0x%x\n", opts->flags);
27938+
27939+out:
27940+ return err;
27941+}
27942+
27943+/* ---------------------------------------------------------------------- */
27944+
27945+unsigned int au_opt_udba(struct super_block *sb)
27946+{
27947+ return au_mntflags(sb) & AuOptMask_UDBA;
27948+}
27949diff -urN /usr/share/empty/fs/aufs/opts.h linux/fs/aufs/opts.h
27950--- /usr/share/empty/fs/aufs/opts.h 1970-01-01 01:00:00.000000000 +0100
27951+++ linux/fs/aufs/opts.h 2022-11-05 23:02:18.969222617 +0100
27952@@ -0,0 +1,263 @@
27953+/* SPDX-License-Identifier: GPL-2.0 */
27954+/*
27955+ * Copyright (C) 2005-2022 Junjiro R. Okajima
27956+ *
27957+ * This program is free software; you can redistribute it and/or modify
27958+ * it under the terms of the GNU General Public License as published by
27959+ * the Free Software Foundation; either version 2 of the License, or
27960+ * (at your option) any later version.
27961+ *
27962+ * This program is distributed in the hope that it will be useful,
27963+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
27964+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27965+ * GNU General Public License for more details.
27966+ *
27967+ * You should have received a copy of the GNU General Public License
27968+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
27969+ */
27970+
27971+/*
27972+ * mount options/flags
27973+ */
27974+
27975+#ifndef __AUFS_OPTS_H__
27976+#define __AUFS_OPTS_H__
27977+
27978+#ifdef __KERNEL__
27979+
27980+#include <linux/fs_parser.h>
27981+#include <linux/namei.h>
27982+#include <linux/path.h>
27983+
27984+enum {
27985+ Opt_br,
27986+ Opt_add, Opt_del, Opt_mod, Opt_append, Opt_prepend,
27987+ Opt_idel, Opt_imod,
27988+ Opt_dirwh, Opt_rdcache, Opt_rdblk, Opt_rdhash,
27989+ Opt_xino, Opt_noxino,
27990+ Opt_trunc_xino, Opt_trunc_xino_v,
27991+ Opt_trunc_xino_path, Opt_itrunc_xino,
27992+ Opt_trunc_xib,
27993+ Opt_shwh,
27994+ Opt_plink, Opt_list_plink,
27995+ Opt_udba,
27996+ Opt_dio,
27997+ Opt_diropq, Opt_diropq_a, Opt_diropq_w,
27998+ Opt_warn_perm,
27999+ Opt_wbr_copyup, Opt_wbr_create,
28000+ Opt_fhsm_sec,
28001+ Opt_verbose, Opt_noverbose,
28002+ Opt_sum, Opt_wsum,
28003+ Opt_dirperm1,
28004+ Opt_dirren,
28005+ Opt_acl,
28006+ Opt_tail, Opt_ignore, Opt_ignore_silent, Opt_err
28007+};
28008+
28009+/* ---------------------------------------------------------------------- */
28010+
28011+/* mount flags */
28012+#define AuOpt_XINO 1 /* external inode number bitmap
28013+ and translation table */
28014+#define AuOpt_TRUNC_XINO (1 << 1) /* truncate xino files */
28015+#define AuOpt_UDBA_NONE (1 << 2) /* users direct branch access */
28016+#define AuOpt_UDBA_REVAL (1 << 3)
28017+#define AuOpt_UDBA_HNOTIFY (1 << 4)
28018+#define AuOpt_SHWH (1 << 5) /* show whiteout */
28019+#define AuOpt_PLINK (1 << 6) /* pseudo-link */
28020+#define AuOpt_DIRPERM1 (1 << 7) /* ignore the lower dir's perm
28021+ bits */
28022+#define AuOpt_ALWAYS_DIROPQ (1 << 9) /* policy to creating diropq */
28023+#define AuOpt_SUM (1 << 10) /* summation for statfs(2) */
28024+#define AuOpt_SUM_W (1 << 11) /* unimplemented */
28025+#define AuOpt_WARN_PERM (1 << 12) /* warn when add-branch */
28026+#define AuOpt_VERBOSE (1 << 13) /* print the cause of error */
28027+#define AuOpt_DIO (1 << 14) /* direct io */
28028+#define AuOpt_DIRREN (1 << 15) /* directory rename */
28029+
28030+#ifndef CONFIG_AUFS_HNOTIFY
28031+#undef AuOpt_UDBA_HNOTIFY
28032+#define AuOpt_UDBA_HNOTIFY 0
28033+#endif
28034+#ifndef CONFIG_AUFS_DIRREN
28035+#undef AuOpt_DIRREN
28036+#define AuOpt_DIRREN 0
28037+#endif
28038+#ifndef CONFIG_AUFS_SHWH
28039+#undef AuOpt_SHWH
28040+#define AuOpt_SHWH 0
28041+#endif
28042+
28043+#define AuOpt_Def (AuOpt_XINO \
28044+ | AuOpt_UDBA_REVAL \
28045+ | AuOpt_PLINK \
28046+ /* | AuOpt_DIRPERM1 */ \
28047+ | AuOpt_WARN_PERM)
28048+#define AuOptMask_UDBA (AuOpt_UDBA_NONE \
28049+ | AuOpt_UDBA_REVAL \
28050+ | AuOpt_UDBA_HNOTIFY)
28051+
28052+#define AuOpt_LkupDirFlags (LOOKUP_FOLLOW | LOOKUP_DIRECTORY)
28053+
28054+#define au_opt_test(flags, name) ((flags) & AuOpt_##name) /* @flags may be any expression */
28055+#define au_opt_set(flags, name) do { \
28056+ BUILD_BUG_ON(AuOpt_##name & AuOptMask_UDBA); \
28057+ ((flags) |= AuOpt_##name); \
28058+} while (0)
28059+#define au_opt_set_udba(flags, name) do { \
28060+ (flags) &= ~AuOptMask_UDBA; \
28061+ ((flags) |= AuOpt_##name); \
28062+} while (0)
28063+#define au_opt_clr(flags, name) do { \
28064+ ((flags) &= ~AuOpt_##name); \
28065+} while (0)
28066+
28067+static inline unsigned int au_opts_plink(unsigned int mntflags)
28068+{
28069+#ifdef CONFIG_PROC_FS
28070+ return mntflags;
28071+#else
28072+ return mntflags & ~AuOpt_PLINK;
28073+#endif
28074+}
28075+
28076+/* ---------------------------------------------------------------------- */
28077+
28078+/* policies to select one among multiple writable branches */
28079+enum {
28080+ AuWbrCreate_TDP, /* top down parent */
28081+ AuWbrCreate_RR, /* round robin */
28082+ AuWbrCreate_MFS, /* most free space */
28083+ AuWbrCreate_MFSV, /* mfs with seconds */
28084+ AuWbrCreate_MFSRR, /* mfs then rr */
28085+ AuWbrCreate_MFSRRV, /* mfs then rr with seconds */
28086+ AuWbrCreate_TDMFS, /* top down regardless parent and mfs */
28087+ AuWbrCreate_TDMFSV, /* top down regardless parent and mfs with seconds */
28088+ AuWbrCreate_PMFS, /* parent and mfs */
28089+ AuWbrCreate_PMFSV, /* parent and mfs with seconds */
28090+ AuWbrCreate_PMFSRR, /* parent, mfs and round-robin */
28091+ AuWbrCreate_PMFSRRV, /* plus seconds */
28092+
28093+ AuWbrCreate_Def = AuWbrCreate_TDP
28094+};
28095+
28096+enum {
28097+ AuWbrCopyup_TDP, /* top down parent */
28098+ AuWbrCopyup_BUP, /* bottom up parent */
28099+ AuWbrCopyup_BU, /* bottom up */
28100+
28101+ AuWbrCopyup_Def = AuWbrCopyup_TDP
28102+};
28103+
28104+/* ---------------------------------------------------------------------- */
28105+
28106+struct file;
28107+
28108+struct au_opt_add {
28109+ aufs_bindex_t bindex;
28110+ char *pathname;
28111+ int perm;
28112+ struct path path;
28113+};
28114+
28115+struct au_opt_del {
28116+ char *pathname;
28117+ struct path h_path;
28118+};
28119+
28120+struct au_opt_mod {
28121+ char *path;
28122+ int perm;
28123+ struct dentry *h_root;
28124+};
28125+
28126+struct au_opt_xino {
28127+ char *path;
28128+ struct file *file;
28129+};
28130+
28131+struct au_opt_xino_itrunc {
28132+ aufs_bindex_t bindex;
28133+};
28134+
28135+struct au_opt_wbr_create {
28136+ int wbr_create;
28137+ int mfs_second;
28138+ unsigned long long mfsrr_watermark;
28139+};
28140+
28141+struct au_opt {
28142+ int type;
28143+ union {
28144+ struct au_opt_xino xino;
28145+ struct au_opt_xino_itrunc xino_itrunc;
28146+ struct au_opt_add add;
28147+ struct au_opt_del del;
28148+ struct au_opt_mod mod;
28149+ int dirwh;
28150+ int rdcache;
28151+ unsigned int rdblk;
28152+ unsigned int rdhash;
28153+ int udba;
28154+ struct au_opt_wbr_create wbr_create;
28155+ int wbr_copyup;
28156+ unsigned int fhsm_second;
28157+ bool tf; /* generic flag, true or false */
28158+ };
28159+};
28160+
28161+/* opts flags */
28162+#define AuOpts_REMOUNT 1
28163+#define AuOpts_REFRESH (1 << 1)
28164+#define AuOpts_TRUNC_XIB (1 << 2)
28165+#define AuOpts_REFRESH_DYAOP (1 << 3)
28166+#define AuOpts_REFRESH_IDOP (1 << 4)
28167+#define AuOpts_DR_FLUSHED (1 << 5)
28168+#define au_ftest_opts(flags, name) ((flags) & AuOpts_##name)
28169+#define au_fset_opts(flags, name) \
28170+ do { (flags) |= AuOpts_##name; } while (0)
28171+#define au_fclr_opts(flags, name) \
28172+ do { (flags) &= ~AuOpts_##name; } while (0)
28173+
28174+#ifndef CONFIG_AUFS_DIRREN
28175+#undef AuOpts_DR_FLUSHED
28176+#define AuOpts_DR_FLUSHED 0
28177+#endif
28178+
28179+struct au_opts {
28180+ struct au_opt *opt;
28181+ int max_opt;
28182+
28183+ unsigned int given_udba;
28184+ unsigned int flags;
28185+ unsigned long sb_flags;
28186+};
28187+
28188+/* ---------------------------------------------------------------------- */
28189+
28190+/* opts.c */
28191+int au_br_perm_val(char *perm);
28192+void au_optstr_br_perm(au_br_perm_str_t *str, int perm);
28193+int au_udba_val(char *str);
28194+const char *au_optstr_udba(int udba);
28195+int au_wbr_create_val(char *str, struct au_opt_wbr_create *create);
28196+const char *au_optstr_wbr_create(int wbr_create);
28197+int au_wbr_copyup_val(char *str);
28198+const char *au_optstr_wbr_copyup(int wbr_copyup);
28199+
28200+int au_opt_add(struct au_opt *opt, char *opt_str, unsigned long sb_flags,
28201+ aufs_bindex_t bindex);
28202+struct super_block;
28203+int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
28204+ unsigned int pending);
28205+int au_opts_mount(struct super_block *sb, struct au_opts *opts);
28206+int au_opts_remount(struct super_block *sb, struct au_opts *opts);
28207+
28208+unsigned int au_opt_udba(struct super_block *sb);
28209+
28210+/* fsctx.c */
28211+int aufs_fsctx_init(struct fs_context *fc);
28212+extern const struct fs_parameter_spec aufs_fsctx_paramspec[];
28213+
28214+#endif /* __KERNEL__ */
28215+#endif /* __AUFS_OPTS_H__ */
28216diff -urN /usr/share/empty/fs/aufs/plink.c linux/fs/aufs/plink.c
28217--- /usr/share/empty/fs/aufs/plink.c 1970-01-01 01:00:00.000000000 +0100
28218+++ linux/fs/aufs/plink.c 2022-11-05 23:02:18.969222617 +0100
28219@@ -0,0 +1,516 @@
28220+// SPDX-License-Identifier: GPL-2.0
28221+/*
28222+ * Copyright (C) 2005-2022 Junjiro R. Okajima
28223+ *
28224+ * This program is free software; you can redistribute it and/or modify
28225+ * it under the terms of the GNU General Public License as published by
28226+ * the Free Software Foundation; either version 2 of the License, or
28227+ * (at your option) any later version.
28228+ *
28229+ * This program is distributed in the hope that it will be useful,
28230+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28231+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28232+ * GNU General Public License for more details.
28233+ *
28234+ * You should have received a copy of the GNU General Public License
28235+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
28236+ */
28237+
28238+/*
28239+ * pseudo-link
28240+ */
28241+
28242+#include "aufs.h"
28243+
28244+/*
28245+ * the pseudo-link maintenance mode.
28246+ * while a user process maintains the pseudo-links,
28247+ * adding a new plink and branch manipulation are prohibited.
28248+ *
28249+ * Flags
28250+ * NOPLM:
28251+ * For entry functions which will handle plink, and i_mutex is already held
28252+ * in VFS.
28253+ * They cannot wait and should return an error at once.
28254+ * Callers have to check the error.
28255+ * NOPLMW:
28256+ * For entry functions which will handle plink, but i_mutex is not held
28257+ * in VFS.
28258+ * They can wait for the plink maintenance mode to finish.
28259+ *
28260+ * They behave like F_SETLK and F_SETLKW.
28261+ * If the caller never handles plink, then both flags are unnecessary.
28262+ */
28263+
28264+int au_plink_maint(struct super_block *sb, int flags)
28265+{
28266+ int err;
28267+ pid_t pid, ppid;
28268+ struct task_struct *parent, *prev;
28269+ struct au_sbinfo *sbi;
28270+
28271+ SiMustAnyLock(sb);
28272+
28273+ err = 0;
28274+ if (!au_opt_test(au_mntflags(sb), PLINK))
28275+ goto out;
28276+
28277+ sbi = au_sbi(sb);
28278+ pid = sbi->si_plink_maint_pid;
28279+ if (!pid || pid == current->pid)
28280+ goto out;
28281+
28282+ /* todo: it highly depends upon /sbin/mount.aufs */
28283+ prev = NULL;
28284+ parent = current;
28285+ ppid = 0;
28286+ rcu_read_lock();
28287+ while (1) {
28288+ parent = rcu_dereference(parent->real_parent);
28289+ if (parent == prev)
28290+ break;
28291+ ppid = task_pid_vnr(parent);
28292+ if (pid == ppid) {
28293+ rcu_read_unlock();
28294+ goto out;
28295+ }
28296+ prev = parent;
28297+ }
28298+ rcu_read_unlock();
28299+
28300+ if (au_ftest_lock(flags, NOPLMW)) {
28301+ /* if there is no i_mutex lock in VFS, we don't need to wait */
28302+ /* AuDebugOn(!lockdep_depth(current)); */
28303+ while (sbi->si_plink_maint_pid) {
28304+ si_read_unlock(sb);
28305+ /* gave up wake_up_bit() */
28306+ wait_event(sbi->si_plink_wq, !sbi->si_plink_maint_pid);
28307+
28308+ if (au_ftest_lock(flags, FLUSH))
28309+ au_nwt_flush(&sbi->si_nowait);
28310+ si_noflush_read_lock(sb);
28311+ }
28312+ } else if (au_ftest_lock(flags, NOPLM)) {
28313+ AuDbg("ppid %d, pid %d\n", ppid, pid);
28314+ err = -EAGAIN;
28315+ }
28316+
28317+out:
28318+ return err;
28319+}
28320+
28321+void au_plink_maint_leave(struct au_sbinfo *sbinfo)
28322+{
28323+ spin_lock(&sbinfo->si_plink_maint_lock);
28324+ sbinfo->si_plink_maint_pid = 0;
28325+ spin_unlock(&sbinfo->si_plink_maint_lock);
28326+ wake_up_all(&sbinfo->si_plink_wq);
28327+}
28328+
28329+int au_plink_maint_enter(struct super_block *sb)
28330+{
28331+ int err;
28332+ struct au_sbinfo *sbinfo;
28333+
28334+ err = 0;
28335+ sbinfo = au_sbi(sb);
28336+ /* make sure i am the only one in this fs */
28337+ si_write_lock(sb, AuLock_FLUSH);
28338+ if (au_opt_test(au_mntflags(sb), PLINK)) {
28339+ spin_lock(&sbinfo->si_plink_maint_lock);
28340+ if (!sbinfo->si_plink_maint_pid)
28341+ sbinfo->si_plink_maint_pid = current->pid;
28342+ else
28343+ err = -EBUSY;
28344+ spin_unlock(&sbinfo->si_plink_maint_lock);
28345+ }
28346+ si_write_unlock(sb);
28347+
28348+ return err;
28349+}
28350+
28351+/* ---------------------------------------------------------------------- */
28352+
28353+#ifdef CONFIG_AUFS_DEBUG
28354+void au_plink_list(struct super_block *sb)
28355+{
28356+ int i;
28357+ struct au_sbinfo *sbinfo;
28358+ struct hlist_bl_head *hbl;
28359+ struct hlist_bl_node *pos;
28360+ struct au_icntnr *icntnr;
28361+
28362+ SiMustAnyLock(sb);
28363+
28364+ sbinfo = au_sbi(sb);
28365+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
28366+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
28367+
28368+ for (i = 0; i < AuPlink_NHASH; i++) {
28369+ hbl = sbinfo->si_plink + i;
28370+ hlist_bl_lock(hbl);
28371+ hlist_bl_for_each_entry(icntnr, pos, hbl, plink)
28372+ AuDbg("%lu\n", icntnr->vfs_inode.i_ino);
28373+ hlist_bl_unlock(hbl);
28374+ }
28375+}
28376+#endif
28377+
28378+/* is the inode pseudo-linked? */
28379+int au_plink_test(struct inode *inode)
28380+{
28381+ int found, i;
28382+ struct au_sbinfo *sbinfo;
28383+ struct hlist_bl_head *hbl;
28384+ struct hlist_bl_node *pos;
28385+ struct au_icntnr *icntnr;
28386+
28387+ sbinfo = au_sbi(inode->i_sb);
28388+ AuRwMustAnyLock(&sbinfo->si_rwsem);
28389+ AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK));
28390+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
28391+
28392+ found = 0;
28393+ i = au_plink_hash(inode->i_ino);
28394+ hbl = sbinfo->si_plink + i;
28395+ hlist_bl_lock(hbl);
28396+ hlist_bl_for_each_entry(icntnr, pos, hbl, plink)
28397+ if (&icntnr->vfs_inode == inode) {
28398+ found = 1;
28399+ break;
28400+ }
28401+ hlist_bl_unlock(hbl);
28402+ return found;
28403+}
28404+
28405+/* ---------------------------------------------------------------------- */
28406+
28407+/*
28408+ * generate a name for plink.
28409+ * the file will be stored under AUFS_WH_PLINKDIR.
28410+ */
28411+/* 20 is max digits length of ulong 64 */
28412+#define PLINK_NAME_LEN ((20 + 1) * 2)
28413+
28414+static int plink_name(char *name, int len, struct inode *inode,
28415+ aufs_bindex_t bindex)
28416+{
28417+ int rlen;
28418+ struct inode *h_inode;
28419+
28420+ h_inode = au_h_iptr(inode, bindex);
28421+ rlen = snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino);
28422+ return rlen;
28423+}
28424+
28425+struct au_do_plink_lkup_args {
28426+ struct dentry **errp;
28427+ struct qstr *tgtname;
28428+ struct path *h_ppath;
28429+};
28430+
28431+static struct dentry *au_do_plink_lkup(struct qstr *tgtname,
28432+ struct path *h_ppath)
28433+{
28434+ struct dentry *h_dentry;
28435+ struct inode *h_inode;
28436+
28437+ h_inode = d_inode(h_ppath->dentry);
28438+ inode_lock_shared_nested(h_inode, AuLsc_I_CHILD2);
28439+ h_dentry = vfsub_lkup_one(tgtname, h_ppath);
28440+ inode_unlock_shared(h_inode);
28441+
28442+ return h_dentry;
28443+}
28444+
28445+static void au_call_do_plink_lkup(void *args)
28446+{
28447+ struct au_do_plink_lkup_args *a = args;
28448+ *a->errp = au_do_plink_lkup(a->tgtname, a->h_ppath);
28449+}
28450+
28451+/* lookup the plink-ed @inode under the branch at @bindex */
28452+struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex)
28453+{
28454+ struct dentry *h_dentry;
28455+ struct au_branch *br;
28456+ struct path h_ppath;
28457+ int wkq_err;
28458+ char a[PLINK_NAME_LEN];
28459+ struct qstr tgtname = QSTR_INIT(a, 0);
28460+
28461+ AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
28462+
28463+ br = au_sbr(inode->i_sb, bindex);
28464+ h_ppath.dentry = br->br_wbr->wbr_plink;
28465+ h_ppath.mnt = au_br_mnt(br);
28466+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
28467+
28468+ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
28469+ struct au_do_plink_lkup_args args = {
28470+ .errp = &h_dentry,
28471+ .tgtname = &tgtname,
28472+ .h_ppath = &h_ppath
28473+ };
28474+
28475+ wkq_err = au_wkq_wait(au_call_do_plink_lkup, &args);
28476+ if (unlikely(wkq_err))
28477+ h_dentry = ERR_PTR(wkq_err);
28478+ } else
28479+ h_dentry = au_do_plink_lkup(&tgtname, &h_ppath);
28480+
28481+ return h_dentry;
28482+}
28483+
28484+/* create a pseudo-link */
28485+static int do_whplink(struct qstr *tgt, struct path *h_ppath,
28486+ struct dentry *h_dentry)
28487+{
28488+ int err;
28489+ struct path h_path;
28490+ struct inode *h_dir, *delegated;
28491+
28492+ h_dir = d_inode(h_ppath->dentry);
28493+ inode_lock_nested(h_dir, AuLsc_I_CHILD2);
28494+ h_path.mnt = h_ppath->mnt;
28495+again:
28496+ h_path.dentry = vfsub_lkup_one(tgt, h_ppath);
28497+ err = PTR_ERR(h_path.dentry);
28498+ if (IS_ERR(h_path.dentry))
28499+ goto out;
28500+
28501+ err = 0;
28502+ /* wh.plink dir is not monitored */
28503+ /* todo: is it really safe? */
28504+ if (d_is_positive(h_path.dentry)
28505+ && d_inode(h_path.dentry) != d_inode(h_dentry)) {
28506+ delegated = NULL;
28507+ err = vfsub_unlink(h_dir, &h_path, &delegated, /*force*/0);
28508+ if (unlikely(err == -EWOULDBLOCK)) {
28509+ pr_warn("cannot retry for NFSv4 delegation"
28510+ " for an internal unlink\n");
28511+ iput(delegated);
28512+ }
28513+ dput(h_path.dentry);
28514+ h_path.dentry = NULL;
28515+ if (!err)
28516+ goto again;
28517+ }
28518+ if (!err && d_is_negative(h_path.dentry)) {
28519+ delegated = NULL;
28520+ err = vfsub_link(h_dentry, h_dir, &h_path, &delegated);
28521+ if (unlikely(err == -EWOULDBLOCK)) {
28522+ pr_warn("cannot retry for NFSv4 delegation"
28523+ " for an internal link\n");
28524+ iput(delegated);
28525+ }
28526+ }
28527+ dput(h_path.dentry);
28528+
28529+out:
28530+ inode_unlock(h_dir);
28531+ return err;
28532+}
28533+
28534+struct do_whplink_args {
28535+ int *errp;
28536+ struct qstr *tgt;
28537+ struct path *h_ppath;
28538+ struct dentry *h_dentry;
28539+};
28540+
28541+static void call_do_whplink(void *args)
28542+{
28543+ struct do_whplink_args *a = args;
28544+ *a->errp = do_whplink(a->tgt, a->h_ppath, a->h_dentry);
28545+}
28546+
28547+static int whplink(struct dentry *h_dentry, struct inode *inode,
28548+ aufs_bindex_t bindex)
28549+{
28550+ int err, wkq_err;
28551+ struct au_branch *br;
28552+ struct au_wbr *wbr;
28553+ struct path h_ppath;
28554+ char a[PLINK_NAME_LEN];
28555+ struct qstr tgtname = QSTR_INIT(a, 0);
28556+
28557+ br = au_sbr(inode->i_sb, bindex);
28558+ wbr = br->br_wbr;
28559+ h_ppath.dentry = wbr->wbr_plink;
28560+ h_ppath.mnt = au_br_mnt(br);
28561+ tgtname.len = plink_name(a, sizeof(a), inode, bindex);
28562+
28563+ /* always superio. */
28564+ if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
28565+ struct do_whplink_args args = {
28566+ .errp = &err,
28567+ .tgt = &tgtname,
28568+ .h_ppath = &h_ppath,
28569+ .h_dentry = h_dentry
28570+ };
28571+ wkq_err = au_wkq_wait(call_do_whplink, &args);
28572+ if (unlikely(wkq_err))
28573+ err = wkq_err;
28574+ } else
28575+ err = do_whplink(&tgtname, &h_ppath, h_dentry);
28576+
28577+ return err;
28578+}
28579+
28580+/*
28581+ * create a new pseudo-link for @h_dentry on @bindex.
28582+ * the linked inode is held in aufs @inode.
28583+ */
28584+void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
28585+ struct dentry *h_dentry)
28586+{
28587+ struct super_block *sb;
28588+ struct au_sbinfo *sbinfo;
28589+ struct hlist_bl_head *hbl;
28590+ struct hlist_bl_node *pos;
28591+ struct au_icntnr *icntnr;
28592+ int found, err, cnt, i;
28593+
28594+ sb = inode->i_sb;
28595+ sbinfo = au_sbi(sb);
28596+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
28597+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
28598+
28599+ found = au_plink_test(inode);
28600+ if (found)
28601+ return;
28602+
28603+ i = au_plink_hash(inode->i_ino);
28604+ hbl = sbinfo->si_plink + i;
28605+ au_igrab(inode);
28606+
28607+ hlist_bl_lock(hbl);
28608+ hlist_bl_for_each_entry(icntnr, pos, hbl, plink) {
28609+ if (&icntnr->vfs_inode == inode) {
28610+ found = 1;
28611+ break;
28612+ }
28613+ }
28614+ if (!found) {
28615+ icntnr = container_of(inode, struct au_icntnr, vfs_inode);
28616+ hlist_bl_add_head(&icntnr->plink, hbl);
28617+ }
28618+ hlist_bl_unlock(hbl);
28619+ if (!found) {
28620+ cnt = au_hbl_count(hbl);
28621+#define msg "unexpectedly unbalanced or too many pseudo-links"
28622+ if (cnt > AUFS_PLINK_WARN)
28623+ AuWarn1(msg ", %d\n", cnt);
28624+#undef msg
28625+ err = whplink(h_dentry, inode, bindex);
28626+ if (unlikely(err)) {
28627+ pr_warn("err %d, damaged pseudo link.\n", err);
28628+ au_hbl_del(&icntnr->plink, hbl);
28629+ iput(&icntnr->vfs_inode);
28630+ }
28631+ } else
28632+ iput(&icntnr->vfs_inode);
28633+}
28634+
28635+/* free all plinks: put every listed inode and reinitialize each hash list */
28636+void au_plink_put(struct super_block *sb, int verbose)
28637+{
28638+ int i, warned;
28639+ struct au_sbinfo *sbinfo;
28640+ struct hlist_bl_head *hbl;
28641+ struct hlist_bl_node *pos, *tmp;
28642+ struct au_icntnr *icntnr;
28643+
28644+ SiMustWriteLock(sb);
28645+
28646+ sbinfo = au_sbi(sb);
28647+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
28648+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
28649+
28650+ /* no hlist_bl_lock since sbinfo is write-locked */
28651+ warned = 0; /* warn at most once, and only when @verbose */
28652+ for (i = 0; i < AuPlink_NHASH; i++) {
28653+ hbl = sbinfo->si_plink + i;
28654+ if (!warned && verbose && !hlist_bl_empty(hbl)) {
28655+ pr_warn("pseudo-link is not flushed\n");
28656+ warned = 1;
28657+ }
28658+ hlist_bl_for_each_entry_safe(icntnr, pos, tmp, hbl, plink)
28659+ iput(&icntnr->vfs_inode);
28660+ INIT_HLIST_BL_HEAD(hbl);
28661+ }
28662+}
28663+
28664+void au_plink_clean(struct super_block *sb, int verbose)
28665+{
28666+ struct dentry *root;
28667+
28668+ root = sb->s_root;
28669+ aufs_write_lock(root);
28670+ if (au_opt_test(au_mntflags(sb), PLINK))
28671+ au_plink_put(sb, verbose);
28672+ aufs_write_unlock(root);
28673+}
28674+
28675+static int au_plink_do_half_refresh(struct inode *inode, aufs_bindex_t br_id)
28676+{
28677+ int do_put;
28678+ aufs_bindex_t btop, bbot, bindex;
28679+
28680+ do_put = 0;
28681+ btop = au_ibtop(inode);
28682+ bbot = au_ibbot(inode);
28683+ if (btop >= 0) {
28684+ for (bindex = btop; bindex <= bbot; bindex++) {
28685+ if (!au_h_iptr(inode, bindex)
28686+ || au_ii_br_id(inode, bindex) != br_id)
28687+ continue;
28688+ au_set_h_iptr(inode, bindex, NULL, 0);
28689+ do_put = 1;
28690+ break;
28691+ }
28692+ if (do_put)
28693+ for (bindex = btop; bindex <= bbot; bindex++)
28694+ if (au_h_iptr(inode, bindex)) {
28695+ do_put = 0;
28696+ break;
28697+ }
28698+ } else
28699+ do_put = 1;
28700+
28701+ return do_put;
28702+}
28703+
28704+/* free the plinks on a branch specified by @br_id */
28705+void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id)
28706+{
28707+ struct au_sbinfo *sbinfo;
28708+ struct hlist_bl_head *hbl;
28709+ struct hlist_bl_node *pos, *tmp;
28710+ struct au_icntnr *icntnr;
28711+ struct inode *inode;
28712+ int i, do_put;
28713+
28714+ SiMustWriteLock(sb);
28715+
28716+ sbinfo = au_sbi(sb);
28717+ AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
28718+ AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
28719+
28720+ /* no bit_lock since sbinfo is write-locked */
28721+ for (i = 0; i < AuPlink_NHASH; i++) {
28722+ hbl = sbinfo->si_plink + i;
28723+ hlist_bl_for_each_entry_safe(icntnr, pos, tmp, hbl, plink) {
28724+ inode = au_igrab(&icntnr->vfs_inode);
28725+ ii_write_lock_child(inode);
28726+ do_put = au_plink_do_half_refresh(inode, br_id);
28727+ if (do_put) {
28728+ hlist_bl_del(&icntnr->plink);
28729+ iput(inode);
28730+ }
28731+ ii_write_unlock(inode);
28732+ iput(inode);
28733+ }
28734+ }
28735+}
28736diff -urN /usr/share/empty/fs/aufs/poll.c linux/fs/aufs/poll.c
28737--- /usr/share/empty/fs/aufs/poll.c 1970-01-01 01:00:00.000000000 +0100
28738+++ linux/fs/aufs/poll.c 2022-11-05 23:02:18.969222617 +0100
28739@@ -0,0 +1,51 @@
28740+// SPDX-License-Identifier: GPL-2.0
28741+/*
28742+ * Copyright (C) 2005-2022 Junjiro R. Okajima
28743+ *
28744+ * This program is free software; you can redistribute it and/or modify
28745+ * it under the terms of the GNU General Public License as published by
28746+ * the Free Software Foundation; either version 2 of the License, or
28747+ * (at your option) any later version.
28748+ *
28749+ * This program is distributed in the hope that it will be useful,
28750+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28751+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28752+ * GNU General Public License for more details.
28753+ *
28754+ * You should have received a copy of the GNU General Public License
28755+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
28756+ */
28757+
28758+/*
28759+ * poll operation
28760+ * There is only one filesystem which implements ->poll operation, currently.
28761+ */
28762+
28763+#include "aufs.h"
28764+
28765+__poll_t aufs_poll(struct file *file, struct poll_table_struct *pt)
28766+{
28767+ __poll_t mask;
28768+ struct file *h_file;
28769+ struct super_block *sb;
28770+
28771+ /* We should pretend an error happened. */
28772+ mask = EPOLLERR /* | EPOLLIN | EPOLLOUT */;
28773+ sb = file->f_path.dentry->d_sb;
28774+ si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
28775+
28776+ h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
28777+ if (IS_ERR(h_file)) {
28778+ AuDbg("h_file %ld\n", PTR_ERR(h_file));
28779+ goto out;
28780+ }
28781+
28782+ mask = vfs_poll(h_file, pt);
28783+ fput(h_file); /* instead of au_read_post() */
28784+
28785+out:
28786+ si_read_unlock(sb);
28787+ if (mask & EPOLLERR)
28788+ AuDbg("mask 0x%x\n", mask);
28789+ return mask;
28790+}
28791diff -urN /usr/share/empty/fs/aufs/posix_acl.c linux/fs/aufs/posix_acl.c
28792--- /usr/share/empty/fs/aufs/posix_acl.c 1970-01-01 01:00:00.000000000 +0100
28793+++ linux/fs/aufs/posix_acl.c 2023-02-20 21:05:51.959693785 +0100
28794@@ -0,0 +1,108 @@
28795+// SPDX-License-Identifier: GPL-2.0
28796+/*
28797+ * Copyright (C) 2014-2022 Junjiro R. Okajima
28798+ *
28799+ * This program is free software; you can redistribute it and/or modify
28800+ * it under the terms of the GNU General Public License as published by
28801+ * the Free Software Foundation; either version 2 of the License, or
28802+ * (at your option) any later version.
28803+ *
28804+ * This program is distributed in the hope that it will be useful,
28805+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28806+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28807+ * GNU General Public License for more details.
28808+ *
28809+ * You should have received a copy of the GNU General Public License
28810+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
28811+ */
28812+
28813+/*
28814+ * posix acl operations
28815+ */
28816+
28817+#include <linux/fs.h>
28818+#include "aufs.h"
28819+
28820+struct posix_acl *aufs_get_inode_acl(struct inode *inode, int type, bool rcu)
28821+{
28822+ struct posix_acl *acl;
28823+ int err;
28824+ aufs_bindex_t bindex;
28825+ struct inode *h_inode;
28826+ struct super_block *sb;
28827+
28828+ acl = ERR_PTR(-ECHILD);
28829+ if (rcu)
28830+ goto out;
28831+
28832+ acl = NULL;
28833+ sb = inode->i_sb;
28834+ si_read_lock(sb, AuLock_FLUSH);
28835+ ii_read_lock_child(inode);
28836+ if (!(sb->s_flags & SB_POSIXACL))
28837+ goto unlock;
28838+
28839+ bindex = au_ibtop(inode);
28840+ h_inode = au_h_iptr(inode, bindex);
28841+ if (unlikely(!h_inode
28842+ || ((h_inode->i_mode & S_IFMT)
28843+ != (inode->i_mode & S_IFMT)))) {
28844+ err = au_busy_or_stale();
28845+ acl = ERR_PTR(err);
28846+ goto unlock;
28847+ }
28848+
28849+ /* always topmost only */
28850+ acl = get_inode_acl(h_inode, type);
28851+ if (IS_ERR(acl))
28852+ forget_cached_acl(inode, type);
28853+ else
28854+ set_cached_acl(inode, type, acl);
28855+
28856+unlock:
28857+ ii_read_unlock(inode);
28858+ si_read_unlock(sb);
28859+
28860+out:
28861+ AuTraceErrPtr(acl);
28862+ return acl;
28863+}
28864+
28865+struct posix_acl *aufs_get_acl(struct user_namespace *userns,
28866+ struct dentry *dentry, int type)
28867+{
28868+ struct posix_acl *acl;
28869+ struct inode *inode;
28870+
28871+ inode = d_inode(dentry);
28872+ acl = aufs_get_inode_acl(inode, type, /*rcu*/false);
28873+
28874+ return acl;
28875+}
28876+
28877+int aufs_set_acl(struct user_namespace *userns, struct dentry *dentry,
28878+ struct posix_acl *acl, int type)
28879+{
28880+ int err;
28881+ ssize_t ssz;
28882+ struct inode *inode;
28883+ struct au_sxattr arg = {
28884+ .type = AU_ACL_SET,
28885+ .u.acl_set = {
28886+ .acl = acl,
28887+ .type = type
28888+ },
28889+ };
28890+
28891+ inode = d_inode(dentry);
28892+ IMustLock(inode);
28893+
28894+ ssz = au_sxattr(dentry, inode, &arg);
28895+ /* forget the cached acl even if it succeeds since the branch might set it differently */
28896+ forget_cached_acl(inode, type);
28897+ err = ssz;
28898+ if (ssz >= 0)
28899+ err = 0;
28900+
28901+ return err;
28902+}
28903diff -urN /usr/share/empty/fs/aufs/procfs.c linux/fs/aufs/procfs.c
28904--- /usr/share/empty/fs/aufs/procfs.c 1970-01-01 01:00:00.000000000 +0100
28905+++ linux/fs/aufs/procfs.c 2022-11-05 23:02:18.969222617 +0100
28906@@ -0,0 +1,170 @@
28907+// SPDX-License-Identifier: GPL-2.0
28908+/*
28909+ * Copyright (C) 2010-2022 Junjiro R. Okajima
28910+ *
28911+ * This program is free software; you can redistribute it and/or modify
28912+ * it under the terms of the GNU General Public License as published by
28913+ * the Free Software Foundation; either version 2 of the License, or
28914+ * (at your option) any later version.
28915+ *
28916+ * This program is distributed in the hope that it will be useful,
28917+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
28918+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28919+ * GNU General Public License for more details.
28920+ *
28921+ * You should have received a copy of the GNU General Public License
28922+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
28923+ */
28924+
28925+/*
28926+ * procfs interfaces
28927+ */
28928+
28929+#include <linux/proc_fs.h>
28930+#include "aufs.h"
28931+/* ->proc_release: leave plink maintenance and put the kobject kept in private_data */
28932+static int au_procfs_plm_release(struct inode *inode, struct file *file)
28933+{
28934+ struct au_sbinfo *sbinfo;
28935+
28936+ sbinfo = file->private_data;
28937+ if (sbinfo) { /* set only by a successful au_procfs_plm_write_si() */
28938+ au_plink_maint_leave(sbinfo);
28939+ kobject_put(&sbinfo->si_kobj);
28940+ }
28941+
28942+ return 0;
28943+}
28944+/* "clean" command: call au_plink_clean() on the sb bound to @file; no-op when unbound */
28945+static void au_procfs_plm_write_clean(struct file *file)
28946+{
28947+ struct au_sbinfo *sbinfo;
28948+
28949+ sbinfo = file->private_data;
28950+ if (sbinfo)
28951+ au_plink_clean(sbinfo->si_sb, /*verbose*/0);
28952+}
28953+/* "si=" command: bind @file to the sbinfo whose id is @id and enter plink maintenance */
28954+static int au_procfs_plm_write_si(struct file *file, unsigned long id)
28955+{
28956+ int err;
28957+ struct super_block *sb;
28958+ struct au_sbinfo *sbinfo;
28959+ struct hlist_bl_node *pos;
28960+
28961+ err = -EBUSY;
28962+ if (unlikely(file->private_data))
28963+ goto out; /* this file is already bound to an sbinfo */
28964+
28965+ sb = NULL;
28966+ /* don't use au_sbilist_lock() here */
28967+ hlist_bl_lock(&au_sbilist);
28968+ hlist_bl_for_each_entry(sbinfo, pos, &au_sbilist, si_list)
28969+ if (id == sysaufs_si_id(sbinfo)) {
28970+ if (kobject_get_unless_zero(&sbinfo->si_kobj))
28971+ sb = sbinfo->si_sb;
28972+ break; /* sb stays NULL when the reference could not be taken */
28973+ }
28974+ hlist_bl_unlock(&au_sbilist);
28975+
28976+ err = -EINVAL;
28977+ if (unlikely(!sb))
28978+ goto out; /* no matching id, or no reference obtained */
28979+
28980+ err = au_plink_maint_enter(sb);
28981+ if (!err)
28982+ /* keep the kobject reference; au_procfs_plm_release() puts it */
28983+ file->private_data = sbinfo;
28984+ else
28985+ kobject_put(&sbinfo->si_kobj);
28986+out:
28987+ return err;
28988+}
28989+
28990+/*
28991+ * Parse one command written by a CAP_SYS_ADMIN task: "si=<hex id>" or "clean".
28992+ * Returns @count on success, otherwise a negative errno ("clean" only acts once bound).
28993+ */
28994+static ssize_t au_procfs_plm_write(struct file *file, const char __user *ubuf,
28995+ size_t count, loff_t *ppos)
28996+{
28997+ ssize_t err;
28998+ unsigned long id;
28999+ /* "si=" + hex digits + an allowed last newline + the terminating NUL */
29000+ char buf[3 + sizeof(unsigned long) * 2 + 1 + 1];
29001+
29002+ err = -EACCES;
29003+ if (unlikely(!capable(CAP_SYS_ADMIN)))
29004+ goto out;
29005+
29006+ err = -EINVAL;
29007+ if (unlikely(count >= sizeof(buf))) /* keep room for buf[count] = 0 below */
29008+ goto out;
29009+
29010+ err = copy_from_user(buf, ubuf, count);
29011+ if (unlikely(err)) {
29012+ err = -EFAULT;
29013+ goto out;
29014+ }
29015+ buf[count] = 0;
29016+
29017+ err = -EINVAL;
29018+ if (!strcmp("clean", buf)) {
29019+ au_procfs_plm_write_clean(file);
29020+ goto out_success;
29021+ } else if (unlikely(strncmp("si=", buf, 3)))
29022+ goto out;
29023+
29024+ err = kstrtoul(buf + 3, 16, &id);
29025+ if (unlikely(err))
29026+ goto out;
29027+
29028+ err = au_procfs_plm_write_si(file, id);
29029+ if (unlikely(err))
29030+ goto out;
29031+
29032+out_success:
29033+ err = count; /* success */
29034+out:
29035+ return err;
29036+}
29037+/* proc_ops of the plink maintenance entry: only write and release are provided */
29038+static const struct proc_ops au_procfs_plm_op = {
29039+ .proc_write = au_procfs_plm_write,
29040+ .proc_release = au_procfs_plm_release
29041+};
29042+
29043+/* ---------------------------------------------------------------------- */
29044+/* the AUFS_PLINK_MAINT_DIR directory, set by au_procfs_init() */
29045+static struct proc_dir_entry *au_procfs_dir;
29046+/* Remove the maintenance entry first, then the directory that holds it */
29047+void au_procfs_fin(void)
29048+{
29049+ remove_proc_entry(AUFS_PLINK_MAINT_NAME, au_procfs_dir);
29050+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
29051+}
29052+/* Create the AUFS_PLINK_MAINT_DIR directory and the 0200 entry in it; 0 or -ENOMEM */
29053+int __init au_procfs_init(void)
29054+{
29055+ int err;
29056+ struct proc_dir_entry *entry;
29057+
29058+ err = -ENOMEM;
29059+ au_procfs_dir = proc_mkdir(AUFS_PLINK_MAINT_DIR, NULL);
29060+ if (unlikely(!au_procfs_dir))
29061+ goto out;
29062+
29063+ entry = proc_create(AUFS_PLINK_MAINT_NAME, S_IFREG | 0200,
29064+ au_procfs_dir, &au_procfs_plm_op);
29065+ if (unlikely(!entry))
29066+ goto out_dir;
29067+
29068+ err = 0;
29069+ goto out; /* success */
29070+
29071+ /* error path: undo proc_mkdir() */
29072+out_dir:
29073+ remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
29074+out:
29075+ return err;
29076+}
29077diff -urN /usr/share/empty/fs/aufs/rdu.c linux/fs/aufs/rdu.c
29078--- /usr/share/empty/fs/aufs/rdu.c 1970-01-01 01:00:00.000000000 +0100
29079+++ linux/fs/aufs/rdu.c 2022-12-17 09:21:34.799855195 +0100
29080@@ -0,0 +1,384 @@
29081+// SPDX-License-Identifier: GPL-2.0
29082+/*
29083+ * Copyright (C) 2005-2022 Junjiro R. Okajima
29084+ *
29085+ * This program is free software; you can redistribute it and/or modify
29086+ * it under the terms of the GNU General Public License as published by
29087+ * the Free Software Foundation; either version 2 of the License, or
29088+ * (at your option) any later version.
29089+ *
29090+ * This program is distributed in the hope that it will be useful,
29091+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29092+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29093+ * GNU General Public License for more details.
29094+ *
29095+ * You should have received a copy of the GNU General Public License
29096+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
29097+ */
29098+
29099+/*
29100+ * readdir in userspace.
29101+ */
29102+
29103+#include <linux/compat.h>
29104+#include <linux/fs_stack.h>
29105+#include <linux/security.h>
29106+#include "aufs.h"
29107+
29108+/* bits for struct aufs_rdu cookie.flags, with test/set/clear helpers */
29109+#define AuRdu_CALLED 1
29110+#define AuRdu_CONT (1 << 1)
29111+#define AuRdu_FULL (1 << 2)
29112+#define au_ftest_rdu(flags, name) ((flags) & AuRdu_##name)
29113+#define au_fset_rdu(flags, name) \
29114+ do { (flags) |= AuRdu_##name; } while (0)
29115+#define au_fclr_rdu(flags, name) \
29116+ do { (flags) &= ~AuRdu_##name; } while (0)
29117+/* per-call state shared between au_rdu() and the au_rdu_fill() actor */
29118+struct au_rdu_arg {
29119+ struct dir_context ctx; /* au_rdu_fill() gets back to this struct via container_of() */
29120+ struct aufs_rdu *rdu;
29121+ union au_rdu_ent_ul ent; /* next user-space slot to fill */
29122+ unsigned long end; /* user buffer start + rdu->sz */
29123+
29124+ struct super_block *sb;
29125+ int err;
29126+};
29127+/* dir_context actor: copy one entry to the user buffer; true on success, false otherwise */
29128+static bool au_rdu_fill(struct dir_context *ctx, const char *name, int nlen,
29129+ loff_t offset, u64 h_ino, unsigned int d_type)
29130+{
29131+ int err, len;
29132+ struct au_rdu_arg *arg = container_of(ctx, struct au_rdu_arg, ctx);
29133+ struct aufs_rdu *rdu = arg->rdu;
29134+ struct au_rdu_ent ent = { .wh = 0 }; /* 'wh' is not assigned below: never leak stack */
29135+
29136+ err = 0;
29137+ arg->err = 0;
29138+ au_fset_rdu(rdu->cookie.flags, CALLED);
29139+ len = au_rdu_len(nlen);
29140+ if (arg->ent.ul + len < arg->end) {
29141+ ent.ino = h_ino;
29142+ ent.bindex = rdu->cookie.bindex;
29143+ ent.type = d_type;
29144+ ent.nlen = nlen;
29145+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
29146+ ent.type = DT_UNKNOWN;
29147+
29148+ /* unnecessary to support mmap_sem since this is a dir */
29149+ err = -EFAULT;
29150+ if (copy_to_user(arg->ent.e, &ent, sizeof(ent)))
29151+ goto out;
29152+ if (copy_to_user(arg->ent.e->name, name, nlen))
29153+ goto out;
29154+ /* the terminating NULL */
29155+ if (__put_user(0, arg->ent.e->name + nlen))
29156+ goto out;
29157+ err = 0;
29158+ /* AuDbg("%p, %.*s\n", arg->ent.p, nlen, name); */
29159+ arg->ent.ul += len;
29160+ rdu->rent++;
29161+ } else {
29162+ err = -EFAULT; /* buffer full: reported via the FULL flag, arg->err stays 0 */
29163+ au_fset_rdu(rdu->cookie.flags, FULL);
29164+ rdu->full = 1;
29165+ rdu->tail = arg->ent;
29166+ }
29167+
29168+out:
29169+ /* AuTraceErr(err); */
29170+ return !err;
29171+}
29172+/* Seek @h_file to the cookie position and iterate it, saving the new position in the cookie */
29173+static int au_rdu_do(struct file *h_file, struct au_rdu_arg *arg)
29174+{
29175+ int err;
29176+ loff_t offset;
29177+ struct au_rdu_cookie *cookie = &arg->rdu->cookie;
29178+
29179+ /* we don't have to care (FMODE_32BITHASH | FMODE_64BITHASH) for ext4 */
29180+ offset = vfsub_llseek(h_file, cookie->h_pos, SEEK_SET);
29181+ err = offset;
29182+ if (unlikely(offset != cookie->h_pos))
29183+ goto out;
29184+
29185+ err = 0;
29186+ do {
29187+ arg->err = 0;
29188+ au_fclr_rdu(cookie->flags, CALLED);
29189+ /* smp_mb(); */
29190+ err = vfsub_iterate_dir(h_file, &arg->ctx);
29191+ if (err >= 0)
29192+ err = arg->err;
29193+ } while (!err /* repeat while the actor ran and the buffer is not full */
29194+ && au_ftest_rdu(cookie->flags, CALLED)
29195+ && !au_ftest_rdu(cookie->flags, FULL));
29196+ cookie->h_pos = h_file->f_pos; /* the next call seeks here */
29197+
29198+out:
29199+ AuTraceErr(err);
29200+ return err;
29201+}
29202+/* AUFS_CTL_RDU: fill the user buffer of @rdu with entries of every lower dir of @file */
29203+static int au_rdu(struct file *file, struct aufs_rdu *rdu)
29204+{
29205+ int err;
29206+ aufs_bindex_t bbot;
29207+ struct au_rdu_arg arg = {
29208+ .ctx = {
29209+ .actor = au_rdu_fill
29210+ }
29211+ };
29212+ struct dentry *dentry;
29213+ struct inode *inode;
29214+ struct file *h_file;
29215+ struct au_rdu_cookie *cookie = &rdu->cookie;
29216+
29217+ /* VERIFY_WRITE */
29218+ err = !access_ok(rdu->ent.e, rdu->sz);
29219+ if (unlikely(err)) {
29220+ err = -EFAULT;
29221+ AuTraceErr(err);
29222+ goto out;
29223+ }
29224+ rdu->rent = 0;
29225+ rdu->tail = rdu->ent;
29226+ rdu->full = 0;
29227+ arg.rdu = rdu;
29228+ arg.ent = rdu->ent;
29229+ arg.end = arg.ent.ul;
29230+ arg.end += rdu->sz; /* au_rdu_fill() stops before this address */
29231+
29232+ err = -ENOTDIR;
29233+ if (unlikely(!file->f_op->iterate && !file->f_op->iterate_shared))
29234+ goto out; /* @file has no iterate method */
29235+
29236+ err = security_file_permission(file, MAY_READ);
29237+ AuTraceErr(err);
29238+ if (unlikely(err))
29239+ goto out;
29240+
29241+ dentry = file->f_path.dentry;
29242+ inode = d_inode(dentry);
29243+ inode_lock_shared(inode);
29244+
29245+ arg.sb = inode->i_sb;
29246+ err = si_read_lock(arg.sb, AuLock_FLUSH | AuLock_NOPLM);
29247+ if (unlikely(err))
29248+ goto out_mtx;
29249+ err = au_alive_dir(dentry);
29250+ if (unlikely(err))
29251+ goto out_si;
29252+ /* todo: reval? */
29253+ fi_read_lock(file);
29254+
29255+ err = -EAGAIN;
29256+ if (unlikely(au_ftest_rdu(cookie->flags, CONT)
29257+ && cookie->generation != au_figen(file)))
29258+ goto out_unlock; /* a continued call whose cookie generation is outdated */
29259+
29260+ err = 0;
29261+ if (!rdu->blk) { /* no block size from the caller: use si_rdblk, else the dir size */
29262+ rdu->blk = au_sbi(arg.sb)->si_rdblk;
29263+ if (!rdu->blk)
29264+ rdu->blk = au_dir_size(file, /*dentry*/NULL);
29265+ }
29266+ bbot = au_fbtop(file); /* 'bbot' temporarily holds the top index here */
29267+ if (cookie->bindex < bbot)
29268+ cookie->bindex = bbot;
29269+ bbot = au_fbbot_dir(file);
29270+ /* AuDbg("b%d, b%d\n", cookie->bindex, bbot); */
29271+ for (; !err && cookie->bindex <= bbot;
29272+ cookie->bindex++, cookie->h_pos = 0) { /* each next branch starts at offset 0 */
29273+ h_file = au_hf_dir(file, cookie->bindex);
29274+ if (!h_file)
29275+ continue;
29276+
29277+ au_fclr_rdu(cookie->flags, FULL);
29278+ err = au_rdu_do(h_file, &arg);
29279+ AuTraceErr(err);
29280+ if (unlikely(au_ftest_rdu(cookie->flags, FULL) || err))
29281+ break; /* leave bindex and h_pos untouched for the next call */
29282+ }
29283+ AuDbg("rent %llu\n", rdu->rent);
29284+
29285+ if (!err && !au_ftest_rdu(cookie->flags, CONT)) {
29286+ rdu->shwh = !!au_opt_test(au_sbi(arg.sb)->si_mntflags, SHWH);
29287+ au_fset_rdu(cookie->flags, CONT);
29288+ cookie->generation = au_figen(file);
29289+ }
29290+
29291+ ii_read_lock_child(inode);
29292+ fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibtop(inode)));
29293+ ii_read_unlock(inode);
29294+
29295+out_unlock:
29296+ fi_read_unlock(file);
29297+out_si:
29298+ si_read_unlock(arg.sb);
29299+out_mtx:
29300+ inode_unlock_shared(inode);
29301+out:
29302+ AuTraceErr(err);
29303+ return err;
29304+}
29305+/* AUFS_CTL_RDU_INO: rewrite the ino of each of the rdu->nent entries in the user buffer */
29306+static int au_rdu_ino(struct file *file, struct aufs_rdu *rdu)
29307+{
29308+ int err;
29309+ ino_t ino;
29310+ unsigned long long nent;
29311+ union au_rdu_ent_ul *u;
29312+ struct au_rdu_ent ent;
29313+ struct super_block *sb;
29314+
29315+ err = 0;
29316+ nent = rdu->nent;
29317+ u = &rdu->ent;
29318+ sb = file->f_path.dentry->d_sb;
29319+ si_read_lock(sb, AuLock_FLUSH);
29320+ while (nent-- > 0) {
29321+ /* unnecessary to support mmap_sem since this is a dir */
29322+ err = copy_from_user(&ent, u->e, sizeof(ent));
29323+ if (!err)
29324+ /* VERIFY_WRITE */
29325+ err = !access_ok(&u->e->ino, sizeof(ino));
29326+ if (unlikely(err)) {
29327+ err = -EFAULT;
29328+ AuTraceErr(err);
29329+ break;
29330+ }
29331+
29332+ /* AuDbg("b%d, i%llu\n", ent.bindex, ent.ino); */
29333+ if (!ent.wh) /* 'wh' comes from the user copy of the entry */
29334+ err = au_ino(sb, ent.bindex, ent.ino, ent.type, &ino);
29335+ else
29336+ err = au_wh_ino(sb, ent.bindex, ent.ino, ent.type,
29337+ &ino);
29338+ if (unlikely(err)) {
29339+ AuTraceErr(err);
29340+ break;
29341+ }
29342+
29343+ err = __put_user(ino, &u->e->ino);
29344+ if (unlikely(err)) {
29345+ err = -EFAULT;
29346+ AuTraceErr(err);
29347+ break;
29348+ }
29349+ u->ul += au_rdu_len(ent.nlen); /* step to the next variable-length entry */
29350+ }
29351+ si_read_unlock(sb);
29352+
29353+ return err;
29354+}
29355+
29356+/* ---------------------------------------------------------------------- */
29357+/* Accept @rdu only when the size recorded in verify[] equals our sizeof(*rdu) */
29358+static int au_rdu_verify(struct aufs_rdu *rdu)
29359+{
29360+ AuDbg("rdu{%llu, %p, %u | %u | %llu, %u, %u | "
29361+ "%llu, b%d, 0x%x, g%u}\n",
29362+ rdu->sz, rdu->ent.e, rdu->verify[AufsCtlRduV_SZ],
29363+ rdu->blk,
29364+ rdu->rent, rdu->shwh, rdu->full,
29365+ rdu->cookie.h_pos, rdu->cookie.bindex, rdu->cookie.flags,
29366+ rdu->cookie.generation);
29367+
29368+ if (rdu->verify[AufsCtlRduV_SZ] == sizeof(*rdu))
29369+ return 0;
29370+
29371+ AuDbg("%u:%u\n",
29372+ rdu->verify[AufsCtlRduV_SZ], (unsigned int)sizeof(*rdu));
29373+ return -EINVAL; /* size mismatch */
29374+}
29375+/* ioctl entry: copy in and verify struct aufs_rdu, then dispatch on @cmd */
29376+long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
29377+{
29378+ long err, e;
29379+ struct aufs_rdu rdu;
29380+ void __user *p = (void __user *)arg;
29381+
29382+ err = copy_from_user(&rdu, p, sizeof(rdu));
29383+ if (unlikely(err)) {
29384+ err = -EFAULT;
29385+ AuTraceErr(err);
29386+ goto out;
29387+ }
29388+ err = au_rdu_verify(&rdu);
29389+ if (unlikely(err))
29390+ goto out;
29391+
29392+ switch (cmd) {
29393+ case AUFS_CTL_RDU:
29394+ err = au_rdu(file, &rdu);
29395+ if (unlikely(err))
29396+ break; /* on failure the user copy of rdu is left untouched */
29397+
29398+ e = copy_to_user(p, &rdu, sizeof(rdu));
29399+ if (unlikely(e)) {
29400+ err = -EFAULT;
29401+ AuTraceErr(err);
29402+ }
29403+ break;
29404+ case AUFS_CTL_RDU_INO:
29405+ err = au_rdu_ino(file, &rdu); /* rdu itself is not copied back */
29406+ break;
29407+
29408+ default:
29409+ /* err = -ENOTTY; */
29410+ err = -EINVAL;
29411+ }
29412+
29413+out:
29414+ AuTraceErr(err);
29415+ return err;
29416+}
29417+/* compat flavour of au_rdu_ioctl(): user pointers go through compat_ptr()/ptr_to_compat() */
29418+#ifdef CONFIG_COMPAT
29419+long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
29420+{
29421+ long err, e;
29422+ struct aufs_rdu rdu;
29423+ void __user *p = compat_ptr(arg);
29424+
29425+ /* todo: get_user()? */
29426+ err = copy_from_user(&rdu, p, sizeof(rdu));
29427+ if (unlikely(err)) {
29428+ err = -EFAULT;
29429+ AuTraceErr(err);
29430+ goto out;
29431+ }
29432+ rdu.ent.e = compat_ptr(rdu.ent.ul);
29433+ err = au_rdu_verify(&rdu);
29434+ if (unlikely(err))
29435+ goto out;
29436+
29437+ switch (cmd) {
29438+ case AUFS_CTL_RDU:
29439+ err = au_rdu(file, &rdu);
29440+ if (unlikely(err))
29441+ break;
29442+
29443+ rdu.ent.ul = ptr_to_compat(rdu.ent.e); /* convert back before copying out */
29444+ rdu.tail.ul = ptr_to_compat(rdu.tail.e);
29445+ e = copy_to_user(p, &rdu, sizeof(rdu));
29446+ if (unlikely(e)) {
29447+ err = -EFAULT;
29448+ AuTraceErr(err);
29449+ }
29450+ break;
29451+ case AUFS_CTL_RDU_INO:
29452+ err = au_rdu_ino(file, &rdu);
29453+ break;
29454+
29455+ default:
29456+ /* err = -ENOTTY; */
29457+ err = -EINVAL;
29458+ }
29459+
29460+out:
29461+ AuTraceErr(err);
29462+ return err;
29463+}
29464+#endif /* CONFIG_COMPAT */
29465diff -urN /usr/share/empty/fs/aufs/rwsem.h linux/fs/aufs/rwsem.h
29466--- /usr/share/empty/fs/aufs/rwsem.h 1970-01-01 01:00:00.000000000 +0100
29467+++ linux/fs/aufs/rwsem.h 2022-11-05 23:02:18.969222617 +0100
29468@@ -0,0 +1,85 @@
29469+/* SPDX-License-Identifier: GPL-2.0 */
29470+/*
29471+ * Copyright (C) 2005-2022 Junjiro R. Okajima
29472+ *
29473+ * This program is free software; you can redistribute it and/or modify
29474+ * it under the terms of the GNU General Public License as published by
29475+ * the Free Software Foundation; either version 2 of the License, or
29476+ * (at your option) any later version.
29477+ *
29478+ * This program is distributed in the hope that it will be useful,
29479+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29480+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29481+ * GNU General Public License for more details.
29482+ *
29483+ * You should have received a copy of the GNU General Public License
29484+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
29485+ */
29486+
29487+/*
29488+ * simple read-write semaphore wrappers
29489+ */
29490+
29491+#ifndef __AUFS_RWSEM_H__
29492+#define __AUFS_RWSEM_H__
29493+
29494+#ifdef __KERNEL__
29495+
29496+#include "debug.h"
29497+
29498+/* in the future, the name 'au_rwsem' will be totally gone */
29499+#define au_rwsem rw_semaphore
29500+
29501+/* to make debugging easier, these are macros rather than inline functions */
29502+#define AuRwMustNoWaiters(rw) AuDebugOn(rwsem_is_contended(rw))
29503+
29504+#ifdef CONFIG_LOCKDEP
29505+/* rwsem_is_locked() is unusable; these assertions ask lockdep instead */
29506+#define AuRwMustReadLock(rw) AuDebugOn(IS_ENABLED(CONFIG_LOCKDEP) \
29507+ && !lockdep_recursing(current) \
29508+ && debug_locks \
29509+ && !lockdep_is_held_type(rw, 1))
29510+#define AuRwMustWriteLock(rw) AuDebugOn(IS_ENABLED(CONFIG_LOCKDEP) \
29511+ && !lockdep_recursing(current) \
29512+ && debug_locks \
29513+ && !lockdep_is_held_type(rw, 0))
29514+#define AuRwMustAnyLock(rw) AuDebugOn(IS_ENABLED(CONFIG_LOCKDEP) \
29515+ && !lockdep_recursing(current) \
29516+ && debug_locks \
29517+ && !lockdep_is_held(rw))
29518+#define AuRwDestroy(rw) AuDebugOn(IS_ENABLED(CONFIG_LOCKDEP) \
29519+ && !lockdep_recursing(current) \
29520+ && debug_locks \
29521+ && lockdep_is_held(rw))
29522+#else /* !CONFIG_LOCKDEP: the assertions expand to nothing */
29523+#define AuRwMustReadLock(rw) do {} while (0)
29524+#define AuRwMustWriteLock(rw) do {} while (0)
29525+#define AuRwMustAnyLock(rw) do {} while (0)
29526+#define AuRwDestroy(rw) do {} while (0)
29527+#endif /* CONFIG_LOCKDEP */
29528+
29529+#define au_rw_init(rw) init_rwsem(rw)
29530+
29531+#define au_rw_init_wlock(rw) do { \
29532+ au_rw_init(rw); \
29533+ down_write(rw); \
29534+ } while (0)
29535+
29536+#define au_rw_init_wlock_nested(rw, lsc) do { \
29537+ au_rw_init(rw); \
29538+ down_write_nested(rw, lsc); \
29539+ } while (0)
29540+
29541+#define au_rw_read_lock(rw) down_read(rw)
29542+#define au_rw_read_lock_nested(rw, lsc) down_read_nested(rw, lsc)
29543+#define au_rw_read_unlock(rw) up_read(rw)
29544+#define au_rw_dgrade_lock(rw) downgrade_write(rw)
29545+#define au_rw_write_lock(rw) down_write(rw)
29546+#define au_rw_write_lock_nested(rw, lsc) down_write_nested(rw, lsc)
29547+#define au_rw_write_unlock(rw) up_write(rw)
29548+/* why are the _nested versions of trylock not defined? */
29549+#define au_rw_read_trylock(rw) down_read_trylock(rw)
29550+#define au_rw_write_trylock(rw) down_write_trylock(rw)
29551+
29552+#endif /* __KERNEL__ */
29553+#endif /* __AUFS_RWSEM_H__ */
29554diff -urN /usr/share/empty/fs/aufs/sbinfo.c linux/fs/aufs/sbinfo.c
29555--- /usr/share/empty/fs/aufs/sbinfo.c 1970-01-01 01:00:00.000000000 +0100
29556+++ linux/fs/aufs/sbinfo.c 2022-11-05 23:02:18.969222617 +0100
29557@@ -0,0 +1,316 @@
29558+// SPDX-License-Identifier: GPL-2.0
29559+/*
29560+ * Copyright (C) 2005-2022 Junjiro R. Okajima
29561+ *
29562+ * This program is free software; you can redistribute it and/or modify
29563+ * it under the terms of the GNU General Public License as published by
29564+ * the Free Software Foundation; either version 2 of the License, or
29565+ * (at your option) any later version.
29566+ *
29567+ * This program is distributed in the hope that it will be useful,
29568+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29569+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29570+ * GNU General Public License for more details.
29571+ *
29572+ * You should have received a copy of the GNU General Public License
29573+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
29574+ */
29575+
29576+/*
29577+ * superblock private data
29578+ */
29579+
29580+#include <linux/iversion.h>
29581+#include "aufs.h"
29582+
29583+/*
29584+ * Needed even when sysfs is disabled: tear down and free the sbinfo embedding @kobj.
29585+ */
29586+void au_si_free(struct kobject *kobj)
29587+{
29588+ int i;
29589+ struct au_sbinfo *sbinfo;
29590+ char *locked __maybe_unused; /* debug only */
29591+
29592+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
29593+ for (i = 0; i < AuPlink_NHASH; i++)
29594+ AuDebugOn(!hlist_bl_empty(sbinfo->si_plink + i));
29595+ AuDebugOn(atomic_read(&sbinfo->si_nowait.nw_len));
29596+
29597+ AuLCntZero(au_lcnt_read(&sbinfo->si_ninodes, /*do_rev*/0));
29598+ au_lcnt_fin(&sbinfo->si_ninodes, /*do_sync*/0);
29599+ AuLCntZero(au_lcnt_read(&sbinfo->si_nfiles, /*do_rev*/0));
29600+ au_lcnt_fin(&sbinfo->si_nfiles, /*do_sync*/0);
29601+
29602+ dbgaufs_si_fin(sbinfo);
29603+ au_rw_write_lock(&sbinfo->si_rwsem); /* au_br_free() runs under the write lock */
29604+ au_br_free(sbinfo);
29605+ au_rw_write_unlock(&sbinfo->si_rwsem);
29606+
29607+ au_kfree_try_rcu(sbinfo->si_branch);
29608+ mutex_destroy(&sbinfo->si_xib_mtx);
29609+ AuRwDestroy(&sbinfo->si_rwsem);
29610+
29611+ au_lcnt_wait_for_fin(&sbinfo->si_ninodes);
29612+ /* si_nfiles is waited too */
29613+ au_kfree_rcu(sbinfo);
29614+}
29615+/* Allocate an sbinfo with default settings, returned with si_rwsem write-locked, or ERR_PTR */
29616+struct au_sbinfo *au_si_alloc(struct super_block *sb)
29617+{
29618+ struct au_sbinfo *sbinfo;
29619+ int err, i;
29620+
29621+ err = -ENOMEM;
29622+ sbinfo = kzalloc(sizeof(*sbinfo), GFP_NOFS);
29623+ if (unlikely(!sbinfo))
29624+ goto out;
29625+
29626+ /* will be reallocated separately */
29627+ sbinfo->si_branch = kzalloc(sizeof(*sbinfo->si_branch), GFP_NOFS);
29628+ if (unlikely(!sbinfo->si_branch))
29629+ goto out_sbinfo;
29630+
29631+ err = sysaufs_si_init(sbinfo);
29632+ if (!err) {
29633+ dbgaufs_si_null(sbinfo);
29634+ err = dbgaufs_si_init(sbinfo);
29635+ if (unlikely(err)) /* NOTE(review): confirm this put cannot free sbinfo before out_br */
29636+ kobject_put(&sbinfo->si_kobj);
29637+ }
29638+ if (unlikely(err))
29639+ goto out_br;
29640+
29641+ au_nwt_init(&sbinfo->si_nowait);
29642+ au_rw_init_wlock(&sbinfo->si_rwsem); /* stays write-locked on return */
29643+
29644+ au_lcnt_init(&sbinfo->si_ninodes, /*release*/NULL);
29645+ au_lcnt_init(&sbinfo->si_nfiles, /*release*/NULL);
29646+
29647+ sbinfo->si_bbot = -1;
29648+ sbinfo->si_last_br_id = AUFS_BRANCH_MAX / 2;
29649+
29650+ sbinfo->si_wbr_copyup = AuWbrCopyup_Def;
29651+ sbinfo->si_wbr_create = AuWbrCreate_Def;
29652+ sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + sbinfo->si_wbr_copyup;
29653+ sbinfo->si_wbr_create_ops = au_wbr_create_ops + sbinfo->si_wbr_create;
29654+
29655+ au_fhsm_init(sbinfo);
29656+
29657+ sbinfo->si_mntflags = au_opts_plink(AuOpt_Def);
29658+
29659+ sbinfo->si_xino_jiffy = jiffies;
29660+ sbinfo->si_xino_expire
29661+ = msecs_to_jiffies(AUFS_XINO_DEF_SEC * MSEC_PER_SEC);
29662+ mutex_init(&sbinfo->si_xib_mtx);
29663+ /* leave si_xib_last_pindex and si_xib_next_bit */
29664+
29665+ INIT_HLIST_BL_HEAD(&sbinfo->si_aopen);
29666+
29667+ sbinfo->si_rdcache = msecs_to_jiffies(AUFS_RDCACHE_DEF * MSEC_PER_SEC);
29668+ sbinfo->si_rdblk = AUFS_RDBLK_DEF;
29669+ sbinfo->si_rdhash = AUFS_RDHASH_DEF;
29670+ sbinfo->si_dirwh = AUFS_DIRWH_DEF;
29671+
29672+ for (i = 0; i < AuPlink_NHASH; i++)
29673+ INIT_HLIST_BL_HEAD(sbinfo->si_plink + i);
29674+ init_waitqueue_head(&sbinfo->si_plink_wq);
29675+ spin_lock_init(&sbinfo->si_plink_maint_lock);
29676+
29677+ INIT_HLIST_BL_HEAD(&sbinfo->si_files);
29678+
29679+ /* with getattr by default */
29680+ sbinfo->si_iop_array = aufs_iop;
29681+
29682+ /* leave other members for sysaufs and si_mnt. */
29683+ sbinfo->si_sb = sb;
29684+ if (sb) { /* @sb may be NULL; then nothing is attached to a super_block */
29685+ sb->s_fs_info = sbinfo;
29686+ si_pid_set(sb);
29687+ }
29688+ return sbinfo; /* success */
29689+
29690+out_br:
29691+ au_kfree_try_rcu(sbinfo->si_branch);
29692+out_sbinfo:
29693+ au_kfree_rcu(sbinfo);
29694+out:
29695+ return ERR_PTR(err);
29696+}
29697+/* Resize sbinfo->si_branch to hold @nbr pointers; 0 or -ENOMEM. Needs si_rwsem write-held */
29698+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr, int may_shrink)
29699+{
29700+ int err, sz;
29701+ struct au_branch **brp;
29702+
29703+ AuRwMustWriteLock(&sbinfo->si_rwsem);
29704+
29705+ err = -ENOMEM;
29706+ sz = sizeof(*brp) * (sbinfo->si_bbot + 1); /* current size of the array */
29707+ if (unlikely(!sz))
29708+ sz = sizeof(*brp); /* si_bbot is -1: au_si_alloc() allocated one slot */
29709+ brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS,
29710+ may_shrink);
29711+ if (brp) {
29712+ sbinfo->si_branch = brp;
29713+ err = 0;
29714+ }
29715+
29716+ return err;
29717+}
29718+
29719+/* ---------------------------------------------------------------------- */
29720+/* Bump si_generation and bring the root dentry/inode up to date; returns the new value */
29721+unsigned int au_sigen_inc(struct super_block *sb)
29722+{
29723+ unsigned int gen;
29724+ struct inode *inode;
29725+
29726+ SiMustWriteLock(sb);
29727+
29728+ gen = ++au_sbi(sb)->si_generation;
29729+ au_update_digen(sb->s_root);
29730+ inode = d_inode(sb->s_root);
29731+ au_update_iigen(inode, /*half*/0);
29732+ inode_inc_iversion(inode);
29733+ return gen;
29734+}
29735+/* Return the next non-zero branch id unknown to au_br_index(), or -1 when every try fails */
29736+aufs_bindex_t au_new_br_id(struct super_block *sb)
29737+{
29738+ aufs_bindex_t br_id;
29739+ int i;
29740+ struct au_sbinfo *sbinfo;
29741+
29742+ SiMustWriteLock(sb);
29743+
29744+ sbinfo = au_sbi(sb);
29745+ for (i = 0; i <= AUFS_BRANCH_MAX; i++) {
29746+ br_id = ++sbinfo->si_last_br_id; /* the counter persists across calls */
29747+ AuDebugOn(br_id < 0);
29748+ if (br_id && au_br_index(sb, br_id) < 0)
29749+ return br_id;
29750+ }
29751+
29752+ return -1;
29753+}
29754+
29755+/* ---------------------------------------------------------------------- */
29756+
29757+/* it is ok that new 'nwt' tasks are appended while we are sleeping */
29758+int si_read_lock(struct super_block *sb, int flags)
29759+{
29760+ int err;
29761+
29762+ err = 0;
29763+ if (au_ftest_lock(flags, FLUSH))
29764+ au_nwt_flush(&au_sbi(sb)->si_nowait); /* flushed before the lock is taken */
29765+
29766+ si_noflush_read_lock(sb);
29767+ err = au_plink_maint(sb, flags);
29768+ if (unlikely(err))
29769+ si_read_unlock(sb); /* on error, return without holding the lock */
29770+
29771+ return err;
29772+}
29773+/* Write-lock counterpart of si_read_lock(); on error the lock is not held on return */
29774+int si_write_lock(struct super_block *sb, int flags)
29775+{
29776+ int err;
29777+
29778+ if (au_ftest_lock(flags, FLUSH))
29779+ au_nwt_flush(&au_sbi(sb)->si_nowait);
29780+
29781+ si_noflush_write_lock(sb);
29782+ err = au_plink_maint(sb, flags);
29783+ if (unlikely(err))
29784+ si_write_unlock(sb);
29785+
29786+ return err;
29787+}
29788+
29789+/* dentry and super_block lock. call at entry point; on error nothing stays locked */
29790+int aufs_read_lock(struct dentry *dentry, int flags)
29791+{
29792+ int err;
29793+ struct super_block *sb;
29794+
29795+ sb = dentry->d_sb;
29796+ err = si_read_lock(sb, flags);
29797+ if (unlikely(err))
29798+ goto out;
29799+
29800+ if (au_ftest_lock(flags, DW)) /* DW asks for the dentry write lock */
29801+ di_write_lock_child(dentry);
29802+ else
29803+ di_read_lock_child(dentry, flags);
29804+
29805+ if (au_ftest_lock(flags, GEN)) { /* GEN asks for the generation test as well */
29806+ err = au_digen_test(dentry, au_sigen(sb));
29807+ if (!au_opt_test(au_mntflags(sb), UDBA_NONE))
29808+ AuDebugOn(!err && au_dbrange_test(dentry));
29809+ else if (!err)
29810+ err = au_dbrange_test(dentry);
29811+ if (unlikely(err))
29812+ aufs_read_unlock(dentry, flags);
29813+ }
29814+
29815+out:
29816+ return err;
29817+}
29818+/* Undo aufs_read_lock(); @flags must carry the same DW choice as the lock call */
29819+void aufs_read_unlock(struct dentry *dentry, int flags)
29820+{
29821+ if (au_ftest_lock(flags, DW))
29822+ di_write_unlock(dentry);
29823+ else
29824+ di_read_unlock(dentry, flags);
29825+ si_read_unlock(dentry->d_sb);
29826+}
29827+/* Write-lock the sbinfo, then @dentry; the si_write_lock() result is not checked here */
29828+void aufs_write_lock(struct dentry *dentry)
29829+{
29830+ si_write_lock(dentry->d_sb, AuLock_FLUSH | AuLock_NOPLMW);
29831+ di_write_lock_child(dentry);
29832+}
29833+/* Undo aufs_write_lock(), releasing in the reverse order */
29834+void aufs_write_unlock(struct dentry *dentry)
29835+{
29836+ di_write_unlock(dentry);
29837+ si_write_unlock(dentry->d_sb);
29838+}
29839+/* Read-lock the sbinfo and write-lock both dentries; on error nothing stays locked */
29840+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags)
29841+{
29842+ int err;
29843+ unsigned int sigen;
29844+ struct super_block *sb;
29845+
29846+ sb = d1->d_sb;
29847+ err = si_read_lock(sb, flags);
29848+ if (unlikely(err))
29849+ goto out;
29850+
29851+ di_write_lock2_child(d1, d2, au_ftest_lock(flags, DIRS));
29852+
29853+ if (au_ftest_lock(flags, GEN)) { /* GEN: test both dentries against the sb generation */
29854+ sigen = au_sigen(sb);
29855+ err = au_digen_test(d1, sigen);
29856+ AuDebugOn(!err && au_dbrange_test(d1));
29857+ if (!err) {
29858+ err = au_digen_test(d2, sigen);
29859+ AuDebugOn(!err && au_dbrange_test(d2));
29860+ }
29861+ if (unlikely(err))
29862+ aufs_read_and_write_unlock2(d1, d2);
29863+ }
29864+
29865+out:
29866+ return err;
29867+}
29868+/* Undo aufs_read_and_write_lock2(): both dentries first, then the sbinfo read lock */
29869+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2)
29870+{
29871+ di_write_unlock2(d1, d2);
29872+ si_read_unlock(d1->d_sb);
29873+}
29874diff -urN /usr/share/empty/fs/aufs/super.c linux/fs/aufs/super.c
29875--- /usr/share/empty/fs/aufs/super.c 1970-01-01 01:00:00.000000000 +0100
29876+++ linux/fs/aufs/super.c 2022-11-05 23:02:18.969222617 +0100
29877@@ -0,0 +1,871 @@
29878+// SPDX-License-Identifier: GPL-2.0
29879+/*
29880+ * Copyright (C) 2005-2022 Junjiro R. Okajima
29881+ *
29882+ * This program is free software; you can redistribute it and/or modify
29883+ * it under the terms of the GNU General Public License as published by
29884+ * the Free Software Foundation; either version 2 of the License, or
29885+ * (at your option) any later version.
29886+ *
29887+ * This program is distributed in the hope that it will be useful,
29888+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
29889+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29890+ * GNU General Public License for more details.
29891+ *
29892+ * You should have received a copy of the GNU General Public License
29893+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
29894+ */
29895+
29896+/*
29897+ * mount and super_block operations
29898+ */
29899+
29900+#include <linux/iversion.h>
29901+#include <linux/mm.h>
29902+#include <linux/seq_file.h>
29903+#include <linux/statfs.h>
29904+#include <linux/vmalloc.h>
29905+#include "aufs.h"
29906+
29907+/*
29908+ * super_operations
29909+ */
29910+static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused)
29911+{
29912+ struct au_icntnr *c;
29913+
29914+ c = au_cache_alloc_icntnr(sb); /* the VFS inode is embedded in an au_icntnr */
29915+ if (c) {
29916+ au_icntnr_init(c);
29917+ inode_set_iversion(&c->vfs_inode, 1); /* sigen(sb); */
29918+ c->iinfo.ii_hinode = NULL;
29919+ return &c->vfs_inode;
29920+ }
29921+ return NULL; /* the cache allocation failed */
29922+}
29923+/* Finalize the aufs inode info, skipped for an inode au_is_bad_inode() reports as bad */
29924+static void aufs_destroy_inode(struct inode *inode)
29925+{
29926+ if (!au_is_bad_inode(inode))
29927+ au_iinfo_fin(inode);
29928+}
29929+/* Return the au_icntnr that embeds @inode to its cache */
29930+static void aufs_free_inode(struct inode *inode)
29931+{
29932+ au_cache_free_icntnr(container_of(inode, struct au_icntnr, vfs_inode));
29933+}
29934+/* iget_locked() wrapper that also sets up a new aufs inode; returns ERR_PTR, never NULL */
29935+struct inode *au_iget_locked(struct super_block *sb, ino_t ino)
29936+{
29937+ struct inode *inode;
29938+ int err;
29939+
29940+ inode = iget_locked(sb, ino);
29941+ if (unlikely(!inode)) {
29942+ inode = ERR_PTR(-ENOMEM);
29943+ goto out;
29944+ }
29945+ if (!(inode->i_state & I_NEW))
29946+ goto out; /* not a new inode: return it as it is */
29947+
29948+ err = au_xigen_new(inode);
29949+ if (!err)
29950+ err = au_iinfo_init(inode);
29951+ if (!err)
29952+ inode_inc_iversion(inode);
29953+ else {
29954+ iget_failed(inode);
29955+ inode = ERR_PTR(err);
29956+ }
29957+
29958+out:
29959+ /* never return NULL */
29960+ AuDebugOn(!inode);
29961+ AuTraceErrPtr(inode);
29962+ return inode;
29963+}
29964+
29965+/* print "path=perm" for every branch, ':'-separated; lock free root dinfo */
29966+static int au_show_brs(struct seq_file *seq, struct super_block *sb)
29967+{
29968+ int err;
29969+ aufs_bindex_t bindex, bbot;
29970+ struct path path;
29971+ struct au_hdentry *hdp;
29972+ struct au_branch *br;
29973+ au_br_perm_str_t perm;
29974+
29975+ err = 0;
29976+ bbot = au_sbbot(sb);
29977+ bindex = 0;
29978+ hdp = au_hdentry(au_di(sb->s_root), bindex); /* hdp advances in step with bindex */
29979+ for (; !err && bindex <= bbot; bindex++, hdp++) {
29980+ br = au_sbr(sb, bindex);
29981+ path.mnt = au_br_mnt(br);
29982+ path.dentry = hdp->hd_dentry;
29983+ err = au_seq_path(seq, &path);
29984+ if (!err) {
29985+ au_optstr_br_perm(&perm, br->br_perm);
29986+ seq_printf(seq, "=%s", perm.a);
29987+ if (bindex != bbot)
29988+ seq_putc(seq, ':'); /* separator, not after the last branch */
29989+ }
29990+ }
29991+ if (unlikely(err || seq_has_overflowed(seq)))
29992+ err = -E2BIG; /* any failure, including a full seq buffer, becomes -E2BIG */
29993+
29994+ return err;
29995+}
29996+
29997+static void au_gen_fmt(char *fmt, int len __maybe_unused, const char *pat,
29998+ const char *append)
29999+{
30000+ char *p;
30001+
30002+ p = fmt;
30003+ while (*pat != ':') /* copy @pat up to its first ':' ... */
30004+ *p++ = *pat++;
30005+ *p++ = *pat++; /* ... and the ':' itself */
30006+ strcpy(p, append); /* then the conversion spec(s) in @append */
30007+ AuDebugOn(strlen(fmt) >= len); /* NOTE(review): checked only after the copy; relies on @pat containing ':' */
30008+}
30009+
30010+static void au_show_wbr_create(struct seq_file *m, int v,
30011+ struct au_sbinfo *sbinfo)
30012+{
30013+ const char *pat;
30014+ char fmt[32];
30015+ struct au_wbr_mfs *mfs;
30016+
30017+ AuRwMustAnyLock(&sbinfo->si_rwsem);
30018+
30019+ seq_puts(m, ",create=");
30020+ pat = au_optstr_wbr_create(v);
30021+ mfs = &sbinfo->si_wbr_mfs;
30022+ switch (v) {
30023+ case AuWbrCreate_TDP:
30024+ case AuWbrCreate_RR:
30025+ case AuWbrCreate_MFS:
30026+ case AuWbrCreate_PMFS:
30027+ seq_puts(m, pat); /* these are printed as the bare option string */
30028+ break;
30029+ case AuWbrCreate_MFSRR:
30030+ case AuWbrCreate_TDMFS:
30031+ case AuWbrCreate_PMFSRR:
30032+ au_gen_fmt(fmt, sizeof(fmt), pat, "%llu"); /* "<name>:<watermark>" */
30033+ seq_printf(m, fmt, mfs->mfsrr_watermark);
30034+ break;
30035+ case AuWbrCreate_MFSV:
30036+ case AuWbrCreate_PMFSV:
30037+ au_gen_fmt(fmt, sizeof(fmt), pat, "%lu"); /* "<name>:<expire, in seconds>" */
30038+ seq_printf(m, fmt,
30039+ jiffies_to_msecs(mfs->mfs_expire)
30040+ / MSEC_PER_SEC);
30041+ break;
30042+ case AuWbrCreate_MFSRRV:
30043+ case AuWbrCreate_TDMFSV:
30044+ case AuWbrCreate_PMFSRRV:
30045+ au_gen_fmt(fmt, sizeof(fmt), pat, "%llu:%lu"); /* "<name>:<watermark>:<expire, in seconds>" */
30046+ seq_printf(m, fmt, mfs->mfsrr_watermark,
30047+ jiffies_to_msecs(mfs->mfs_expire) / MSEC_PER_SEC);
30048+ break;
30049+ default:
30050+ BUG(); /* a value not listed above */
30051+ }
30052+}
30053+
30054+static int au_show_xino(struct seq_file *seq, struct super_block *sb)
30055+{
30056+#ifdef CONFIG_SYSFS
30057+ return 0; /* nothing is printed here when sysfs is enabled */
30058+#else
30059+ int err;
30060+ const int len = sizeof(AUFS_XINO_FNAME) - 1;
30061+ aufs_bindex_t bindex;
30062+ struct qstr *name;
30063+ struct file *f;
30064+ struct dentry *d, *h_root;
30065+ struct au_branch *br;
30066+
30067+ AuRwMustAnyLock(&au_sbi(sb)->si_rwsem); /* no local sbinfo in this function */
30068+
30069+ err = 0;
30070+ f = au_sbi(sb)->si_xib;
30071+ if (!f)
30072+ goto out; /* no xib file, nothing to print */
30073+
30074+ /* stop printing the default xino path on the first writable branch */
30075+ h_root = NULL;
30076+ bindex = au_xi_root(sb, f->f_path.dentry);
30077+ if (bindex >= 0) {
30078+ br = au_sbr(sb, bindex); /* the branch itself, not its super_block */
30079+ h_root = au_br_dentry(br);
30080+ }
30081+
30082+ d = f->f_path.dentry;
30083+ name = &d->d_name;
30084+ /* safe ->d_parent because the file is unlinked */
30085+ if (d->d_parent == h_root
30086+ && name->len == len
30087+ && !memcmp(name->name, AUFS_XINO_FNAME, len))
30088+ goto out; /* default name under the branch root: keep quiet */
30089+
30090+ seq_puts(seq, ",xino=");
30091+ err = au_xino_path(seq, f);
30092+
30093+out:
30094+ return err;
30095+#endif
30096+}
30097+
30098+/* .show_options; seq_file will re-call me in case of too long string */
30099+static int aufs_show_options(struct seq_file *m, struct dentry *dentry)
30100+{
30101+ int err;
30102+ unsigned int mnt_flags, v;
30103+ struct super_block *sb;
30104+ struct au_sbinfo *sbinfo;
30105+
30106+#define AuBool(name, str) do { \
30107+ v = au_opt_test(mnt_flags, name); \
30108+ if (v != au_opt_test(AuOpt_Def, name)) \
30109+ seq_printf(m, ",%s" #str, v ? "" : "no"); \
30110+} while (0) /* AuBool: emit ",<str>" or ",no<str>" when the flag differs from AuOpt_Def */
30111+
30112+#define AuStr(name, str) do { \
30113+ v = mnt_flags & AuOptMask_##name; \
30114+ if (v != (AuOpt_Def & AuOptMask_##name)) \
30115+ seq_printf(m, "," #str "=%s", au_optstr_##str(v)); \
30116+} while (0) /* AuStr: emit ",<str>=<name>" when the masked bits differ from AuOpt_Def */
30117+
30118+#define AuUInt(name, str, val) do { \
30119+ if (val != AUFS_##name##_DEF) \
30120+ seq_printf(m, "," #str "=%u", val); \
30121+} while (0) /* AuUInt: emit ",<str>=<val>" when val differs from AUFS_<name>_DEF */
30122+
30123+ sb = dentry->d_sb;
30124+ if (sb->s_flags & SB_POSIXACL)
30125+ seq_puts(m, ",acl");
30126+#if 0 /* reserved for future use */
30127+ if (sb->s_flags & SB_I_VERSION)
30128+ seq_puts(m, ",i_version");
30129+#endif
30130+
30131+ /* lock free root dinfo */
30132+ si_noflush_read_lock(sb);
30133+ sbinfo = au_sbi(sb);
30134+ seq_printf(m, ",si=%lx", sysaufs_si_id(sbinfo));
30135+
30136+ mnt_flags = au_mntflags(sb);
30137+ if (au_opt_test(mnt_flags, XINO)) {
30138+ err = au_show_xino(m, sb);
30139+ if (unlikely(err))
30140+ goto out; /* skip the remaining options, "br:" is still printed */
30141+ } else
30142+ seq_puts(m, ",noxino");
30143+
30144+ AuBool(TRUNC_XINO, trunc_xino);
30145+ AuStr(UDBA, udba);
30146+ AuBool(SHWH, shwh);
30147+ AuBool(PLINK, plink);
30148+ AuBool(DIO, dio);
30149+ AuBool(DIRPERM1, dirperm1);
30150+
30151+ v = sbinfo->si_wbr_create;
30152+ if (v != AuWbrCreate_Def)
30153+ au_show_wbr_create(m, v, sbinfo);
30154+
30155+ v = sbinfo->si_wbr_copyup;
30156+ if (v != AuWbrCopyup_Def)
30157+ seq_printf(m, ",cpup=%s", au_optstr_wbr_copyup(v));
30158+
30159+ v = au_opt_test(mnt_flags, ALWAYS_DIROPQ);
30160+ if (v != au_opt_test(AuOpt_Def, ALWAYS_DIROPQ))
30161+ seq_printf(m, ",diropq=%c", v ? 'a' : 'w');
30162+
30163+ AuUInt(DIRWH, dirwh, sbinfo->si_dirwh);
30164+
30165+ v = jiffies_to_msecs(sbinfo->si_rdcache) / MSEC_PER_SEC; /* si_rdcache is in jiffies, shown in seconds */
30166+ AuUInt(RDCACHE, rdcache, v);
30167+
30168+ AuUInt(RDBLK, rdblk, sbinfo->si_rdblk);
30169+ AuUInt(RDHASH, rdhash, sbinfo->si_rdhash);
30170+
30171+ au_fhsm_show(m, sbinfo);
30172+
30173+ AuBool(DIRREN, dirren);
30174+ AuBool(SUM, sum);
30175+ /* AuBool(SUM_W, wsum); */
30176+ AuBool(WARN_PERM, warn_perm);
30177+ AuBool(VERBOSE, verbose);
30178+
30179+out:
30180+ /* be sure to print "br:" last */
30181+ if (!sysaufs_brs) {
30182+ seq_puts(m, ",br:");
30183+ au_show_brs(m, sb);
30184+ }
30185+ si_read_unlock(sb);
30186+ return 0; /* NOTE(review): err from au_show_xino() and au_show_brs() is not propagated */
30187+
30188+#undef AuBool
30189+#undef AuStr
30190+#undef AuUInt
30191+}
30192+
30193+/* ---------------------------------------------------------------------- */
30194+
30195+/* sum mode which returns the summation for statfs(2) */
30196+
30197+static u64 au_add_till_max(u64 a, u64 b)
30198+{
30199+ u64 sum;
30200+
30201+ /* saturating add: a wrapped unsigned sum is smaller than @a */
30202+ sum = a + b;
30203+ if (sum < a)
30204+ return ULLONG_MAX;
30205+ return sum;
30206+}
30207+
30208+static u64 au_mul_till_max(u64 a, long mul)
30209+{
30210+ u64 prod;
30211+
30212+ /* saturating multiply: comparing the wrapped product with @a misses */
30213+ /* wraps that land above @a, so test the exact result instead */
30214+ if (__builtin_mul_overflow(a, mul, &prod))
30215+ return ULLONG_MAX; /* the product does not fit in a u64 */
30216+ return prod;
30217+}
30218+
30219+static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf)
30220+{
30221+ int err;
30222+ long bsize, factor;
30223+ u64 blocks, bfree, bavail, files, ffree;
30224+ aufs_bindex_t bbot, bindex, i;
30225+ unsigned char shared;
30226+ struct path h_path;
30227+ struct super_block *h_sb;
30228+
30229+ err = 0;
30230+ bsize = LONG_MAX; /* ends up as the smallest f_bsize seen */
30231+ files = 0;
30232+ ffree = 0;
30233+ blocks = 0;
30234+ bfree = 0;
30235+ bavail = 0;
30236+ bbot = au_sbbot(sb);
30237+ for (bindex = 0; bindex <= bbot; bindex++) {
30238+ h_path.mnt = au_sbr_mnt(sb, bindex);
30239+ h_sb = h_path.mnt->mnt_sb;
30240+ shared = 0;
30241+ for (i = 0; !shared && i < bindex; i++) /* same super_block as an earlier branch? */
30242+ shared = (au_sbr_sb(sb, i) == h_sb);
30243+ if (shared)
30244+ continue; /* count each lower super_block only once */
30245+
30246+ /* sb->s_root for NFS is unreliable */
30247+ h_path.dentry = h_path.mnt->mnt_root;
30248+ err = vfs_statfs(&h_path, buf); /* @buf doubles as per-branch scratch */
30249+ if (unlikely(err))
30250+ goto out;
30251+
30252+ if (bsize > buf->f_bsize) {
30253+ /*
30254+ * we will reduce bsize, so we have to expand blocks
30255+ * etc. to match them again
30256+ */
30257+ factor = (bsize / buf->f_bsize); /* NOTE(review): assumes f_bsize != 0 - confirm */
30258+ blocks = au_mul_till_max(blocks, factor);
30259+ bfree = au_mul_till_max(bfree, factor);
30260+ bavail = au_mul_till_max(bavail, factor);
30261+ bsize = buf->f_bsize;
30262+ }
30263+
30264+ factor = (buf->f_bsize / bsize); /* scale this branch to the common bsize */
30265+ blocks = au_add_till_max(blocks,
30266+ au_mul_till_max(buf->f_blocks, factor));
30267+ bfree = au_add_till_max(bfree,
30268+ au_mul_till_max(buf->f_bfree, factor));
30269+ bavail = au_add_till_max(bavail,
30270+ au_mul_till_max(buf->f_bavail, factor));
30271+ files = au_add_till_max(files, buf->f_files);
30272+ ffree = au_add_till_max(ffree, buf->f_ffree);
30273+ }
30274+
30275+ buf->f_bsize = bsize; /* overwrite the scratch values with the totals */
30276+ buf->f_blocks = blocks;
30277+ buf->f_bfree = bfree;
30278+ buf->f_bavail = bavail;
30279+ buf->f_files = files;
30280+ buf->f_ffree = ffree;
30281+ buf->f_frsize = 0;
30282+
30283+out:
30284+ return err;
30285+}
30286+
30287+static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf)
30288+{
30289+ int err;
30290+ struct path h_path;
30291+ struct super_block *sb;
30292+
30293+ /* lock free root dinfo */
30294+ sb = dentry->d_sb;
30295+ si_noflush_read_lock(sb);
30296+ if (!au_opt_test(au_mntflags(sb), SUM)) {
30297+ /* sb->s_root for NFS is unreliable */
30298+ h_path.mnt = au_sbr_mnt(sb, 0); /* without SUM only branch 0 is reported */
30299+ h_path.dentry = h_path.mnt->mnt_root;
30300+ err = vfs_statfs(&h_path, buf);
30301+ } else
30302+ err = au_statfs_sum(sb, buf);
30303+ si_read_unlock(sb);
30304+
30305+ if (!err) {
30306+ buf->f_type = AUFS_SUPER_MAGIC; /* replace the lower fs identity with aufs values */
30307+ buf->f_namelen = AUFS_MAX_NAMELEN;
30308+ memset(&buf->f_fsid, 0, sizeof(buf->f_fsid));
30309+ }
30310+ /* buf->f_bsize = buf->f_blocks = buf->f_bfree = buf->f_bavail = -1; */
30311+
30312+ return err;
30313+}
30314+
30315+/* ---------------------------------------------------------------------- */
30316+
30317+static int aufs_sync_fs(struct super_block *sb, int wait) /* @wait is not used below */
30318+{
30319+ int err, e;
30320+ aufs_bindex_t bbot, bindex;
30321+ struct au_branch *br;
30322+ struct super_block *h_sb;
30323+
30324+ err = 0;
30325+ si_noflush_read_lock(sb);
30326+ bbot = au_sbbot(sb);
30327+ for (bindex = 0; bindex <= bbot; bindex++) {
30328+ br = au_sbr(sb, bindex);
30329+ if (!au_br_writable(br->br_perm))
30330+ continue; /* only writable branches are synced */
30331+
30332+ h_sb = au_sbr_sb(sb, bindex);
30333+ e = vfsub_sync_filesystem(h_sb);
30334+ if (unlikely(e && !err))
30335+ err = e; /* remember the first error only */
30336+ /* go on even if an error happens */
30337+ }
30338+ si_read_unlock(sb);
30339+
30340+ return err;
30341+}
30342+
30343+/* ---------------------------------------------------------------------- */
30344+
30345+/* final actions when unmounting a file system */
30346+static void aufs_put_super(struct super_block *sb)
30347+{
30348+ struct au_sbinfo *sbi = au_sbi(sb);
30349+
30350+ if (!sbi)
30351+ return;
30352+ kobject_put(&sbi->si_kobj); /* drop the reference on the embedded kobject */
30353+}
30354+
30355+/* ---------------------------------------------------------------------- */
30356+
30357+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb,
30358+ struct super_block *sb, void *arg)
30359+{
30360+ void *array;
30361+ unsigned long long n, sz;
30362+
30363+ array = NULL;
30364+ n = 0;
30365+ if (!*hint)
30366+ goto out; /* nothing requested: NULL array, *hint stays 0 */
30367+
30368+ if (*hint > ULLONG_MAX / sizeof(array)) { /* sizeof(array) is one pointer-sized slot */
30369+ array = ERR_PTR(-EMFILE);
30370+ pr_err("hint %llu\n", *hint);
30371+ goto out;
30372+ }
30373+
30374+ sz = sizeof(array) * *hint;
30375+ array = kzalloc(sz, GFP_NOFS);
30376+ if (unlikely(!array))
30377+ array = vzalloc(sz); /* fall back to vzalloc() when kzalloc() fails */
30378+ if (unlikely(!array)) {
30379+ array = ERR_PTR(-ENOMEM);
30380+ goto out;
30381+ }
30382+
30383+ n = cb(sb, array, *hint, arg); /* @cb fills the zeroed slots and returns how many it used */
30384+ AuDebugOn(n > *hint);
30385+
30386+out:
30387+ *hint = n; /* number of filled slots, 0 on every early exit */
30388+ return array;
30389+}
30390+
30391+static unsigned long long au_iarray_cb(struct super_block *sb, void *a,
30392+ unsigned long long max __maybe_unused,
30393+ void *arg)
30394+{
30395+ unsigned long long n;
30396+ struct inode **p, *inode;
30397+ struct list_head *head;
30398+
30399+ n = 0;
30400+ p = a; /* @a is filled as an array of inode pointers */
30401+ head = arg; /* @arg is the inode list to walk */
30402+ spin_lock(&sb->s_inode_list_lock);
30403+ list_for_each_entry(inode, head, i_sb_list) {
30404+ if (!au_is_bad_inode(inode)
30405+ && au_ii(inode)->ii_btop >= 0) {
30406+ spin_lock(&inode->i_lock);
30407+ if (atomic_read(&inode->i_count)) { /* take only inodes with a non-zero i_count */
30408+ au_igrab(inode);
30409+ *p++ = inode;
30410+ n++;
30411+ AuDebugOn(n > max);
30412+ }
30413+ spin_unlock(&inode->i_lock);
30414+ }
30415+ }
30416+ spin_unlock(&sb->s_inode_list_lock);
30417+
30418+ return n; /* number of inodes stored in @a */
30419+}
30420+
30421+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max)
30422+{
30423+ struct au_sbinfo *sbi;
30424+
30425+ sbi = au_sbi(sb);
30426+ *max = au_lcnt_read(&sbi->si_ninodes, /*do_rev*/1); /* size hint; au_array_alloc() replaces it with the filled count */
30427+ return au_array_alloc(max, au_iarray_cb, sb, &sb->s_inodes);
30428+}
30429+
30430+void au_iarray_free(struct inode **a, unsigned long long max)
30431+{
30432+ struct inode **p = a;
30433+
30434+ while (max--) /* put the first @max entries in ascending order */
30435+ iput(*p++);
30436+ kvfree(a);
30437+}
30438+
30439+/* ---------------------------------------------------------------------- */
30440+
30441+/*
30442+ * refresh dentry and inode at remount time.
30443+ */
30444+/* todo: consolidate with simple_reval_dpath() and au_reval_for_attr() */
30445+static int au_do_refresh(struct dentry *dentry, unsigned int dir_flags,
30446+ struct dentry *parent)
30447+{
30448+ int err;
30449+
30450+ di_write_lock_child(dentry); /* child is write-locked first, then the parent is read-locked */
30451+ di_read_lock_parent(parent, AuLock_IR);
30452+ err = au_refresh_dentry(dentry, parent);
30453+ if (!err && dir_flags) /* callers pass dir_flags == 0 to skip au_hn_reset() */
30454+ au_hn_reset(d_inode(dentry), dir_flags);
30455+ di_read_unlock(parent, AuLock_IR);
30456+ di_write_unlock(dentry);
30457+
30458+ return err;
30459+}
30460+
30461+static int au_do_refresh_d(struct dentry *dentry, unsigned int sigen,
30462+ struct au_sbinfo *sbinfo,
30463+ const unsigned int dir_flags, unsigned int do_idop)
30464+{
30465+ int err;
30466+ struct dentry *parent;
30467+
30468+ err = 0;
30469+ parent = dget_parent(dentry);
30470+ if (!au_digen_test(parent, sigen) && au_digen_test(dentry, sigen)) { /* refresh only when the parent test is zero and the dentry test is not */
30471+ if (d_really_is_positive(dentry)) {
30472+ if (!d_is_dir(dentry))
30473+ err = au_do_refresh(dentry, /*dir_flags*/0,
30474+ parent);
30475+ else {
30476+ err = au_do_refresh(dentry, dir_flags, parent);
30477+ if (unlikely(err))
30478+ au_fset_si(sbinfo, FAILED_REFRESH_DIR); /* flag the failed dir refresh in sbinfo */
30479+ }
30480+ } else
30481+ err = au_do_refresh(dentry, /*dir_flags*/0, parent);
30482+ AuDbgDentry(dentry);
30483+ }
30484+ dput(parent);
30485+
30486+ if (!err) {
30487+ if (do_idop)
30488+ au_refresh_dop(dentry, /*force_reval*/0);
30489+ } else
30490+ au_refresh_dop(dentry, /*force_reval*/1); /* on error the d_op is refreshed with force_reval set */
30491+
30492+ AuTraceErr(err);
30493+ return err;
30494+}
30495+
30496+static int au_refresh_d(struct super_block *sb, unsigned int do_idop)
30497+{
30498+ int err, i, j, ndentry, e;
30499+ unsigned int sigen;
30500+ struct au_dcsub_pages dpages;
30501+ struct au_dpage *dpage;
30502+ struct dentry **dentries, *d;
30503+ struct au_sbinfo *sbinfo;
30504+ struct dentry *root = sb->s_root;
30505+ const unsigned int dir_flags = au_hi_flags(d_inode(root), /*isdir*/1);
30506+
30507+ if (do_idop)
30508+ au_refresh_dop(root, /*force_reval*/0);
30509+
30510+ err = au_dpages_init(&dpages, GFP_NOFS);
30511+ if (unlikely(err))
30512+ goto out;
30513+ err = au_dcsub_pages(&dpages, root, NULL, NULL); /* collect the dentries to visit into dpages */
30514+ if (unlikely(err))
30515+ goto out_dpages;
30516+
30517+ sigen = au_sigen(sb);
30518+ sbinfo = au_sbi(sb);
30519+ for (i = 0; i < dpages.ndpage; i++) {
30520+ dpage = dpages.dpages + i;
30521+ dentries = dpage->dentries;
30522+ ndentry = dpage->ndentry;
30523+ for (j = 0; j < ndentry; j++) {
30524+ d = dentries[j];
30525+ e = au_do_refresh_d(d, sigen, sbinfo, dir_flags,
30526+ do_idop);
30527+ if (unlikely(e && !err))
30528+ err = e; /* keep the first error */
30529+ /* go on even err */
30530+ }
30531+ }
30532+
30533+out_dpages:
30534+ au_dpages_free(&dpages);
30535+out:
30536+ return err;
30537+}
30538+
30539+static int au_refresh_i(struct super_block *sb, unsigned int do_idop)
30540+{
30541+ int err, e;
30542+ unsigned int sigen;
30543+ unsigned long long max, ull;
30544+ struct inode *inode, **array;
30545+
30546+ array = au_iarray_alloc(sb, &max); /* on success max is the number of inodes stored */
30547+ err = PTR_ERR(array);
30548+ if (IS_ERR(array))
30549+ goto out;
30550+
30551+ err = 0;
30552+ sigen = au_sigen(sb);
30553+ for (ull = 0; ull < max; ull++) {
30554+ inode = array[ull];
30555+ if (unlikely(!inode))
30556+ break; /* stop at the first empty slot */
30557+
30558+ e = 0;
30559+ ii_write_lock_child(inode);
30560+ if (au_iigen(inode, NULL) != sigen) { /* inode generation differs from the sb one */
30561+ e = au_refresh_hinode_self(inode);
30562+ if (unlikely(e)) {
30563+ au_refresh_iop(inode, /*force_getattr*/1);
30564+ pr_err("error %d, i%lu\n", e, inode->i_ino);
30565+ if (!err)
30566+ err = e; /* keep the first error */
30567+ /* go on even if err */
30568+ }
30569+ }
30570+ if (!e && do_idop)
30571+ au_refresh_iop(inode, /*force_getattr*/0);
30572+ ii_write_unlock(inode);
30573+ }
30574+
30575+ au_iarray_free(array, max); /* iput each entry and free the array */
30576+
30577+out:
30578+ return err;
30579+}
30580+
30581+void au_remount_refresh(struct super_block *sb, unsigned int do_idop)
30582+{
30583+ int err, e;
30584+ unsigned int udba;
30585+ aufs_bindex_t bindex, bbot;
30586+ struct dentry *root;
30587+ struct inode *inode;
30588+ struct au_branch *br;
30589+ struct au_sbinfo *sbi;
30590+
30591+ au_sigen_inc(sb);
30592+ sbi = au_sbi(sb);
30593+ au_fclr_si(sbi, FAILED_REFRESH_DIR); /* au_do_refresh_d() sets it again on a failed dir refresh */
30594+
30595+ root = sb->s_root;
30596+ DiMustNoWaiters(root);
30597+ inode = d_inode(root);
30598+ IiMustNoWaiters(inode);
30599+
30600+ udba = au_opt_udba(sb);
30601+ bbot = au_sbbot(sb);
30602+ for (bindex = 0; bindex <= bbot; bindex++) {
30603+ br = au_sbr(sb, bindex);
30604+ err = au_hnotify_reset_br(udba, br, br->br_perm);
30605+ if (unlikely(err))
30606+ AuIOErr("hnotify failed on br %d, %d, ignored\n",
30607+ bindex, err);
30608+ /* go on even if err */
30609+ }
30610+ au_hn_reset(inode, au_hi_flags(inode, /*isdir*/1));
30611+
30612+ if (do_idop) { /* switch the dentry ops and the inode ops array according to NO_DREVAL */
30613+ if (au_ftest_si(sbi, NO_DREVAL)) {
30614+ AuDebugOn(sb->s_d_op == &aufs_dop_noreval);
30615+ sb->s_d_op = &aufs_dop_noreval;
30616+ AuDebugOn(sbi->si_iop_array == aufs_iop_nogetattr);
30617+ sbi->si_iop_array = aufs_iop_nogetattr;
30618+ } else {
30619+ AuDebugOn(sb->s_d_op == &aufs_dop);
30620+ sb->s_d_op = &aufs_dop;
30621+ AuDebugOn(sbi->si_iop_array == aufs_iop);
30622+ sbi->si_iop_array = aufs_iop;
30623+ }
30624+ pr_info("reset to %ps and %ps\n",
30625+ sb->s_d_op, sbi->si_iop_array);
30626+ }
30627+
30628+ di_write_unlock(root); /* the root dinfo lock is dropped across the refresh and retaken below */
30629+ err = au_refresh_d(sb, do_idop);
30630+ e = au_refresh_i(sb, do_idop);
30631+ if (unlikely(e && !err))
30632+ err = e;
30633+ /* aufs_write_lock() calls ..._child() */
30634+ di_write_lock_child(root);
30635+
30636+ au_cpup_attr_all(inode, /*force*/1);
30637+
30638+ if (unlikely(err))
30639+ AuIOErr("refresh failed, ignored, %d\n", err); /* void function: the error is only logged */
30640+}
30641+
30642+const struct super_operations aufs_sop = { /* declared extern in super.h */
30643+ .alloc_inode = aufs_alloc_inode,
30644+ .destroy_inode = aufs_destroy_inode,
30645+ .free_inode = aufs_free_inode,
30646+ /* always deleting, no clearing */
30647+ .drop_inode = generic_delete_inode,
30648+ .show_options = aufs_show_options,
30649+ .statfs = aufs_statfs,
30650+ .put_super = aufs_put_super,
30651+ .sync_fs = aufs_sync_fs
30652+};
30653+
30654+/* ---------------------------------------------------------------------- */
30655+
30656+int au_alloc_root(struct super_block *sb)
30657+{
30658+ int err;
30659+ struct inode *inode;
30660+ struct dentry *root;
30661+
30662+ inode = au_iget_locked(sb, AUFS_ROOT_INO); /* an inode or an ERR_PTR(), never NULL */
30663+ err = PTR_ERR(inode);
30664+ if (IS_ERR(inode))
30665+ goto out;
30666+
30667+ inode->i_op = aufs_iop + AuIop_DIR; /* with getattr by default */
30668+ inode->i_fop = &aufs_dir_fop;
30669+ inode->i_mode = S_IFDIR;
30670+ set_nlink(inode, 2);
30671+ unlock_new_inode(inode);
30672+
30673+ err = -ENOMEM; /* err held PTR_ERR() of a valid inode; d_make_root() fails with NULL */
30674+ root = d_make_root(inode);
30675+ if (unlikely(!root))
30676+ goto out;
30677+ err = PTR_ERR(root);
30678+ if (IS_ERR(root))
30679+ goto out;
30680+
30681+ err = au_di_init(root);
30682+ if (!err) {
30683+ sb->s_root = root; /* publish the root dentry only once its dinfo is set up */
30684+ return 0; /* success */
30685+ }
30686+ dput(root);
30687+
30688+out:
30689+ return err; /* negative errno on every failure path */
30690+}
30691+
30692+/* ---------------------------------------------------------------------- */
30693+
30694+static void aufs_kill_sb(struct super_block *sb)
30695+{
30696+ struct au_sbinfo *sbinfo;
30697+ struct dentry *root;
30698+
30699+ sbinfo = au_sbi(sb);
30700+ if (!sbinfo)
30701+ goto out; /* no sbinfo: only kill_anon_super() is left to do */
30702+
30703+ au_sbilist_del(sb);
30704+
30705+ root = sb->s_root;
30706+ if (root)
30707+ aufs_write_lock(root);
30708+ else
30709+ __si_write_lock(sb); /* without a root dentry only the sbinfo lock is taken */
30710+
30711+ au_fhsm_fin(sb);
30712+ if (sbinfo->si_wbr_create_ops->fin) /* ->fin is optional */
30713+ sbinfo->si_wbr_create_ops->fin(sb);
30714+ if (au_opt_test(sbinfo->si_mntflags, UDBA_HNOTIFY)) {
30715+ au_opt_set_udba(sbinfo->si_mntflags, UDBA_NONE); /* switch udba to none, then refresh */
30716+ au_remount_refresh(sb, /*do_idop*/0);
30717+ }
30718+ if (au_opt_test(sbinfo->si_mntflags, PLINK))
30719+ au_plink_put(sb, /*verbose*/1);
30720+ au_xino_clr(sb);
30721+ if (root)
30722+ au_dr_opt_flush(sb);
30723+
30724+ if (root)
30725+ aufs_write_unlock(root); /* release whichever lock was taken above */
30726+ else
30727+ __si_write_unlock(sb);
30728+
30729+ sbinfo->si_sb = NULL;
30730+ au_nwt_flush(&sbinfo->si_nowait);
30731+
30732+out:
30733+ kill_anon_super(sb);
30734+}
30735+
30736+struct file_system_type aufs_fs_type = { /* declared extern in super.h */
30737+ .name = AUFS_FSTYPE,
30738+ /* a race between rename and others */
30739+ .fs_flags = FS_RENAME_DOES_D_MOVE
30740+ /* untested */
30741+ /*| FS_ALLOW_IDMAP*/
30742+ ,
30743+ .init_fs_context = aufs_fsctx_init,
30744+ .parameters = aufs_fsctx_paramspec,
30745+ .kill_sb = aufs_kill_sb, /* defined in super.c */
30746+ /* no need to __module_get() and module_put(). */
30747+ .owner = THIS_MODULE,
30748+};
30749diff -urN /usr/share/empty/fs/aufs/super.h linux/fs/aufs/super.h
30750--- /usr/share/empty/fs/aufs/super.h 1970-01-01 01:00:00.000000000 +0100
30751+++ linux/fs/aufs/super.h 2022-11-05 23:02:18.969222617 +0100
30752@@ -0,0 +1,592 @@
30753+/* SPDX-License-Identifier: GPL-2.0 */
30754+/*
30755+ * Copyright (C) 2005-2022 Junjiro R. Okajima
30756+ *
30757+ * This program is free software; you can redistribute it and/or modify
30758+ * it under the terms of the GNU General Public License as published by
30759+ * the Free Software Foundation; either version 2 of the License, or
30760+ * (at your option) any later version.
30761+ *
30762+ * This program is distributed in the hope that it will be useful,
30763+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
30764+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
30765+ * GNU General Public License for more details.
30766+ *
30767+ * You should have received a copy of the GNU General Public License
30768+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
30769+ */
30770+
30771+/*
30772+ * super_block operations
30773+ */
30774+
30775+#ifndef __AUFS_SUPER_H__
30776+#define __AUFS_SUPER_H__
30777+
30778+#ifdef __KERNEL__
30779+
30780+#include <linux/fs.h>
30781+#include <linux/kobject.h>
30782+#include "hbl.h"
30783+#include "lcnt.h"
30784+#include "rwsem.h"
30785+#include "wkq.h"
30786+
30787+/* policies to select one among multiple writable branches */
30788+struct au_wbr_copyup_operations {
30789+ int (*copyup)(struct dentry *dentry); /* invoked through AuWbrCopyup() */
30790+};
30791+
30792+#define AuWbr_DIR 1 /* target is a dir */
30793+#define AuWbr_PARENT (1 << 1) /* always require a parent */
30794+
30795+#define au_ftest_wbr(flags, name) ((flags) & AuWbr_##name)
30796+#define au_fset_wbr(flags, name) { (flags) |= AuWbr_##name; }
30797+#define au_fclr_wbr(flags, name) { (flags) &= ~AuWbr_##name; }
30798+
30799+struct au_wbr_create_operations {
30800+ int (*create)(struct dentry *dentry, unsigned int flags); /* invoked through AuWbrCreate() */
30801+ int (*init)(struct super_block *sb);
30802+ int (*fin)(struct super_block *sb); /* may be NULL; aufs_kill_sb() tests it before calling */
30803+};
30804+
30805+struct au_wbr_mfs {
30806+ struct mutex mfs_lock; /* protect this structure */
30807+ unsigned long mfs_jiffy;
30808+ unsigned long mfs_expire; /* in jiffies; shown in seconds by the create= mount option */
30809+ aufs_bindex_t mfs_bindex;
30810+
30811+ unsigned long long mfsrr_bytes;
30812+ unsigned long long mfsrr_watermark; /* shown as the create= watermark parameter */
30813+};
30814+
30815+#define AuPlink_NHASH 100 /* number of si_plink[] list heads in struct au_sbinfo */
30816+static inline int au_plink_hash(ino_t ino)
30817+{
30818+ return ino % AuPlink_NHASH; /* inode number modulo the table size */
30819+}
30820+
30821+/* File-based Hierarchical Storage Management; empty without CONFIG_AUFS_FHSM */
30822+struct au_fhsm {
30823+#ifdef CONFIG_AUFS_FHSM
30824+ /* allow only one process who can receive the notification */
30825+ spinlock_t fhsm_spin; /* taken around fhsm_pid accesses, see au_fhsm_pid() */
30826+ pid_t fhsm_pid;
30827+ wait_queue_head_t fhsm_wqh;
30828+ atomic_t fhsm_readable;
30829+
30830+ /* these are protected by si_rwsem */
30831+ unsigned long fhsm_expire;
30832+ aufs_bindex_t fhsm_bottom;
30833+#endif
30834+};
30835+
30836+struct au_branch;
30837+struct au_sbinfo {
30838+ /* nowait tasks in the system-wide workqueue */
30839+ struct au_nowait_tasks si_nowait;
30840+
30841+ /*
30842+ * tried sb->s_umount, but failed due to the dependency between i_mutex.
30843+ * rwsem for au_sbinfo is necessary.
30844+ */
30845+ struct au_rwsem si_rwsem;
30846+
30847+ /*
30848+ * dirty approach to protect sb->s_inodes and ->s_files (gone) from
30849+ * remount.
30850+ */
30851+ au_lcnt_t si_ninodes, si_nfiles;
30852+
30853+ /* branch management */
30854+ unsigned int si_generation;
30855+
30856+ /* see AuSi_ flags */
30857+ unsigned char au_si_status;
30858+
30859+ aufs_bindex_t si_bbot;
30860+
30861+ /* dirty trick to keep br_id plus */
30862+ unsigned int si_last_br_id :
30863+ sizeof(aufs_bindex_t) * BITS_PER_BYTE - 1;
30864+ struct au_branch **si_branch;
30865+
30866+ /* policy to select a writable branch */
30867+ unsigned char si_wbr_copyup;
30868+ unsigned char si_wbr_create;
30869+ struct au_wbr_copyup_operations *si_wbr_copyup_ops;
30870+ struct au_wbr_create_operations *si_wbr_create_ops;
30871+
30872+ /* round robin */
30873+ atomic_t si_wbr_rr_next;
30874+
30875+ /* most free space */
30876+ struct au_wbr_mfs si_wbr_mfs;
30877+
30878+ /* File-based Hierarchical Storage Management */
30879+ struct au_fhsm si_fhsm;
30880+
30881+ /* mount flags */
30882+ /* include/asm-ia64/siginfo.h defines a macro named si_flags */
30883+ unsigned int si_mntflags;
30884+
30885+ /* external inode number (bitmap and translation table) */
30886+ loff_t si_ximaxent; /* max entries in a xino */
30887+
30888+ struct file *si_xib;
30889+ struct mutex si_xib_mtx; /* protect xib members */
30890+ unsigned long *si_xib_buf;
30891+ unsigned long si_xib_last_pindex;
30892+ int si_xib_next_bit;
30893+
30894+ unsigned long si_xino_jiffy;
30895+ unsigned long si_xino_expire;
30896+ /* reserved for future use */
30897+ /* unsigned long long si_xib_limit; */ /* Max xib file size */
30898+
30899+#ifdef CONFIG_AUFS_EXPORT
30900+ /* i_generation */
30901+ /* todo: make xigen file an array to support many inode numbers */
30902+ struct file *si_xigen;
30903+ atomic_t si_xigen_next;
30904+#endif
30905+
30906+ /* dirty trick to support atomic_open */
30907+ struct hlist_bl_head si_aopen;
30908+
30909+ /* vdir parameters */
30910+ unsigned long si_rdcache; /* max cache time in jiffies */
30911+ unsigned int si_rdblk; /* deblk size */
30912+ unsigned int si_rdhash; /* hash size */
30913+
30914+ /*
30915+ * If the number of whiteouts is larger than si_dirwh, leave all of
30916+ * them after au_whtmp_ren to reduce the cost of rmdir(2).
30917+ * future fsck.aufs or kernel thread will remove them later.
30918+ * Otherwise, remove all whiteouts and the dir in rmdir(2).
30919+ */
30920+ unsigned int si_dirwh;
30921+
30922+ /* pseudo_link list */
30923+ struct hlist_bl_head si_plink[AuPlink_NHASH];
30924+ wait_queue_head_t si_plink_wq;
30925+ spinlock_t si_plink_maint_lock;
30926+ pid_t si_plink_maint_pid;
30927+
30928+ /* file list */
30929+ struct hlist_bl_head si_files;
30930+
30931+ /* with/without getattr, brother of sb->s_d_op */
30932+ const struct inode_operations *si_iop_array;
30933+
30934+ /*
30935+ * sysfs and lifetime management.
30936+ * this is not a small structure and it may be a waste of memory in case
30937+ * sysfs is disabled, particularly when many aufs-es are mounted.
30938+ * but most setups use sysfs.
30939+ */
30940+ struct kobject si_kobj;
30941+#ifdef CONFIG_DEBUG_FS
30942+ struct dentry *si_dbgaufs;
30943+ struct dentry *si_dbgaufs_plink;
30944+ struct dentry *si_dbgaufs_xib;
30945+#ifdef CONFIG_AUFS_EXPORT
30946+ struct dentry *si_dbgaufs_xigen;
30947+#endif
30948+#endif
30949+
30950+#ifdef CONFIG_AUFS_SBILIST
30951+ struct hlist_bl_node si_list;
30952+#endif
30953+
30954+ /* dirty, necessary for unmounting, sysfs and sysrq */
30955+ struct super_block *si_sb;
30956+};
30957+
30958+/* sbinfo status flags */
30959+/*
30960+ * set true when refresh_dirs() failed at remount time.
30961+ * then try refreshing dirs at access time again.
30962+ * if it is false, refreshing dirs at access time is unnecessary
30963+ */
30964+#define AuSi_FAILED_REFRESH_DIR 1
30965+#define AuSi_FHSM (1 << 1) /* fhsm is active now */
30966+#define AuSi_NO_DREVAL (1 << 2) /* disable all d_revalidate */
30967+
30968+#ifndef CONFIG_AUFS_FHSM
30969+#undef AuSi_FHSM
30970+#define AuSi_FHSM 0
30971+#endif
30972+
30973+static inline unsigned char au_do_ftest_si(struct au_sbinfo *sbi,
30974+ unsigned int flag)
30975+{
30976+ AuRwMustAnyLock(&sbi->si_rwsem);
30977+ return sbi->au_si_status & flag; /* non-zero when @flag is set in au_si_status */
30978+}
30979+#define au_ftest_si(sbinfo, name) au_do_ftest_si(sbinfo, AuSi_##name)
30980+#define au_fset_si(sbinfo, name) do { \
30981+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
30982+ (sbinfo)->au_si_status |= AuSi_##name; \
30983+} while (0) /* au_fset_si: set AuSi_<name> after the AuRwMustWriteLock() check */
30984+#define au_fclr_si(sbinfo, name) do { \
30985+ AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
30986+ (sbinfo)->au_si_status &= ~AuSi_##name; \
30987+} while (0) /* au_fclr_si: clear AuSi_<name> after the AuRwMustWriteLock() check */
30988+
30989+/* ---------------------------------------------------------------------- */
30990+
30991+/* policy to select one among writable branches */
30992+#define AuWbrCopyup(sbinfo, ...) \
30993+ ((sbinfo)->si_wbr_copyup_ops->copyup(__VA_ARGS__))
30994+#define AuWbrCreate(sbinfo, ...) \
30995+ ((sbinfo)->si_wbr_create_ops->create(__VA_ARGS__))
30996+
30997+/* flags for si_read_lock()/aufs_read_lock()/di_read_lock() */
30998+#define AuLock_DW 1 /* write-lock dentry */
30999+#define AuLock_IR (1 << 1) /* read-lock inode */
31000+#define AuLock_IW (1 << 2) /* write-lock inode */
31001+#define AuLock_FLUSH (1 << 3) /* wait for 'nowait' tasks */
31002+#define AuLock_DIRS (1 << 4) /* target is a pair of dirs */
31003+ /* except RENAME_EXCHANGE */
31004+#define AuLock_NOPLM (1 << 5) /* return err in plm mode */
31005+#define AuLock_NOPLMW (1 << 6) /* wait for plm mode ends */
31006+#define AuLock_GEN (1 << 7) /* test digen/iigen */
31007+#define au_ftest_lock(flags, name) ((flags) & AuLock_##name)
31008+#define au_fset_lock(flags, name) \
31009+ do { (flags) |= AuLock_##name; } while (0)
31010+#define au_fclr_lock(flags, name) \
31011+ do { (flags) &= ~AuLock_##name; } while (0)
31012+
31013+/* ---------------------------------------------------------------------- */
31014+
31015+/* super.c */
31016+struct inode *au_iget_locked(struct super_block *sb, ino_t ino);
31017+
31018+typedef unsigned long long (*au_arraycb_t)(struct super_block *sb, void *array,
31019+ unsigned long long max, void *arg);
31020+void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb,
31021+ struct super_block *sb, void *arg);
31022+struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max);
31023+void au_iarray_free(struct inode **a, unsigned long long max);
31024+
31025+void au_remount_refresh(struct super_block *sb, unsigned int do_idop);
31026+extern const struct super_operations aufs_sop;
31027+int au_alloc_root(struct super_block *sb);
31028+extern struct file_system_type aufs_fs_type;
31029+
31030+/* sbinfo.c */
31031+void au_si_free(struct kobject *kobj);
31032+struct au_sbinfo *au_si_alloc(struct super_block *sb);
31033+int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr, int may_shrink);
31034+
31035+unsigned int au_sigen_inc(struct super_block *sb);
31036+aufs_bindex_t au_new_br_id(struct super_block *sb);
31037+
31038+int si_read_lock(struct super_block *sb, int flags);
31039+int si_write_lock(struct super_block *sb, int flags);
31040+int aufs_read_lock(struct dentry *dentry, int flags);
31041+void aufs_read_unlock(struct dentry *dentry, int flags);
31042+void aufs_write_lock(struct dentry *dentry);
31043+void aufs_write_unlock(struct dentry *dentry);
31044+int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags);
31045+void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2);
31046+
31047+/* wbr_policy.c */
31048+extern struct au_wbr_copyup_operations au_wbr_copyup_ops[];
31049+extern struct au_wbr_create_operations au_wbr_create_ops[];
31050+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst);
31051+int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex);
31052+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t btop);
31053+
31054+/* mvdown.c */
31055+int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *arg);
31056+
31057+#ifdef CONFIG_AUFS_FHSM
31058+/* fhsm.c */
31059+
31060+static inline pid_t au_fhsm_pid(struct au_fhsm *fhsm)
31061+{
31062+ pid_t pid;
31063+
31064+ spin_lock(&fhsm->fhsm_spin);
31065+ pid = fhsm->fhsm_pid;
31066+ spin_unlock(&fhsm->fhsm_spin);
31067+
31068+ return pid;
31069+}
31070+
31071+void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force);
31072+void au_fhsm_wrote_all(struct super_block *sb, int force);
31073+int au_fhsm_fd(struct super_block *sb, int oflags);
31074+int au_fhsm_br_alloc(struct au_branch *br);
31075+void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex);
31076+void au_fhsm_fin(struct super_block *sb);
31077+void au_fhsm_init(struct au_sbinfo *sbinfo);
31078+void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec);
31079+void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo);
31080+#else
31081+AuStubVoid(au_fhsm_wrote, struct super_block *sb, aufs_bindex_t bindex,
31082+ int force)
31083+AuStubVoid(au_fhsm_wrote_all, struct super_block *sb, int force)
31084+AuStub(int, au_fhsm_fd, return -EOPNOTSUPP, struct super_block *sb, int oflags)
31085+AuStub(pid_t, au_fhsm_pid, return 0, struct au_fhsm *fhsm)
31086+AuStubInt0(au_fhsm_br_alloc, struct au_branch *br)
31087+AuStubVoid(au_fhsm_set_bottom, struct super_block *sb, aufs_bindex_t bindex)
31088+AuStubVoid(au_fhsm_fin, struct super_block *sb)
31089+AuStubVoid(au_fhsm_init, struct au_sbinfo *sbinfo)
31090+AuStubVoid(au_fhsm_set, struct au_sbinfo *sbinfo, unsigned int sec)
31091+AuStubVoid(au_fhsm_show, struct seq_file *seq, struct au_sbinfo *sbinfo)
31092+#endif
31093+
31094+/* ---------------------------------------------------------------------- */
31095+
31096+static inline struct au_sbinfo *au_sbi(struct super_block *sb)
31097+{
31098+ return sb->s_fs_info;
31099+}
31100+
31101+/* ---------------------------------------------------------------------- */
31102+
31103+#ifdef CONFIG_AUFS_EXPORT
31104+int au_test_nfsd(void);
31105+void au_export_init(struct super_block *sb);
31106+void au_xigen_inc(struct inode *inode);
31107+int au_xigen_new(struct inode *inode);
31108+int au_xigen_set(struct super_block *sb, struct path *path);
31109+void au_xigen_clr(struct super_block *sb);
31110+
31111+static inline int au_busy_or_stale(void)
31112+{
31113+ if (!au_test_nfsd())
31114+ return -EBUSY;
31115+ return -ESTALE;
31116+}
31117+#else
31118+AuStubInt0(au_test_nfsd, void)
31119+AuStubVoid(au_export_init, struct super_block *sb)
31120+AuStubVoid(au_xigen_inc, struct inode *inode)
31121+AuStubInt0(au_xigen_new, struct inode *inode)
31122+AuStubInt0(au_xigen_set, struct super_block *sb, struct path *path)
31123+AuStubVoid(au_xigen_clr, struct super_block *sb)
31124+AuStub(int, au_busy_or_stale, return -EBUSY, void)
31125+#endif /* CONFIG_AUFS_EXPORT */
31126+
31127+/* ---------------------------------------------------------------------- */
31128+
31129+#ifdef CONFIG_AUFS_SBILIST
31130+/* module.c */
31131+extern struct hlist_bl_head au_sbilist;
31132+
31133+static inline void au_sbilist_init(void)
31134+{
31135+ INIT_HLIST_BL_HEAD(&au_sbilist);
31136+}
31137+
31138+static inline void au_sbilist_add(struct super_block *sb)
31139+{
31140+ au_hbl_add(&au_sbi(sb)->si_list, &au_sbilist);
31141+}
31142+
31143+static inline void au_sbilist_del(struct super_block *sb)
31144+{
31145+ au_hbl_del(&au_sbi(sb)->si_list, &au_sbilist);
31146+}
31147+
31148+#ifdef CONFIG_AUFS_MAGIC_SYSRQ
31149+static inline void au_sbilist_lock(void)
31150+{
31151+ hlist_bl_lock(&au_sbilist);
31152+}
31153+
31154+static inline void au_sbilist_unlock(void)
31155+{
31156+ hlist_bl_unlock(&au_sbilist);
31157+}
31158+#define AuGFP_SBILIST GFP_ATOMIC
31159+#else
31160+AuStubVoid(au_sbilist_lock, void)
31161+AuStubVoid(au_sbilist_unlock, void)
31162+#define AuGFP_SBILIST GFP_NOFS
31163+#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
31164+#else
31165+AuStubVoid(au_sbilist_init, void)
31166+AuStubVoid(au_sbilist_add, struct super_block *sb)
31167+AuStubVoid(au_sbilist_del, struct super_block *sb)
31168+AuStubVoid(au_sbilist_lock, void)
31169+AuStubVoid(au_sbilist_unlock, void)
31170+#define AuGFP_SBILIST GFP_NOFS
31171+#endif
31172+
31173+/* ---------------------------------------------------------------------- */
31174+
/*
 * clear every debugfs pointer kept in @sbinfo.
 * compiled to an empty body unless CONFIG_DEBUG_FS is set.
 */
static inline void dbgaufs_si_null(struct au_sbinfo *sbinfo)
{
	/*
	 * This function is a dynamic '__init' function actually,
	 * so the tiny check for si_rwsem is unnecessary.
	 */
	/* AuRwMustWriteLock(&sbinfo->si_rwsem); */
#ifdef CONFIG_DEBUG_FS
#ifdef CONFIG_AUFS_EXPORT
	/* this member is handled only when the export feature is configured */
	sbinfo->si_dbgaufs_xigen = NULL;
#endif
	sbinfo->si_dbgaufs_xib = NULL;
	sbinfo->si_dbgaufs_plink = NULL;
	sbinfo->si_dbgaufs = NULL;
#endif
}
31191+
31192+/* ---------------------------------------------------------------------- */
31193+
31194+/* current->atomic_flags */
31195+/* this value should never corrupt the ones defined in linux/sched.h */
31196+#define PFA_AUFS 0x10
31197+
31198+TASK_PFA_TEST(AUFS, test_aufs) /* task_test_aufs */
31199+TASK_PFA_SET(AUFS, aufs) /* task_set_aufs */
31200+TASK_PFA_CLEAR(AUFS, aufs) /* task_clear_aufs */
31201+
31202+static inline int si_pid_test(struct super_block *sb)
31203+{
31204+ return !!task_test_aufs(current);
31205+}
31206+
31207+static inline void si_pid_clr(struct super_block *sb)
31208+{
31209+ AuDebugOn(!task_test_aufs(current));
31210+ task_clear_aufs(current);
31211+}
31212+
31213+static inline void si_pid_set(struct super_block *sb)
31214+{
31215+ AuDebugOn(task_test_aufs(current));
31216+ task_set_aufs(current);
31217+}
31218+
31219+/* ---------------------------------------------------------------------- */
31220+
/*
 * lock superblock. mainly for entry point functions.
 * every macro below operates on the si_rwsem of the sbinfo behind @sb.
 */
#define __si_rwsem(sb)			(&au_sbi(sb)->si_rwsem)

#define __si_read_lock(sb)		au_rw_read_lock(__si_rwsem(sb))
#define __si_write_lock(sb)		au_rw_write_lock(__si_rwsem(sb))
#define __si_read_trylock(sb)		au_rw_read_trylock(__si_rwsem(sb))
#define __si_write_trylock(sb)		au_rw_write_trylock(__si_rwsem(sb))
/*
#define __si_read_trylock_nested(sb) \
	au_rw_read_trylock_nested(&au_sbi(sb)->si_rwsem)
#define __si_write_trylock_nested(sb) \
	au_rw_write_trylock_nested(&au_sbi(sb)->si_rwsem)
*/

#define __si_read_unlock(sb)		au_rw_read_unlock(__si_rwsem(sb))
#define __si_write_unlock(sb)		au_rw_write_unlock(__si_rwsem(sb))
#define __si_downgrade_lock(sb)		au_rw_dgrade_lock(__si_rwsem(sb))

/* assertions about the state of si_rwsem */
#define SiMustNoWaiters(sb)		AuRwMustNoWaiters(__si_rwsem(sb))
#define SiMustAnyLock(sb)		AuRwMustAnyLock(__si_rwsem(sb))
#define SiMustWriteLock(sb)		AuRwMustWriteLock(__si_rwsem(sb))
31240+
31241+static inline void si_noflush_read_lock(struct super_block *sb)
31242+{
31243+ __si_read_lock(sb);
31244+ si_pid_set(sb);
31245+}
31246+
/*
 * try to read-lock si_rwsem without blocking.
 * returns what __si_read_trylock() returned; the current task is flagged
 * only when that value is non-zero.
 */
static inline int si_noflush_read_trylock(struct super_block *sb)
{
	int got = __si_read_trylock(sb);

	if (!got)
		return got;

	si_pid_set(sb);
	return got;
}
31256+
31257+static inline void si_noflush_write_lock(struct super_block *sb)
31258+{
31259+ __si_write_lock(sb);
31260+ si_pid_set(sb);
31261+}
31262+
/*
 * try to write-lock si_rwsem without blocking.
 * returns what __si_write_trylock() returned; the current task is flagged
 * only when that value is non-zero.
 */
static inline int si_noflush_write_trylock(struct super_block *sb)
{
	int got = __si_write_trylock(sb);

	if (!got)
		return got;

	si_pid_set(sb);
	return got;
}
31272+
31273+#if 0 /* reserved */
31274+static inline int si_read_trylock(struct super_block *sb, int flags)
31275+{
31276+ if (au_ftest_lock(flags, FLUSH))
31277+ au_nwt_flush(&au_sbi(sb)->si_nowait);
31278+ return si_noflush_read_trylock(sb);
31279+}
31280+#endif
31281+
31282+static inline void si_read_unlock(struct super_block *sb)
31283+{
31284+ si_pid_clr(sb);
31285+ __si_read_unlock(sb);
31286+}
31287+
31288+#if 0 /* reserved */
31289+static inline int si_write_trylock(struct super_block *sb, int flags)
31290+{
31291+ if (au_ftest_lock(flags, FLUSH))
31292+ au_nwt_flush(&au_sbi(sb)->si_nowait);
31293+ return si_noflush_write_trylock(sb);
31294+}
31295+#endif
31296+
31297+static inline void si_write_unlock(struct super_block *sb)
31298+{
31299+ si_pid_clr(sb);
31300+ __si_write_unlock(sb);
31301+}
31302+
31303+#if 0 /* reserved */
31304+static inline void si_downgrade_lock(struct super_block *sb)
31305+{
31306+ __si_downgrade_lock(sb);
31307+}
31308+#endif
31309+
31310+/* ---------------------------------------------------------------------- */
31311+
31312+static inline aufs_bindex_t au_sbbot(struct super_block *sb)
31313+{
31314+ SiMustAnyLock(sb);
31315+ return au_sbi(sb)->si_bbot;
31316+}
31317+
31318+static inline unsigned int au_mntflags(struct super_block *sb)
31319+{
31320+ SiMustAnyLock(sb);
31321+ return au_sbi(sb)->si_mntflags;
31322+}
31323+
31324+static inline unsigned int au_sigen(struct super_block *sb)
31325+{
31326+ SiMustAnyLock(sb);
31327+ return au_sbi(sb)->si_generation;
31328+}
31329+
31330+static inline struct au_branch *au_sbr(struct super_block *sb,
31331+ aufs_bindex_t bindex)
31332+{
31333+ SiMustAnyLock(sb);
31334+ return au_sbi(sb)->si_branch[0 + bindex];
31335+}
31336+
31337+static inline loff_t au_xi_maxent(struct super_block *sb)
31338+{
31339+ SiMustAnyLock(sb);
31340+ return au_sbi(sb)->si_ximaxent;
31341+}
31342+
31343+#endif /* __KERNEL__ */
31344+#endif /* __AUFS_SUPER_H__ */
31345diff -urN /usr/share/empty/fs/aufs/sysaufs.c linux/fs/aufs/sysaufs.c
31346--- /usr/share/empty/fs/aufs/sysaufs.c 1970-01-01 01:00:00.000000000 +0100
31347+++ linux/fs/aufs/sysaufs.c 2022-11-05 23:02:18.969222617 +0100
31348@@ -0,0 +1,94 @@
31349+// SPDX-License-Identifier: GPL-2.0
31350+/*
31351+ * Copyright (C) 2005-2022 Junjiro R. Okajima
31352+ *
31353+ * This program is free software; you can redistribute it and/or modify
31354+ * it under the terms of the GNU General Public License as published by
31355+ * the Free Software Foundation; either version 2 of the License, or
31356+ * (at your option) any later version.
31357+ *
31358+ * This program is distributed in the hope that it will be useful,
31359+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
31360+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31361+ * GNU General Public License for more details.
31362+ *
31363+ * You should have received a copy of the GNU General Public License
31364+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
31365+ */
31366+
31367+/*
31368+ * sysfs interface and lifetime management
31369+ * they are necessary regardless of whether sysfs is disabled.
31370+ */
31371+
31372+#include <linux/random.h>
31373+#include "aufs.h"
31374+
31375+unsigned long sysaufs_si_mask;
31376+struct kset *sysaufs_kset;
31377+
31378+#define AuSiAttr(_name) { \
31379+ .attr = { .name = __stringify(_name), .mode = 0444 }, \
31380+ .show = sysaufs_si_##_name, \
31381+}
31382+
31383+static struct sysaufs_si_attr sysaufs_si_attr_xi_path = AuSiAttr(xi_path);
31384+struct attribute *sysaufs_si_attrs[] = {
31385+ &sysaufs_si_attr_xi_path.attr,
31386+ NULL,
31387+};
31388+ATTRIBUTE_GROUPS(sysaufs_si);
31389+
31390+static const struct sysfs_ops au_sbi_ops = {
31391+ .show = sysaufs_si_show
31392+};
31393+
31394+static struct kobj_type au_sbi_ktype = {
31395+ .release = au_si_free,
31396+ .sysfs_ops = &au_sbi_ops,
31397+ .default_groups = sysaufs_si_groups
31398+};
31399+
31400+/* ---------------------------------------------------------------------- */
31401+
31402+int sysaufs_si_init(struct au_sbinfo *sbinfo)
31403+{
31404+ int err;
31405+
31406+ sbinfo->si_kobj.kset = sysaufs_kset;
31407+ /* cf. sysaufs_name() */
31408+ err = kobject_init_and_add
31409+ (&sbinfo->si_kobj, &au_sbi_ktype, /*&sysaufs_kset->kobj*/NULL,
31410+ SysaufsSiNamePrefix "%lx", sysaufs_si_id(sbinfo));
31411+
31412+ return err;
31413+}
31414+
31415+void sysaufs_fin(void)
31416+{
31417+ sysfs_remove_group(&sysaufs_kset->kobj, sysaufs_attr_group);
31418+ kset_unregister(sysaufs_kset);
31419+}
31420+
31421+int __init sysaufs_init(void)
31422+{
31423+ int err;
31424+
31425+ do {
31426+ get_random_bytes(&sysaufs_si_mask, sizeof(sysaufs_si_mask));
31427+ } while (!sysaufs_si_mask);
31428+
31429+ err = -EINVAL;
31430+ sysaufs_kset = kset_create_and_add(AUFS_NAME, NULL, fs_kobj);
31431+ if (unlikely(!sysaufs_kset))
31432+ goto out;
31433+ err = PTR_ERR(sysaufs_kset);
31434+ if (IS_ERR(sysaufs_kset))
31435+ goto out;
31436+ err = sysfs_create_group(&sysaufs_kset->kobj, sysaufs_attr_group);
31437+ if (unlikely(err))
31438+ kset_unregister(sysaufs_kset);
31439+
31440+out:
31441+ return err;
31442+}
31443diff -urN /usr/share/empty/fs/aufs/sysaufs.h linux/fs/aufs/sysaufs.h
31444--- /usr/share/empty/fs/aufs/sysaufs.h 1970-01-01 01:00:00.000000000 +0100
31445+++ linux/fs/aufs/sysaufs.h 2022-11-05 23:02:18.969222617 +0100
31446@@ -0,0 +1,102 @@
31447+/* SPDX-License-Identifier: GPL-2.0 */
31448+/*
31449+ * Copyright (C) 2005-2022 Junjiro R. Okajima
31450+ *
31451+ * This program is free software; you can redistribute it and/or modify
31452+ * it under the terms of the GNU General Public License as published by
31453+ * the Free Software Foundation; either version 2 of the License, or
31454+ * (at your option) any later version.
31455+ *
31456+ * This program is distributed in the hope that it will be useful,
31457+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
31458+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31459+ * GNU General Public License for more details.
31460+ *
31461+ * You should have received a copy of the GNU General Public License
31462+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
31463+ */
31464+
31465+/*
31466+ * sysfs interface and mount lifetime management
31467+ */
31468+
31469+#ifndef __SYSAUFS_H__
31470+#define __SYSAUFS_H__
31471+
31472+#ifdef __KERNEL__
31473+
31474+#include <linux/sysfs.h>
31475+#include "module.h"
31476+
31477+struct super_block;
31478+struct au_sbinfo;
31479+
31480+struct sysaufs_si_attr {
31481+ struct attribute attr;
31482+ int (*show)(struct seq_file *seq, struct super_block *sb);
31483+};
31484+
31485+/* ---------------------------------------------------------------------- */
31486+
31487+/* sysaufs.c */
31488+extern unsigned long sysaufs_si_mask;
31489+extern struct kset *sysaufs_kset;
31490+extern struct attribute *sysaufs_si_attrs[];
31491+int sysaufs_si_init(struct au_sbinfo *sbinfo);
31492+int __init sysaufs_init(void);
31493+void sysaufs_fin(void);
31494+
31495+/* ---------------------------------------------------------------------- */
31496+
31497+/* some people doesn't like to show a pointer in kernel */
31498+static inline unsigned long sysaufs_si_id(struct au_sbinfo *sbinfo)
31499+{
31500+ return sysaufs_si_mask ^ (unsigned long)sbinfo;
31501+}
31502+
#define SysaufsSiNamePrefix	"si_"
/* the prefix including its NUL, plus 16 characters for the hex id */
#define SysaufsSiNameLen	(sizeof(SysaufsSiNamePrefix) + 16)

/*
 * print "si_<id in hex>" into @name.
 * at most SysaufsSiNameLen bytes are written, so @name must be that large.
 */
static inline void sysaufs_name(struct au_sbinfo *sbinfo, char *name)
{
	unsigned long id = sysaufs_si_id(sbinfo);

	snprintf(name, SysaufsSiNameLen, SysaufsSiNamePrefix "%lx", id);
}
31510+
31511+struct au_branch;
31512+#ifdef CONFIG_SYSFS
31513+/* sysfs.c */
31514+extern struct attribute_group *sysaufs_attr_group;
31515+
31516+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb);
31517+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
31518+ char *buf);
31519+long au_brinfo_ioctl(struct file *file, unsigned long arg);
31520+#ifdef CONFIG_COMPAT
31521+long au_brinfo_compat_ioctl(struct file *file, unsigned long arg);
31522+#endif
31523+
31524+void sysaufs_br_init(struct au_branch *br);
31525+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
31526+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
31527+
31528+#define sysaufs_brs_init() do {} while (0)
31529+
31530+#else
31531+#define sysaufs_attr_group NULL
31532+
31533+AuStubInt0(sysaufs_si_xi_path, struct seq_file *seq, struct super_block *sb)
31534+AuStub(ssize_t, sysaufs_si_show, return 0, struct kobject *kobj,
31535+ struct attribute *attr, char *buf)
31536+AuStubVoid(sysaufs_br_init, struct au_branch *br)
31537+AuStubVoid(sysaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
31538+AuStubVoid(sysaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
31539+
31540+static inline void sysaufs_brs_init(void)
31541+{
31542+ sysaufs_brs = 0;
31543+}
31544+
31545+#endif /* CONFIG_SYSFS */
31546+
31547+#endif /* __KERNEL__ */
31548+#endif /* __SYSAUFS_H__ */
31549diff -urN /usr/share/empty/fs/aufs/sysfs.c linux/fs/aufs/sysfs.c
31550--- /usr/share/empty/fs/aufs/sysfs.c 1970-01-01 01:00:00.000000000 +0100
31551+++ linux/fs/aufs/sysfs.c 2022-11-05 23:02:18.969222617 +0100
31552@@ -0,0 +1,374 @@
31553+// SPDX-License-Identifier: GPL-2.0
31554+/*
31555+ * Copyright (C) 2005-2022 Junjiro R. Okajima
31556+ *
31557+ * This program is free software; you can redistribute it and/or modify
31558+ * it under the terms of the GNU General Public License as published by
31559+ * the Free Software Foundation; either version 2 of the License, or
31560+ * (at your option) any later version.
31561+ *
31562+ * This program is distributed in the hope that it will be useful,
31563+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
31564+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31565+ * GNU General Public License for more details.
31566+ *
31567+ * You should have received a copy of the GNU General Public License
31568+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
31569+ */
31570+
31571+/*
31572+ * sysfs interface
31573+ */
31574+
31575+#include <linux/compat.h>
31576+#include <linux/seq_file.h>
31577+#include "aufs.h"
31578+
31579+#ifdef CONFIG_AUFS_FS_MODULE
31580+/* this entry violates the "one line per file" policy of sysfs */
31581+static ssize_t config_show(struct kobject *kobj, struct kobj_attribute *attr,
31582+ char *buf)
31583+{
31584+ ssize_t err;
31585+ static char *conf =
31586+/* this file is generated at compiling */
31587+#include "conf.str"
31588+ ;
31589+
31590+ err = snprintf(buf, PAGE_SIZE, conf);
31591+ if (unlikely(err >= PAGE_SIZE))
31592+ err = -EFBIG;
31593+ return err;
31594+}
31595+
31596+static struct kobj_attribute au_config_attr = __ATTR_RO(config);
31597+#endif
31598+
31599+static struct attribute *au_attr[] = {
31600+#ifdef CONFIG_AUFS_FS_MODULE
31601+ &au_config_attr.attr,
31602+#endif
31603+ NULL, /* need to NULL terminate the list of attributes */
31604+};
31605+
31606+static struct attribute_group sysaufs_attr_group_body = {
31607+ .attrs = au_attr
31608+};
31609+
31610+struct attribute_group *sysaufs_attr_group = &sysaufs_attr_group_body;
31611+
31612+/* ---------------------------------------------------------------------- */
31613+
31614+int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb)
31615+{
31616+ int err;
31617+
31618+ SiMustAnyLock(sb);
31619+
31620+ err = 0;
31621+ if (au_opt_test(au_mntflags(sb), XINO)) {
31622+ err = au_xino_path(seq, au_sbi(sb)->si_xib);
31623+ seq_putc(seq, '\n');
31624+ }
31625+ return err;
31626+}
31627+
31628+/*
31629+ * the lifetime of branch is independent from the entry under sysfs.
31630+ * sysfs handles the lifetime of the entry, and never call ->show() after it is
31631+ * unlinked.
31632+ */
31633+static int sysaufs_si_br(struct seq_file *seq, struct super_block *sb,
31634+ aufs_bindex_t bindex, int idx)
31635+{
31636+ int err;
31637+ struct path path;
31638+ struct dentry *root;
31639+ struct au_branch *br;
31640+ au_br_perm_str_t perm;
31641+
31642+ AuDbg("b%d\n", bindex);
31643+
31644+ err = 0;
31645+ root = sb->s_root;
31646+ di_read_lock_parent(root, !AuLock_IR);
31647+ br = au_sbr(sb, bindex);
31648+
31649+ switch (idx) {
31650+ case AuBrSysfs_BR:
31651+ path.mnt = au_br_mnt(br);
31652+ path.dentry = au_h_dptr(root, bindex);
31653+ err = au_seq_path(seq, &path);
31654+ if (!err) {
31655+ au_optstr_br_perm(&perm, br->br_perm);
31656+ seq_printf(seq, "=%s\n", perm.a);
31657+ }
31658+ break;
31659+ case AuBrSysfs_BRID:
31660+ seq_printf(seq, "%d\n", br->br_id);
31661+ break;
31662+ }
31663+ di_read_unlock(root, !AuLock_IR);
31664+ if (unlikely(err || seq_has_overflowed(seq)))
31665+ err = -E2BIG;
31666+
31667+ return err;
31668+}
31669+
31670+/* ---------------------------------------------------------------------- */
31671+
31672+static struct seq_file *au_seq(char *p, ssize_t len)
31673+{
31674+ struct seq_file *seq;
31675+
31676+ seq = kzalloc(sizeof(*seq), GFP_NOFS);
31677+ if (seq) {
31678+ /* mutex_init(&seq.lock); */
31679+ seq->buf = p;
31680+ seq->size = len;
31681+ return seq; /* success */
31682+ }
31683+
31684+ seq = ERR_PTR(-ENOMEM);
31685+ return seq;
31686+}
31687+
31688+#define SysaufsBr_PREFIX "br"
31689+#define SysaufsBrid_PREFIX "brid"
31690+
31691+/* todo: file size may exceed PAGE_SIZE */
31692+ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
31693+ char *buf)
31694+{
31695+ ssize_t err;
31696+ int idx;
31697+ long l;
31698+ aufs_bindex_t bbot;
31699+ struct au_sbinfo *sbinfo;
31700+ struct super_block *sb;
31701+ struct seq_file *seq;
31702+ char *name;
31703+ struct attribute **cattr;
31704+
31705+ sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
31706+ sb = sbinfo->si_sb;
31707+
31708+ /*
31709+ * prevent a race condition between sysfs and aufs.
31710+ * for instance, sysfs_file_read() calls sysfs_get_active_two() which
31711+ * prohibits maintaining the sysfs entries.
31712+ * hew we acquire read lock after sysfs_get_active_two().
31713+ * on the other hand, the remount process may maintain the sysfs/aufs
31714+ * entries after acquiring write lock.
31715+ * it can cause a deadlock.
31716+ * simply we gave up processing read here.
31717+ */
31718+ err = -EBUSY;
31719+ if (unlikely(!si_noflush_read_trylock(sb)))
31720+ goto out;
31721+
31722+ seq = au_seq(buf, PAGE_SIZE);
31723+ err = PTR_ERR(seq);
31724+ if (IS_ERR(seq))
31725+ goto out_unlock;
31726+
31727+ name = (void *)attr->name;
31728+ cattr = sysaufs_si_attrs;
31729+ while (*cattr) {
31730+ if (!strcmp(name, (*cattr)->name)) {
31731+ err = container_of(*cattr, struct sysaufs_si_attr, attr)
31732+ ->show(seq, sb);
31733+ goto out_seq;
31734+ }
31735+ cattr++;
31736+ }
31737+
31738+ if (!strncmp(name, SysaufsBrid_PREFIX,
31739+ sizeof(SysaufsBrid_PREFIX) - 1)) {
31740+ idx = AuBrSysfs_BRID;
31741+ name += sizeof(SysaufsBrid_PREFIX) - 1;
31742+ } else if (!strncmp(name, SysaufsBr_PREFIX,
31743+ sizeof(SysaufsBr_PREFIX) - 1)) {
31744+ idx = AuBrSysfs_BR;
31745+ name += sizeof(SysaufsBr_PREFIX) - 1;
31746+ } else
31747+ BUG();
31748+
31749+ err = kstrtol(name, 10, &l);
31750+ if (!err) {
31751+ bbot = au_sbbot(sb);
31752+ if (l <= bbot)
31753+ err = sysaufs_si_br(seq, sb, (aufs_bindex_t)l, idx);
31754+ else
31755+ err = -ENOENT;
31756+ }
31757+
31758+out_seq:
31759+ if (!err) {
31760+ err = seq->count;
31761+ /* sysfs limit */
31762+ if (unlikely(err == PAGE_SIZE))
31763+ err = -EFBIG;
31764+ }
31765+ au_kfree_rcu(seq);
31766+out_unlock:
31767+ si_read_unlock(sb);
31768+out:
31769+ return err;
31770+}
31771+
31772+/* ---------------------------------------------------------------------- */
31773+
31774+static int au_brinfo(struct super_block *sb, union aufs_brinfo __user *arg)
31775+{
31776+ int err;
31777+ int16_t brid;
31778+ aufs_bindex_t bindex, bbot;
31779+ size_t sz;
31780+ char *buf;
31781+ struct seq_file *seq;
31782+ struct au_branch *br;
31783+
31784+ si_read_lock(sb, AuLock_FLUSH);
31785+ bbot = au_sbbot(sb);
31786+ err = bbot + 1;
31787+ if (!arg)
31788+ goto out;
31789+
31790+ err = -ENOMEM;
31791+ buf = (void *)__get_free_page(GFP_NOFS);
31792+ if (unlikely(!buf))
31793+ goto out;
31794+
31795+ seq = au_seq(buf, PAGE_SIZE);
31796+ err = PTR_ERR(seq);
31797+ if (IS_ERR(seq))
31798+ goto out_buf;
31799+
31800+ sz = sizeof(*arg) - offsetof(union aufs_brinfo, path);
31801+ for (bindex = 0; bindex <= bbot; bindex++, arg++) {
31802+ /* VERIFY_WRITE */
31803+ err = !access_ok(arg, sizeof(*arg));
31804+ if (unlikely(err))
31805+ break;
31806+
31807+ br = au_sbr(sb, bindex);
31808+ brid = br->br_id;
31809+ BUILD_BUG_ON(sizeof(brid) != sizeof(arg->id));
31810+ err = __put_user(brid, &arg->id);
31811+ if (unlikely(err))
31812+ break;
31813+
31814+ BUILD_BUG_ON(sizeof(br->br_perm) != sizeof(arg->perm));
31815+ err = __put_user(br->br_perm, &arg->perm);
31816+ if (unlikely(err))
31817+ break;
31818+
31819+ err = au_seq_path(seq, &br->br_path);
31820+ if (unlikely(err))
31821+ break;
31822+ seq_putc(seq, '\0');
31823+ if (!seq_has_overflowed(seq)) {
31824+ err = copy_to_user(arg->path, seq->buf, seq->count);
31825+ seq->count = 0;
31826+ if (unlikely(err))
31827+ break;
31828+ } else {
31829+ err = -E2BIG;
31830+ goto out_seq;
31831+ }
31832+ }
31833+ if (unlikely(err))
31834+ err = -EFAULT;
31835+
31836+out_seq:
31837+ au_kfree_rcu(seq);
31838+out_buf:
31839+ free_page((unsigned long)buf);
31840+out:
31841+ si_read_unlock(sb);
31842+ return err;
31843+}
31844+
31845+long au_brinfo_ioctl(struct file *file, unsigned long arg)
31846+{
31847+ return au_brinfo(file->f_path.dentry->d_sb, (void __user *)arg);
31848+}
31849+
31850+#ifdef CONFIG_COMPAT
31851+long au_brinfo_compat_ioctl(struct file *file, unsigned long arg)
31852+{
31853+ return au_brinfo(file->f_path.dentry->d_sb, compat_ptr(arg));
31854+}
31855+#endif
31856+
31857+/* ---------------------------------------------------------------------- */
31858+
31859+void sysaufs_br_init(struct au_branch *br)
31860+{
31861+ int i;
31862+ struct au_brsysfs *br_sysfs;
31863+ struct attribute *attr;
31864+
31865+ br_sysfs = br->br_sysfs;
31866+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
31867+ attr = &br_sysfs->attr;
31868+ sysfs_attr_init(attr);
31869+ attr->name = br_sysfs->name;
31870+ attr->mode = 0444;
31871+ br_sysfs++;
31872+ }
31873+}
31874+
31875+void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
31876+{
31877+ struct au_branch *br;
31878+ struct kobject *kobj;
31879+ struct au_brsysfs *br_sysfs;
31880+ int i;
31881+ aufs_bindex_t bbot;
31882+
31883+ if (!sysaufs_brs)
31884+ return;
31885+
31886+ kobj = &au_sbi(sb)->si_kobj;
31887+ bbot = au_sbbot(sb);
31888+ for (; bindex <= bbot; bindex++) {
31889+ br = au_sbr(sb, bindex);
31890+ br_sysfs = br->br_sysfs;
31891+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
31892+ sysfs_remove_file(kobj, &br_sysfs->attr);
31893+ br_sysfs++;
31894+ }
31895+ }
31896+}
31897+
31898+void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
31899+{
31900+ int err, i;
31901+ aufs_bindex_t bbot;
31902+ struct kobject *kobj;
31903+ struct au_branch *br;
31904+ struct au_brsysfs *br_sysfs;
31905+
31906+ if (!sysaufs_brs)
31907+ return;
31908+
31909+ kobj = &au_sbi(sb)->si_kobj;
31910+ bbot = au_sbbot(sb);
31911+ for (; bindex <= bbot; bindex++) {
31912+ br = au_sbr(sb, bindex);
31913+ br_sysfs = br->br_sysfs;
31914+ snprintf(br_sysfs[AuBrSysfs_BR].name, sizeof(br_sysfs->name),
31915+ SysaufsBr_PREFIX "%d", bindex);
31916+ snprintf(br_sysfs[AuBrSysfs_BRID].name, sizeof(br_sysfs->name),
31917+ SysaufsBrid_PREFIX "%d", bindex);
31918+ for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
31919+ err = sysfs_create_file(kobj, &br_sysfs->attr);
31920+ if (unlikely(err))
31921+ pr_warn("failed %s under sysfs(%d)\n",
31922+ br_sysfs->name, err);
31923+ br_sysfs++;
31924+ }
31925+ }
31926+}
31927diff -urN /usr/share/empty/fs/aufs/sysrq.c linux/fs/aufs/sysrq.c
31928--- /usr/share/empty/fs/aufs/sysrq.c 1970-01-01 01:00:00.000000000 +0100
31929+++ linux/fs/aufs/sysrq.c 2022-11-05 23:02:18.969222617 +0100
31930@@ -0,0 +1,149 @@
31931+// SPDX-License-Identifier: GPL-2.0
31932+/*
31933+ * Copyright (C) 2005-2022 Junjiro R. Okajima
31934+ *
31935+ * This program is free software; you can redistribute it and/or modify
31936+ * it under the terms of the GNU General Public License as published by
31937+ * the Free Software Foundation; either version 2 of the License, or
31938+ * (at your option) any later version.
31939+ *
31940+ * This program is distributed in the hope that it will be useful,
31941+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
31942+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31943+ * GNU General Public License for more details.
31944+ *
31945+ * You should have received a copy of the GNU General Public License
31946+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
31947+ */
31948+
31949+/*
31950+ * magic sysrq handler
31951+ */
31952+
31953+/* #include <linux/sysrq.h> */
31954+#include <linux/writeback.h>
31955+#include "aufs.h"
31956+
31957+/* ---------------------------------------------------------------------- */
31958+
31959+static void sysrq_sb(struct super_block *sb)
31960+{
31961+ char *plevel;
31962+ struct au_sbinfo *sbinfo;
31963+ struct file *file;
31964+ struct hlist_bl_head *files;
31965+ struct hlist_bl_node *pos;
31966+ struct au_finfo *finfo;
31967+ struct inode *i;
31968+
31969+ plevel = au_plevel;
31970+ au_plevel = KERN_WARNING;
31971+
31972+ /* since we define pr_fmt, call printk directly */
31973+#define pr(str) printk(KERN_WARNING AUFS_NAME ": " str)
31974+
31975+ sbinfo = au_sbi(sb);
31976+ printk(KERN_WARNING "si=%lx\n", sysaufs_si_id(sbinfo));
31977+ pr("superblock\n");
31978+ au_dpri_sb(sb);
31979+
31980+#if 0 /* reserved */
31981+ do {
31982+ int err, i, j, ndentry;
31983+ struct au_dcsub_pages dpages;
31984+ struct au_dpage *dpage;
31985+
31986+ err = au_dpages_init(&dpages, GFP_ATOMIC);
31987+ if (unlikely(err))
31988+ break;
31989+ err = au_dcsub_pages(&dpages, sb->s_root, NULL, NULL);
31990+ if (!err)
31991+ for (i = 0; i < dpages.ndpage; i++) {
31992+ dpage = dpages.dpages + i;
31993+ ndentry = dpage->ndentry;
31994+ for (j = 0; j < ndentry; j++)
31995+ au_dpri_dentry(dpage->dentries[j]);
31996+ }
31997+ au_dpages_free(&dpages);
31998+ } while (0);
31999+#endif
32000+
32001+ pr("isolated inode\n");
32002+ spin_lock(&sb->s_inode_list_lock);
32003+ list_for_each_entry(i, &sb->s_inodes, i_sb_list) {
32004+ spin_lock(&i->i_lock);
32005+ if (hlist_empty(&i->i_dentry))
32006+ au_dpri_inode(i);
32007+ spin_unlock(&i->i_lock);
32008+ }
32009+ spin_unlock(&sb->s_inode_list_lock);
32010+
32011+ pr("files\n");
32012+ files = &au_sbi(sb)->si_files;
32013+ hlist_bl_lock(files);
32014+ hlist_bl_for_each_entry(finfo, pos, files, fi_hlist) {
32015+ umode_t mode;
32016+
32017+ file = finfo->fi_file;
32018+ mode = file_inode(file)->i_mode;
32019+ if (!special_file(mode))
32020+ au_dpri_file(file);
32021+ }
32022+ hlist_bl_unlock(files);
32023+ pr("done\n");
32024+
32025+#undef pr
32026+ au_plevel = plevel;
32027+}
32028+
32029+/* ---------------------------------------------------------------------- */
32030+
32031+/* module parameter "sysrq" (mode 0444, default "a"); only its first character is used as the key */
32032+static char *aufs_sysrq_key = "a";
32033+module_param_named(sysrq, aufs_sysrq_key, charp, 0444);
32034+MODULE_PARM_DESC(sysrq, "MagicSysRq key for " AUFS_NAME);
32035+
32036+static void au_sysrq(int key __maybe_unused)
32037+{ /* sysrq handler: calls sysrq_sb() for every entry on au_sbilist */
32038+ struct au_sbinfo *sbinfo;
32039+ struct hlist_bl_node *pos;
32040+
32041+ lockdep_off(); /* lockdep stays off for the whole walk and is re-enabled below */
32042+ au_sbilist_lock();
32043+ hlist_bl_for_each_entry(sbinfo, pos, &au_sbilist, si_list)
32044+ sysrq_sb(sbinfo->si_sb);
32045+ au_sbilist_unlock();
32046+ lockdep_on();
32047+}
32048+
32049+static struct sysrq_key_op au_sysrq_op = { /* passed to register_sysrq_key()/unregister_sysrq_key() below */
32050+ .handler = au_sysrq,
32051+ .help_msg = "Aufs",
32052+ .action_msg = "Aufs",
32053+ .enable_mask = SYSRQ_ENABLE_DUMP
32054+};
32055+
32056+/* ---------------------------------------------------------------------- */
32057+
32058+int __init au_sysrq_init(void)
32059+{ /* register au_sysrq_op for the first character of the "sysrq" parameter; returns 0 or an error */
32060+ int err;
32061+ char key;
32062+
32063+ err = -1; /* returned unchanged when the key is outside 'a'..'z' */
32064+ key = *aufs_sysrq_key;
32065+ if ('a' <= key && key <= 'z')
32066+ err = register_sysrq_key(key, &au_sysrq_op);
32067+ if (unlikely(err))
32068+ pr_err("err %d, sysrq=%c\n", err, key);
32069+ return err;
32070+}
32071+
32072+void au_sysrq_fin(void)
32073+{ /* unregister the key registered by au_sysrq_init(); a failure is only logged */
32074+ int err;
32075+
32076+ err = unregister_sysrq_key(*aufs_sysrq_key, &au_sysrq_op);
32077+ if (unlikely(err))
32078+ pr_err("err %d (ignored)\n", err);
32079+}
32080diff -urN /usr/share/empty/fs/aufs/vdir.c linux/fs/aufs/vdir.c
32081--- /usr/share/empty/fs/aufs/vdir.c 1970-01-01 01:00:00.000000000 +0100
32082+++ linux/fs/aufs/vdir.c 2022-12-17 09:21:34.799855195 +0100
32083@@ -0,0 +1,896 @@
32084+// SPDX-License-Identifier: GPL-2.0
32085+/*
32086+ * Copyright (C) 2005-2022 Junjiro R. Okajima
32087+ *
32088+ * This program is free software; you can redistribute it and/or modify
32089+ * it under the terms of the GNU General Public License as published by
32090+ * the Free Software Foundation; either version 2 of the License, or
32091+ * (at your option) any later version.
32092+ *
32093+ * This program is distributed in the hope that it will be useful,
32094+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
32095+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32096+ * GNU General Public License for more details.
32097+ *
32098+ * You should have received a copy of the GNU General Public License
32099+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
32100+ */
32101+
32102+/*
32103+ * virtual or vertical directory
32104+ */
32105+
32106+#include <linux/iversion.h>
32107+#include "aufs.h"
32108+
32109+static unsigned int calc_size(int nlen)
32110+{ /* bytes taken by one entry with an nlen-byte name: sizeof(struct au_vdir_de) + nlen, ALIGN()ed to sizeof(ino_t) */
32111+ return ALIGN(sizeof(struct au_vdir_de) + nlen, sizeof(ino_t));
32112+}
32113+
32114+static int set_deblk_end(union au_vdir_deblk_p *p,
32115+ union au_vdir_deblk_p *deblk_end)
32116+{ /* mark p as the end of the block by storing a zero name length, if an empty entry still fits */
32117+ if (calc_size(0) <= deblk_end->deblk - p->deblk) {
32118+ p->de->de_str.len = 0;
32119+ /* smp_mb(); */
32120+ return 0;
32121+ }
32122+ return -1; /* error: fewer than calc_size(0) bytes remain before deblk_end */
32123+}
32124+
32125+/* returns true when p holds a zero-length entry or when no empty entry fits before deblk_end */
32126+static int is_deblk_end(union au_vdir_deblk_p *p,
32127+ union au_vdir_deblk_p *deblk_end)
32128+{
32129+ if (calc_size(0) <= deblk_end->deblk - p->deblk)
32130+ return !p->de->de_str.len;
32131+ return 1;
32132+}
32133+
32134+static unsigned char *last_deblk(struct au_vdir *vdir)
32135+{ /* the last pointer stored in vd_deblk[]; reads index vd_nblk - 1 */
32136+ return vdir->vd_deblk[vdir->vd_nblk - 1];
32137+}
32138+
32139+/* ---------------------------------------------------------------------- */
32140+
32141+/* estimate the appropriate size for name hash table: sz / 1024, kept within AUFS_RDHASH_DEF..UINT_MAX */
32142+unsigned int au_rdhash_est(loff_t sz)
32143+{
32144+ unsigned int n;
32145+
32146+ n = UINT_MAX; /* upper bound, kept when sz >> 10 does not fit below it */
32147+ sz >>= 10;
32148+ if (sz < n)
32149+ n = sz;
32150+ if (sz < AUFS_RDHASH_DEF) /* lower bound */
32151+ n = AUFS_RDHASH_DEF;
32152+ /* pr_info("n %u\n", n); */
32153+ return n;
32154+}
32155+
32156+/*
32157+ * allocates num_hash empty hash heads; the allocated memory has to be freed by
32158+ * au_nhash_wh_free() or au_nhash_de_free().
32159+ */
32160+int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp)
32161+{
32162+ struct hlist_head *head;
32163+ unsigned int u;
32164+ size_t sz;
32165+
32166+ sz = sizeof(*nhash->nh_head) * num_hash; /* NOTE(review): multiplication is not overflow-checked; confirm callers bound num_hash */
32167+ head = kmalloc(sz, gfp);
32168+ if (head) {
32169+ nhash->nh_num = num_hash;
32170+ nhash->nh_head = head;
32171+ for (u = 0; u < num_hash; u++)
32172+ INIT_HLIST_HEAD(head++);
32173+ return 0; /* success */
32174+ }
32175+
32176+ return -ENOMEM; /* nhash is left untouched in this case */
32177+}
32178+
32179+static void nhash_count(struct hlist_head *head)
32180+{ /* empty unless the disabled body below is enabled; that body prints the number of nodes on head */
32181+#if 0 /* debugging */
32182+ unsigned long n;
32183+ struct hlist_node *pos;
32184+
32185+ n = 0;
32186+ hlist_for_each(pos, head)
32187+ n++;
32188+ pr_info("%lu\n", n);
32189+#endif
32190+}
32191+
32192+static void au_nhash_wh_do_free(struct hlist_head *head)
32193+{ /* pass every au_vdir_wh on this chain to au_kfree_rcu() */
32194+ struct au_vdir_wh *pos;
32195+ struct hlist_node *node;
32196+
32197+ hlist_for_each_entry_safe(pos, node, head, wh_hash)
32198+ au_kfree_rcu(pos);
32199+}
32200+
32201+static void au_nhash_de_do_free(struct hlist_head *head)
32202+{ /* pass every au_vdir_dehstr on this chain to au_cache_free_vdir_dehstr() */
32203+ struct au_vdir_dehstr *pos;
32204+ struct hlist_node *node;
32205+
32206+ hlist_for_each_entry_safe(pos, node, head, hash)
32207+ au_cache_free_vdir_dehstr(pos);
32208+}
32209+
32210+static void au_nhash_do_free(struct au_nhash *nhash,
32211+ void (*free)(struct hlist_head *head))
32212+{ /* run free() on each of the nh_num chains, then release the head array itself */
32213+ unsigned int n;
32214+ struct hlist_head *head;
32215+
32216+ n = nhash->nh_num;
32217+ if (!n) /* nothing is released, not even nh_head, when nh_num is zero */
32218+ return;
32219+
32220+ head = nhash->nh_head;
32221+ while (n-- > 0) {
32222+ nhash_count(head);
32223+ free(head++);
32224+ }
32225+ au_kfree_try_rcu(nhash->nh_head);
32226+}
32227+
32228+void au_nhash_wh_free(struct au_nhash *whlist)
32229+{ /* free a hash table whose chains hold struct au_vdir_wh */
32230+ au_nhash_do_free(whlist, au_nhash_wh_do_free);
32231+}
32232+
32233+static void au_nhash_de_free(struct au_nhash *delist)
32234+{ /* free a hash table whose chains hold struct au_vdir_dehstr */
32235+ au_nhash_do_free(delist, au_nhash_de_do_free);
32236+}
32237+
32238+/* ---------------------------------------------------------------------- */
32239+
32240+int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
32241+ int limit)
32242+{ /* returns 1 as soon as more than limit entries with wh_bindex == btgt are seen, otherwise 0 */
32243+ int num;
32244+ unsigned int u, n;
32245+ struct hlist_head *head;
32246+ struct au_vdir_wh *pos;
32247+
32248+ num = 0;
32249+ n = whlist->nh_num;
32250+ head = whlist->nh_head;
32251+ for (u = 0; u < n; u++, head++)
32252+ hlist_for_each_entry(pos, head, wh_hash)
32253+ if (pos->wh_bindex == btgt && ++num > limit)
32254+ return 1;
32255+ return 0;
32256+}
32257+
32258+static struct hlist_head *au_name_hash(struct au_nhash *nhash,
32259+ unsigned char *name,
32260+ unsigned int len)
32261+{ /* pick the chain for a name: sum of its first (at most 8) bytes, modulo nh_num */
32262+ unsigned int v;
32263+ /* const unsigned int magic_bit = 12; */
32264+
32265+ AuDebugOn(!nhash->nh_num || !nhash->nh_head);
32266+
32267+ v = 0;
32268+ if (len > 8) /* bytes past the eighth do not affect the hash */
32269+ len = 8;
32270+ while (len--)
32271+ v += *name++;
32272+ /* v = hash_long(v, magic_bit); */
32273+ v %= nhash->nh_num;
32274+ return nhash->nh_head + v;
32275+}
32276+
32277+static int au_nhash_test_name(struct au_vdir_destr *str, const char *name,
32278+ int nlen)
32279+{ /* true when str has length nlen and its bytes equal name (memcmp) */
32280+ return str->len == nlen && !memcmp(str->name, name, nlen);
32281+}
32282+
32283+/* returns 1 when the nlen-byte name is found in whlist, otherwise 0 */
32284+int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen)
32285+{
32286+ struct hlist_head *head;
32287+ struct au_vdir_wh *pos;
32288+ struct au_vdir_destr *str;
32289+
32290+ head = au_name_hash(whlist, name, nlen); /* only the chain selected by the hash is searched */
32291+ hlist_for_each_entry(pos, head, wh_hash) {
32292+ str = &pos->wh_str;
32293+ AuDbg("%.*s\n", str->len, str->name);
32294+ if (au_nhash_test_name(str, name, nlen))
32295+ return 1;
32296+ }
32297+ return 0;
32298+}
32299+
32300+/* returns 1 when the nlen-byte name is found in delist, otherwise 0 */
32301+static int test_known(struct au_nhash *delist, char *name, int nlen)
32302+{
32303+ struct hlist_head *head;
32304+ struct au_vdir_dehstr *pos;
32305+ struct au_vdir_destr *str;
32306+
32307+ head = au_name_hash(delist, name, nlen); /* only the chain selected by the hash is searched */
32308+ hlist_for_each_entry(pos, head, hash) {
32309+ str = pos->str;
32310+ AuDbg("%.*s\n", str->len, str->name);
32311+ if (au_nhash_test_name(str, name, nlen))
32312+ return 1;
32313+ }
32314+ return 0;
32315+}
32316+
32317+static void au_shwh_init_wh(struct au_vdir_wh *wh, ino_t ino,
32318+ unsigned char d_type)
32319+{ /* store ino and d_type in wh; compiles to nothing without CONFIG_AUFS_SHWH */
32320+#ifdef CONFIG_AUFS_SHWH
32321+ wh->wh_ino = ino;
32322+ wh->wh_type = d_type;
32323+#endif
32324+}
32325+
32326+/* ---------------------------------------------------------------------- */
32327+
32328+int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
32329+ unsigned int d_type, aufs_bindex_t bindex,
32330+ unsigned char shwh)
32331+{ /* allocate a whiteout record for name and add it to whlist; returns 0 or -ENOMEM */
32332+ int err;
32333+ struct au_vdir_destr *str;
32334+ struct au_vdir_wh *wh;
32335+
32336+ AuDbg("%.*s\n", nlen, name);
32337+ AuDebugOn(!whlist->nh_num || !whlist->nh_head);
32338+
32339+ err = -ENOMEM;
32340+ wh = kmalloc(sizeof(*wh) + nlen, GFP_NOFS); /* record plus nlen bytes for the name */
32341+ if (unlikely(!wh))
32342+ goto out;
32343+
32344+ err = 0;
32345+ wh->wh_bindex = bindex;
32346+ if (shwh) /* ino and d_type are recorded only when shwh is set */
32347+ au_shwh_init_wh(wh, ino, d_type);
32348+ str = &wh->wh_str;
32349+ str->len = nlen;
32350+ memcpy(str->name, name, nlen); /* exactly nlen bytes; no terminator is written here */
32351+ hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, nlen));
32352+ /* smp_mb(); */
32353+
32354+out:
32355+ return err;
32356+}
32357+
32358+static int append_deblk(struct au_vdir *vdir)
32359+{ /* add one empty block of vd_deblk_sz bytes to vdir and point vd_last at its start */
32360+ int err;
32361+ unsigned long ul;
32362+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
32363+ union au_vdir_deblk_p p, deblk_end;
32364+ unsigned char **o;
32365+
32366+ err = -ENOMEM;
32367+ o = au_krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1),
32368+ GFP_NOFS, /*may_shrink*/0);
32369+ if (unlikely(!o))
32370+ goto out;
32371+
32372+ vdir->vd_deblk = o; /* the grown pointer array is kept even if the block allocation below fails */
32373+ p.deblk = kmalloc(deblk_sz, GFP_NOFS);
32374+ if (p.deblk) {
32375+ ul = vdir->vd_nblk++;
32376+ vdir->vd_deblk[ul] = p.deblk;
32377+ vdir->vd_last.ul = ul;
32378+ vdir->vd_last.p.deblk = p.deblk;
32379+ deblk_end.deblk = p.deblk + deblk_sz;
32380+ err = set_deblk_end(&p, &deblk_end); /* store the end marker at the start of the new block */
32381+ }
32382+
32383+out:
32384+ return err;
32385+}
32386+
32387+static int append_de(struct au_vdir *vdir, char *name, int nlen, ino_t ino,
32388+ unsigned int d_type, struct au_nhash *delist)
32389+{ /* store one entry at vd_last (adding a block when it does not fit) and hash its name into delist */
32390+ int err;
32391+ unsigned int sz;
32392+ const unsigned int deblk_sz = vdir->vd_deblk_sz;
32393+ union au_vdir_deblk_p p, *room, deblk_end;
32394+ struct au_vdir_dehstr *dehstr;
32395+
32396+ p.deblk = last_deblk(vdir);
32397+ deblk_end.deblk = p.deblk + deblk_sz;
32398+ room = &vdir->vd_last.p; /* room aliases vd_last.p, so append_deblk() below moves it too */
32399+ AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk
32400+ || !is_deblk_end(room, &deblk_end));
32401+
32402+ sz = calc_size(nlen);
32403+ if (unlikely(sz > deblk_end.deblk - room->deblk)) { /* entry does not fit in the rest of this block */
32404+ err = append_deblk(vdir);
32405+ if (unlikely(err))
32406+ goto out;
32407+
32408+ p.deblk = last_deblk(vdir);
32409+ deblk_end.deblk = p.deblk + deblk_sz;
32410+ /* smp_mb(); */
32411+ AuDebugOn(room->deblk != p.deblk);
32412+ }
32413+
32414+ err = -ENOMEM;
32415+ dehstr = au_cache_alloc_vdir_dehstr();
32416+ if (unlikely(!dehstr))
32417+ goto out;
32418+
32419+ dehstr->str = &room->de->de_str; /* the hash node points at the name stored inside the block */
32420+ hlist_add_head(&dehstr->hash, au_name_hash(delist, name, nlen));
32421+ room->de->de_ino = ino;
32422+ room->de->de_type = d_type;
32423+ room->de->de_str.len = nlen;
32424+ memcpy(room->de->de_str.name, name, nlen);
32425+
32426+ err = 0;
32427+ room->deblk += sz;
32428+ if (unlikely(set_deblk_end(room, &deblk_end))) /* no room for an end marker: start a new block */
32429+ err = append_deblk(vdir);
32430+ /* smp_mb(); */
32431+
32432+out:
32433+ return err;
32434+}
32435+
32436+/* ---------------------------------------------------------------------- */
32437+
32438+void au_vdir_free(struct au_vdir *vdir)
32439+{ /* release every block, then the pointer array, then vdir itself */
32440+ unsigned char **deblk;
32441+
32442+ deblk = vdir->vd_deblk;
32443+ while (vdir->vd_nblk--)
32444+ au_kfree_try_rcu(*deblk++);
32445+ au_kfree_try_rcu(vdir->vd_deblk);
32446+ au_cache_free_vdir(vdir);
32447+}
32448+
32449+static struct au_vdir *alloc_vdir(struct file *file)
32450+{ /* allocate a vdir holding one empty block; returns it or ERR_PTR(err) */
32451+ struct au_vdir *vdir;
32452+ struct super_block *sb;
32453+ int err;
32454+
32455+ sb = file->f_path.dentry->d_sb;
32456+ SiMustAnyLock(sb);
32457+
32458+ err = -ENOMEM;
32459+ vdir = au_cache_alloc_vdir();
32460+ if (unlikely(!vdir))
32461+ goto out;
32462+
32463+ vdir->vd_deblk = kzalloc(sizeof(*vdir->vd_deblk), GFP_NOFS); /* pointer array with a single slot */
32464+ if (unlikely(!vdir->vd_deblk))
32465+ goto out_free;
32466+
32467+ vdir->vd_deblk_sz = au_sbi(sb)->si_rdblk; /* zero here means the size is estimated below */
32468+ if (!vdir->vd_deblk_sz) {
32469+ /* estimate the appropriate size for deblk */
32470+ vdir->vd_deblk_sz = au_dir_size(file, /*dentry*/NULL);
32471+ /* pr_info("vd_deblk_sz %u\n", vdir->vd_deblk_sz); */
32472+ }
32473+ vdir->vd_nblk = 0;
32474+ vdir->vd_version = 0;
32475+ vdir->vd_jiffy = 0;
32476+ err = append_deblk(vdir); /* creates block 0 and sets vd_last */
32477+ if (!err)
32478+ return vdir; /* success */
32479+
32480+ au_kfree_try_rcu(vdir->vd_deblk);
32481+
32482+out_free:
32483+ au_cache_free_vdir(vdir);
32484+out:
32485+ vdir = ERR_PTR(err);
32486+ return vdir;
32487+}
32488+
32489+static int reinit_vdir(struct au_vdir *vdir)
32490+{ /* shrink vdir back to one empty block and reset its cursor, version and timestamp */
32491+ int err;
32492+ union au_vdir_deblk_p p, deblk_end;
32493+
32494+ while (vdir->vd_nblk > 1) { /* free every block except the first */
32495+ au_kfree_try_rcu(vdir->vd_deblk[vdir->vd_nblk - 1]);
32496+ /* vdir->vd_deblk[vdir->vd_nblk - 1] = NULL; */
32497+ vdir->vd_nblk--;
32498+ }
32499+ p.deblk = vdir->vd_deblk[0];
32500+ deblk_end.deblk = p.deblk + vdir->vd_deblk_sz;
32501+ err = set_deblk_end(&p, &deblk_end); /* the result of this call is the return value */
32502+ /* keep vd_dblk_sz */
32503+ vdir->vd_last.ul = 0;
32504+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
32505+ vdir->vd_version = 0;
32506+ vdir->vd_jiffy = 0;
32507+ /* smp_mb(); */
32508+ return err;
32509+}
32510+
32511+/* ---------------------------------------------------------------------- */
32512+
32513+#define AuFillVdir_CALLED 1 /* set by fillvdir() each time it runs */
32514+#define AuFillVdir_WHABLE (1 << 1) /* when set, fillvdir() records whiteout-prefixed names in whlist */
32515+#define AuFillVdir_SHWH (1 << 2) /* set when the SHWH mount option is on */
32516+#define au_ftest_fillvdir(flags, name) ((flags) & AuFillVdir_##name)
32517+#define au_fset_fillvdir(flags, name) \
32518+ do { (flags) |= AuFillVdir_##name; } while (0)
32519+#define au_fclr_fillvdir(flags, name) \
32520+ do { (flags) &= ~AuFillVdir_##name; } while (0)
32521+
32522+#ifndef CONFIG_AUFS_SHWH
32523+#undef AuFillVdir_SHWH
32524+#define AuFillVdir_SHWH 0 /* the SHWH flag can never test true without CONFIG_AUFS_SHWH */
32525+#endif
32526+
32527+struct fillvdir_arg {
32528+ struct dir_context ctx; /* fillvdir() recovers this struct from it via container_of() */
32529+ struct file *file; /* the aufs directory file being read */
32530+ struct au_vdir *vdir; /* destination of append_de() */
32531+ struct au_nhash delist; /* names already stored in vdir */
32532+ struct au_nhash whlist; /* whiteout names seen so far */
32533+ aufs_bindex_t bindex; /* branch index currently being iterated */
32534+ unsigned int flags; /* AuFillVdir_* bits */
32535+ int err; /* error recorded by the most recent fillvdir() call */
32536+};
32537+
32538+static bool fillvdir(struct dir_context *ctx, const char *__name, int nlen,
32539+ loff_t offset __maybe_unused, u64 h_ino,
32540+ unsigned int d_type)
32541+{ /* dir_context actor: record one lower entry in vdir or whlist; returns false once arg->err is set */
32542+ struct fillvdir_arg *arg = container_of(ctx, struct fillvdir_arg, ctx);
32543+ char *name = (void *)__name;
32544+ struct super_block *sb;
32545+ ino_t ino;
32546+ const unsigned char shwh = !!au_ftest_fillvdir(arg->flags, SHWH);
32547+
32548+ arg->err = 0;
32549+ sb = arg->file->f_path.dentry->d_sb;
32550+ au_fset_fillvdir(arg->flags, CALLED); /* the caller's loop tests this flag after each pass */
32551+ /* smp_mb(); */
32552+ if (nlen <= AUFS_WH_PFX_LEN
32553+ || memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) { /* name does not carry the whiteout prefix */
32554+ if (test_known(&arg->delist, name, nlen)
32555+ || au_nhash_test_known_wh(&arg->whlist, name, nlen))
32556+ goto out; /* already exists or whiteouted */
32557+
32558+ arg->err = au_ino(sb, arg->bindex, h_ino, d_type, &ino);
32559+ if (!arg->err) {
32560+ if (unlikely(nlen > AUFS_MAX_NAMELEN))
32561+ d_type = DT_UNKNOWN;
32562+ arg->err = append_de(arg->vdir, name, nlen, ino,
32563+ d_type, &arg->delist);
32564+ }
32565+ } else if (au_ftest_fillvdir(arg->flags, WHABLE)) { /* prefixed name: recorded only when WHABLE is set */
32566+ name += AUFS_WH_PFX_LEN; /* strip the prefix before hashing and storing */
32567+ nlen -= AUFS_WH_PFX_LEN;
32568+ if (au_nhash_test_known_wh(&arg->whlist, name, nlen))
32569+ goto out; /* already whiteouted */
32570+
32571+ ino = 0; /* just to suppress a warning */
32572+ if (shwh)
32573+ arg->err = au_wh_ino(sb, arg->bindex, h_ino, d_type,
32574+ &ino);
32575+ if (!arg->err) {
32576+ if (nlen <= AUFS_MAX_NAMELEN + AUFS_WH_PFX_LEN) /* NOTE(review): the branch above tests '>' and nlen is already stripped; this '<=' looks inverted - confirm */
32577+ d_type = DT_UNKNOWN;
32578+ arg->err = au_nhash_append_wh
32579+ (&arg->whlist, name, nlen, ino, d_type,
32580+ arg->bindex, shwh);
32581+ }
32582+ }
32583+
32584+out:
32585+ if (!arg->err)
32586+ arg->vdir->vd_jiffy = jiffies; /* timestamp compared against si_rdcache in read_vdir() */
32587+ /* smp_mb(); */
32588+ AuTraceErr(arg->err);
32589+ return !arg->err;
32590+}
32591+
32592+static int au_handle_shwh(struct super_block *sb, struct au_vdir *vdir,
32593+ struct au_nhash *whlist, struct au_nhash *delist)
32594+{ /* append every whiteout in whlist to vdir under the name AUFS_WH_PFX + name; returns 0 or the first error */
32595+#ifdef CONFIG_AUFS_SHWH
32596+ int err;
32597+ unsigned int nh, u;
32598+ struct hlist_head *head;
32599+ struct au_vdir_wh *pos;
32600+ struct hlist_node *n;
32601+ char *p, *o;
32602+ struct au_vdir_destr *destr;
32603+
32604+ AuDebugOn(!au_opt_test(au_mntflags(sb), SHWH));
32605+
32606+ err = -ENOMEM;
32607+ o = p = (void *)__get_free_page(GFP_NOFS); /* scratch page used to build the prefixed name */
32608+ if (unlikely(!p))
32609+ goto out;
32610+
32611+ err = 0;
32612+ nh = whlist->nh_num;
32613+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN); /* the prefix is written once; p then points just past it */
32614+ p += AUFS_WH_PFX_LEN;
32615+ for (u = 0; !err && u < nh; u++) { /* stop at the first error so a later bucket cannot overwrite it */
32616+ head = whlist->nh_head + u;
32617+ hlist_for_each_entry_safe(pos, n, head, wh_hash) {
32618+ destr = &pos->wh_str;
32619+ memcpy(p, destr->name, destr->len);
32620+ err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN,
32621+ pos->wh_ino, pos->wh_type, delist);
32622+ if (unlikely(err))
32623+ break;
32624+ }
32625+ }
32626+
32627+ free_page((unsigned long)o);
32628+
32629+out:
32630+ AuTraceErr(err);
32631+ return err;
32632+#else
32633+ return 0;
32634+#endif
32635+}
32636+
32637+static int au_do_read_vdir(struct fillvdir_arg *arg)
32638+{ /* fill arg->vdir by iterating the lower directory of each branch from btop to bbot */
32639+ int err;
32640+ unsigned int rdhash;
32641+ loff_t offset;
32642+ aufs_bindex_t bbot, bindex, btop;
32643+ unsigned char shwh;
32644+ struct file *hf, *file;
32645+ struct super_block *sb;
32646+
32647+ file = arg->file;
32648+ sb = file->f_path.dentry->d_sb;
32649+ SiMustAnyLock(sb);
32650+
32651+ rdhash = au_sbi(sb)->si_rdhash; /* zero here means the table size is estimated from the directory size */
32652+ if (!rdhash)
32653+ rdhash = au_rdhash_est(au_dir_size(file, /*dentry*/NULL));
32654+ err = au_nhash_alloc(&arg->delist, rdhash, GFP_NOFS);
32655+ if (unlikely(err))
32656+ goto out;
32657+ err = au_nhash_alloc(&arg->whlist, rdhash, GFP_NOFS);
32658+ if (unlikely(err))
32659+ goto out_delist;
32660+
32661+ err = 0;
32662+ arg->flags = 0;
32663+ shwh = 0;
32664+ if (au_opt_test(au_mntflags(sb), SHWH)) {
32665+ shwh = 1;
32666+ au_fset_fillvdir(arg->flags, SHWH);
32667+ }
32668+ btop = au_fbtop(file);
32669+ bbot = au_fbbot_dir(file);
32670+ for (bindex = btop; !err && bindex <= bbot; bindex++) {
32671+ hf = au_hf_dir(file, bindex);
32672+ if (!hf) /* no lower file on this branch */
32673+ continue;
32674+
32675+ offset = vfsub_llseek(hf, 0, SEEK_SET); /* rewind; any non-zero result ends the loop and becomes err */
32676+ err = offset;
32677+ if (unlikely(offset))
32678+ break;
32679+
32680+ arg->bindex = bindex;
32681+ au_fclr_fillvdir(arg->flags, WHABLE);
32682+ if (shwh
32683+ || (bindex != bbot
32684+ && au_br_whable(au_sbr_perm(sb, bindex))))
32685+ au_fset_fillvdir(arg->flags, WHABLE);
32686+ do { /* repeat until a pass makes no fillvdir() call or an error occurs */
32687+ arg->err = 0;
32688+ au_fclr_fillvdir(arg->flags, CALLED);
32689+ /* smp_mb(); */
32690+ err = vfsub_iterate_dir(hf, &arg->ctx);
32691+ if (err >= 0)
32692+ err = arg->err;
32693+ } while (!err && au_ftest_fillvdir(arg->flags, CALLED));
32694+
32695+ /*
32696+ * dir_relax() may be good for concurrency, but aufs should not
32697+ * use it since it will cause a lockdep problem.
32698+ */
32699+ }
32700+
32701+ if (!err && shwh)
32702+ err = au_handle_shwh(sb, arg->vdir, &arg->whlist, &arg->delist);
32703+
32704+ au_nhash_wh_free(&arg->whlist); /* both tables are released before returning, on success too */
32705+
32706+out_delist:
32707+ au_nhash_de_free(&arg->delist);
32708+out:
32709+ return err;
32710+}
32711+
32712+static int read_vdir(struct file *file, int may_read)
32713+{ /* make sure the inode has a vdir, building or rebuilding it when missing or stale */
32714+ int err;
32715+ unsigned long expire;
32716+ unsigned char do_read;
32717+ struct fillvdir_arg arg = {
32718+ .ctx = {
32719+ .actor = fillvdir
32720+ }
32721+ };
32722+ struct inode *inode;
32723+ struct au_vdir *vdir, *allocated;
32724+
32725+ err = 0;
32726+ inode = file_inode(file);
32727+ IMustLock(inode);
32728+ IiMustWriteLock(inode);
32729+ SiMustAnyLock(inode->i_sb);
32730+
32731+ allocated = NULL;
32732+ do_read = 0;
32733+ expire = au_sbi(inode->i_sb)->si_rdcache;
32734+ vdir = au_ivdir(inode);
32735+ if (!vdir) { /* no vdir on the inode yet: allocate one and read */
32736+ do_read = 1;
32737+ vdir = alloc_vdir(file);
32738+ err = PTR_ERR(vdir);
32739+ if (IS_ERR(vdir))
32740+ goto out;
32741+ err = 0;
32742+ allocated = vdir;
32743+ } else if (may_read
32744+ && (!inode_eq_iversion(inode, vdir->vd_version)
32745+ || time_after(jiffies, vdir->vd_jiffy + expire))) { /* version changed or older than si_rdcache */
32746+ do_read = 1;
32747+ err = reinit_vdir(vdir);
32748+ if (unlikely(err))
32749+ goto out;
32750+ }
32751+
32752+ if (!do_read)
32753+ return 0; /* success */
32754+
32755+ arg.file = file;
32756+ arg.vdir = vdir;
32757+ err = au_do_read_vdir(&arg);
32758+ if (!err) {
32759+ /* file->f_pos = 0; */ /* todo: ctx->pos? */
32760+ vdir->vd_version = inode_query_iversion(inode);
32761+ vdir->vd_last.ul = 0; /* rewind the cursor to the start of block 0 */
32762+ vdir->vd_last.p.deblk = vdir->vd_deblk[0];
32763+ if (allocated)
32764+ au_set_ivdir(inode, allocated);
32765+ } else if (allocated) /* a newly allocated vdir is freed on failure; an existing one is left in place */
32766+ au_vdir_free(allocated);
32767+
32768+out:
32769+ return err;
32770+}
32771+
32772+static int copy_vdir(struct au_vdir *tgt, struct au_vdir *src)
32773+{ /* copy src's blocks, version, timestamp and cursor into tgt; on failure tgt is reset and -ENOMEM returned */
32774+ int err, rerr;
32775+ unsigned long ul, n;
32776+ const unsigned int deblk_sz = src->vd_deblk_sz;
32777+
32778+ AuDebugOn(tgt->vd_nblk != 1);
32779+
32780+ err = -ENOMEM;
32781+ if (tgt->vd_nblk < src->vd_nblk) { /* grow the pointer array to src's block count */
32782+ unsigned char **p;
32783+
32784+ p = au_krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk,
32785+ GFP_NOFS, /*may_shrink*/0);
32786+ if (unlikely(!p))
32787+ goto out;
32788+ tgt->vd_deblk = p;
32789+ }
32790+
32791+ if (tgt->vd_deblk_sz != deblk_sz) { /* resize block 0 to src's block size */
32792+ unsigned char *p;
32793+
32794+ p = au_krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS,
32795+ /*may_shrink*/1);
32796+ if (unlikely(!p))
32797+ goto out;
32798+ tgt->vd_deblk[0] = p;
32799+ tgt->vd_deblk_sz = deblk_sz; /* updated only after the resize succeeded, so size and buffer stay in sync */
32800+ }
32801+ memcpy(tgt->vd_deblk[0], src->vd_deblk[0], deblk_sz);
32802+ tgt->vd_version = src->vd_version;
32803+ tgt->vd_jiffy = src->vd_jiffy;
32804+
32805+ n = src->vd_nblk;
32806+ for (ul = 1; ul < n; ul++) {
32807+ tgt->vd_deblk[ul] = kmemdup(src->vd_deblk[ul], deblk_sz,
32808+ GFP_NOFS);
32809+ if (unlikely(!tgt->vd_deblk[ul]))
32810+ goto out;
32811+ tgt->vd_nblk++; /* counts duplicated blocks so reinit_vdir() can free them on failure */
32812+ }
32813+ tgt->vd_nblk = n;
32814+ tgt->vd_last.ul = src->vd_last.ul; /* mirror src's cursor: same block index ... */
32815+ tgt->vd_last.p.deblk = tgt->vd_deblk[tgt->vd_last.ul];
32816+ tgt->vd_last.p.deblk += src->vd_last.p.deblk
32817+ - src->vd_deblk[src->vd_last.ul]; /* ... and same byte offset within that block */
32818+ /* smp_mb(); */
32819+ return 0; /* success */
32820+
32821+out:
32822+ rerr = reinit_vdir(tgt);
32823+ BUG_ON(rerr);
32824+ return err;
32825+}
32826+
32827+int au_vdir_init(struct file *file)
32828+{ /* refresh the inode's vdir if needed, then make sure the file has its own copy of it */
32829+ int err;
32830+ struct inode *inode;
32831+ struct au_vdir *vdir_cache, *allocated;
32832+
32833+ /* test file->f_pos here instead of ctx->pos */
32834+ err = read_vdir(file, !file->f_pos); /* re-reading is allowed only at position 0 */
32835+ if (unlikely(err))
32836+ goto out;
32837+
32838+ allocated = NULL;
32839+ vdir_cache = au_fvdir_cache(file);
32840+ if (!vdir_cache) { /* the file has no cache yet */
32841+ vdir_cache = alloc_vdir(file);
32842+ err = PTR_ERR(vdir_cache);
32843+ if (IS_ERR(vdir_cache))
32844+ goto out;
32845+ allocated = vdir_cache;
32846+ } else if (!file->f_pos && vdir_cache->vd_version != file->f_version) {
32847+ /* test file->f_pos here instead of ctx->pos */
32848+ err = reinit_vdir(vdir_cache);
32849+ if (unlikely(err))
32850+ goto out;
32851+ } else
32852+ return 0; /* success */
32853+
32854+ inode = file_inode(file);
32855+ err = copy_vdir(vdir_cache, au_ivdir(inode));
32856+ if (!err) {
32857+ file->f_version = inode_query_iversion(inode);
32858+ if (allocated)
32859+ au_set_fvdir_cache(file, allocated);
32860+ } else if (allocated) /* only a cache allocated in this call is freed on failure */
32861+ au_vdir_free(allocated);
32862+
32863+out:
32864+ return err;
32865+}
32866+
32867+static loff_t calc_offset(struct au_vdir *vdir)
32868+{ /* cursor position as a directory offset: block index * vd_deblk_sz + byte offset within the block */
32869+ loff_t offset;
32870+ union au_vdir_deblk_p p;
32871+
32872+ p.deblk = vdir->vd_deblk[vdir->vd_last.ul];
32873+ offset = vdir->vd_last.p.deblk - p.deblk;
32874+ offset += vdir->vd_deblk_sz * vdir->vd_last.ul;
32875+ return offset;
32876+}
32877+
32878+/* move the file's vdir cursor to ctx->pos; returns true when an entry exists there, false otherwise */
32879+static int seek_vdir(struct file *file, struct dir_context *ctx)
32880+{
32881+ int valid;
32882+ unsigned int deblk_sz;
32883+ unsigned long ul, n;
32884+ loff_t offset;
32885+ union au_vdir_deblk_p p, deblk_end;
32886+ struct au_vdir *vdir_cache;
32887+
32888+ valid = 1;
32889+ vdir_cache = au_fvdir_cache(file);
32890+ offset = calc_offset(vdir_cache);
32891+ AuDbg("offset %lld\n", offset);
32892+ if (ctx->pos == offset) /* the cursor is already at the requested position */
32893+ goto out;
32894+
32895+ vdir_cache->vd_last.ul = 0; /* otherwise rewind to the start of block 0 first */
32896+ vdir_cache->vd_last.p.deblk = vdir_cache->vd_deblk[0];
32897+ if (!ctx->pos)
32898+ goto out;
32899+
32900+ valid = 0;
32901+ deblk_sz = vdir_cache->vd_deblk_sz;
32902+ ul = div64_u64(ctx->pos, deblk_sz); /* index of the block that contains ctx->pos */
32903+ AuDbg("ul %lu\n", ul);
32904+ if (ul >= vdir_cache->vd_nblk)
32905+ goto out;
32906+
32907+ n = vdir_cache->vd_nblk;
32908+ for (; ul < n; ul++) {
32909+ p.deblk = vdir_cache->vd_deblk[ul];
32910+ deblk_end.deblk = p.deblk + deblk_sz;
32911+ offset = ul;
32912+ offset *= deblk_sz;
32913+ while (!is_deblk_end(&p, &deblk_end) && offset < ctx->pos) { /* walk entries until ctx->pos is reached */
32914+ unsigned int l;
32915+
32916+ l = calc_size(p.de->de_str.len);
32917+ offset += l;
32918+ p.deblk += l;
32919+ }
32920+ if (!is_deblk_end(&p, &deblk_end)) { /* stopped on a real entry: this is the new cursor */
32921+ valid = 1;
32922+ vdir_cache->vd_last.ul = ul;
32923+ vdir_cache->vd_last.p = p;
32924+ break;
32925+ }
32926+ }
32927+
32928+out:
32929+ /* smp_mb(); */
32930+ if (!valid)
32931+ AuDbg("valid %d\n", valid); /* log the flag itself rather than its negation */
32932+ return valid;
32933+}
32934+
32935+int au_vdir_fill_de(struct file *file, struct dir_context *ctx)
32936+{ /* emit entries from the file's vdir cache starting at ctx->pos; always returns 0 */
32937+ unsigned int l, deblk_sz;
32938+ union au_vdir_deblk_p deblk_end;
32939+ struct au_vdir *vdir_cache;
32940+ struct au_vdir_de *de;
32941+
32942+ if (!seek_vdir(file, ctx)) /* nothing to emit at this position */
32943+ return 0;
32944+
32945+ vdir_cache = au_fvdir_cache(file);
32946+ deblk_sz = vdir_cache->vd_deblk_sz;
32947+ while (1) {
32948+ deblk_end.deblk = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
32949+ deblk_end.deblk += deblk_sz;
32950+ while (!is_deblk_end(&vdir_cache->vd_last.p, &deblk_end)) {
32951+ de = vdir_cache->vd_last.p.de;
32952+ AuDbg("%.*s, off%lld, i%lu, dt%d\n",
32953+ de->de_str.len, de->de_str.name, ctx->pos,
32954+ (unsigned long)de->de_ino, de->de_type);
32955+ if (unlikely(!dir_emit(ctx, de->de_str.name,
32956+ de->de_str.len, de->de_ino,
32957+ de->de_type))) { /* stop here; the cursor and ctx->pos stay on the unemitted entry */
32958+ /* todo: ignore the error caused by udba? */
32959+ /* return err; */
32960+ return 0;
32961+ }
32962+
32963+ l = calc_size(de->de_str.len);
32964+ vdir_cache->vd_last.p.deblk += l; /* advance the cursor and ctx->pos by the entry size */
32965+ ctx->pos += l;
32966+ }
32967+ if (vdir_cache->vd_last.ul < vdir_cache->vd_nblk - 1) { /* more blocks: continue at the start of the next one */
32968+ vdir_cache->vd_last.ul++;
32969+ vdir_cache->vd_last.p.deblk
32970+ = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
32971+ ctx->pos = deblk_sz * vdir_cache->vd_last.ul;
32972+ continue;
32973+ }
32974+ break;
32975+ }
32976+
32977+ /* smp_mb(); */
32978+ return 0;
32979+}
32980diff -urN /usr/share/empty/fs/aufs/vfsub.c linux/fs/aufs/vfsub.c
32981--- /usr/share/empty/fs/aufs/vfsub.c 1970-01-01 01:00:00.000000000 +0100
32982+++ linux/fs/aufs/vfsub.c 2023-02-20 21:05:51.959693785 +0100
32983@@ -0,0 +1,918 @@
32984+// SPDX-License-Identifier: GPL-2.0
32985+/*
32986+ * Copyright (C) 2005-2022 Junjiro R. Okajima
32987+ *
32988+ * This program is free software; you can redistribute it and/or modify
32989+ * it under the terms of the GNU General Public License as published by
32990+ * the Free Software Foundation; either version 2 of the License, or
32991+ * (at your option) any later version.
32992+ *
32993+ * This program is distributed in the hope that it will be useful,
32994+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
32995+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32996+ * GNU General Public License for more details.
32997+ *
32998+ * You should have received a copy of the GNU General Public License
32999+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
33000+ */
33001+
33002+/*
33003+ * sub-routines for VFS
33004+ */
33005+
33006+#include <linux/mnt_namespace.h>
33007+#include <linux/nsproxy.h>
33008+#include <linux/security.h>
33009+#include <linux/splice.h>
33010+#include "aufs.h"
33011+
33012+#ifdef CONFIG_AUFS_BR_FUSE
33013+int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb)
33014+{ /* returns -EACCES only when h_sb passes au_test_fuse(), au_userns is set and is_current_mnt_ns(mnt) is false */
33015+ if (!au_test_fuse(h_sb) || !au_userns)
33016+ return 0;
33017+
33018+ return is_current_mnt_ns(mnt) ? 0 : -EACCES;
33019+}
33020+#endif
33021+
33022+int vfsub_sync_filesystem(struct super_block *h_sb)
33023+{ /* sync_filesystem() on h_sb while holding s_umount for read, with lockdep switched off around it */
33024+ int err;
33025+
33026+ lockdep_off();
33027+ down_read(&h_sb->s_umount);
33028+ err = sync_filesystem(h_sb);
33029+ up_read(&h_sb->s_umount);
33030+ lockdep_on();
33031+
33032+ return err;
33033+}
33034+
33035+/* ---------------------------------------------------------------------- */
33036+
33037+int vfsub_update_h_iattr(struct path *h_path, int *did)
33038+{ /* call vfsub_getattr() on h_path when its filesystem qualifies; *did reports whether it was called */
33039+ int err;
33040+ struct kstat st;
33041+ struct super_block *h_sb;
33042+
33043+ /*
33044+ * Always needs h_path->mnt for LSM or FUSE branch.
33045+ */
33046+ AuDebugOn(!h_path->mnt);
33047+
33048+ /* for remote fs, leave work for its getattr or d_revalidate */
33049+ /* for bad i_attr fs, handle them in aufs_getattr() */
33050+ /* still some fs may acquire i_mutex. we need to skip them */
33051+ err = 0;
33052+ if (!did) /* did may be NULL; err then doubles as the scratch flag */
33053+ did = &err;
33054+ h_sb = h_path->dentry->d_sb;
33055+ *did = (!au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb));
33056+ if (*did)
33057+ err = vfsub_getattr(h_path, &st); /* st itself is not used afterwards */
33058+
33059+ return err;
33060+}
33061+
33062+/* ---------------------------------------------------------------------- */
33063+
33064+struct file *vfsub_dentry_open(struct path *path, int flags)
33065+{ /* dentry_open() with the current credentials; flags are passed through unchanged */
33066+ return dentry_open(path, flags /* | __FMODE_NONOTIFY */,
33067+ current_cred());
33068+}
33069+
33070+struct file *vfsub_filp_open(const char *path, int oflags, int mode)
33071+{ /* filp_open() with lockdep off; on success also calls vfsub_update_h_iattr() on the opened path */
33072+ struct file *file;
33073+
33074+ lockdep_off();
33075+ file = filp_open(path,
33076+ oflags /* | __FMODE_NONOTIFY */,
33077+ mode);
33078+ lockdep_on();
33079+ if (IS_ERR(file)) /* the error pointer is returned to the caller as is */
33080+ goto out;
33081+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
33082+
33083+out:
33084+ return file;
33085+}
33086+
33087+/*
33088+ * Ideally this function should call VFS:do_last() in order to keep all its
33089+ * checkings. But it is very hard for aufs to regenerate several VFS internal
33090+ * structure such as nameidata. This is a second (or third) best approach.
33091+ * cf. linux/fs/namei.c:do_last(), lookup_open() and atomic_open().
33092+ */
33093+int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
33094+ struct vfsub_aopen_args *args)
33095+{ /* call dir's ->atomic_open() for args->file on branch args->br; br_nfiles stays raised only on full success */
33096+ int err;
33097+ struct au_branch *br = args->br;
33098+ struct file *file = args->file;
33099+ /* copied from linux/fs/namei.c:atomic_open() */
33100+ struct dentry *const DENTRY_NOT_SET = (void *)-1UL;
33101+
33102+ IMustLock(dir);
33103+ AuDebugOn(!dir->i_op->atomic_open);
33104+
33105+ err = au_br_test_oflag(args->open_flag, br);
33106+ if (unlikely(err))
33107+ goto out;
33108+
33109+ au_lcnt_inc(&br->br_nfiles); /* dropped again on every path below that does not end with an open file */
33110+ file->f_path.dentry = DENTRY_NOT_SET;
33111+ file->f_path.mnt = au_br_mnt(br);
33112+ AuDbg("%ps\n", dir->i_op->atomic_open);
33113+ err = dir->i_op->atomic_open(dir, dentry, file, args->open_flag,
33114+ args->create_mode);
33115+ if (unlikely(err < 0)) {
33116+ au_lcnt_dec(&br->br_nfiles);
33117+ goto out;
33118+ }
33119+
33120+ /* temporary workaround for nfsv4 branch */
33121+ if (au_test_nfs(dir->i_sb))
33122+ nfs_mark_for_revalidate(dir);
33123+
33124+ if (file->f_mode & FMODE_CREATED)
33125+ fsnotify_create(dir, dentry);
33126+ if (!(file->f_mode & FMODE_OPENED)) { /* ->atomic_open() returned without opening the file */
33127+ au_lcnt_dec(&br->br_nfiles);
33128+ goto out;
33129+ }
33130+
33131+ /* todo: call VFS:may_open() here */
33132+ /* todo: ima_file_check() too? */
33133+ if (!err && (args->open_flag & __FMODE_EXEC))
33134+ err = deny_write_access(file);
33135+ if (!err)
33136+ fsnotify_open(file);
33137+ else
33138+ au_lcnt_dec(&br->br_nfiles);
33139+ /* note that the file is created and still opened */
33140+
33141+out:
33142+ return err;
33143+}
33144+
33145+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path)
33146+{
33147+ int err;
33148+
33149+ err = kern_path(name, flags, path);
33150+ if (!err && d_is_positive(path->dentry))
33151+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
33152+ return err;
33153+}
33154+
33155+struct dentry *vfsub_lookup_one_len_unlocked(const char *name,
33156+ struct path *ppath, int len)
33157+{
33158+ struct path path;
33159+
33160+ path.dentry = lookup_one_len_unlocked(name, ppath->dentry, len);
33161+ if (IS_ERR(path.dentry))
33162+ goto out;
33163+ if (d_is_positive(path.dentry)) {
33164+ path.mnt = ppath->mnt;
33165+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
33166+ }
33167+
33168+out:
33169+ AuTraceErrPtr(path.dentry);
33170+ return path.dentry;
33171+}
33172+
33173+struct dentry *vfsub_lookup_one_len(const char *name, struct path *ppath,
33174+ int len)
33175+{
33176+ struct path path;
33177+
33178+ /* VFS checks it too, but by WARN_ON_ONCE() */
33179+ IMustLock(d_inode(ppath->dentry));
33180+
33181+ path.dentry = lookup_one_len(name, ppath->dentry, len);
33182+ if (IS_ERR(path.dentry))
33183+ goto out;
33184+ if (d_is_positive(path.dentry)) {
33185+ path.mnt = ppath->mnt;
33186+ vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/
33187+ }
33188+
33189+out:
33190+ AuTraceErrPtr(path.dentry);
33191+ return path.dentry;
33192+}
33193+
33194+void vfsub_call_lkup_one(void *args)
33195+{
33196+ struct vfsub_lkup_one_args *a = args;
33197+ *a->errp = vfsub_lkup_one(a->name, a->ppath);
33198+}
33199+
33200+/* ---------------------------------------------------------------------- */
33201+
33202+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
33203+ struct dentry *d2, struct au_hinode *hdir2)
33204+{
33205+ struct dentry *d;
33206+
33207+ lockdep_off();
33208+ d = lock_rename(d1, d2);
33209+ lockdep_on();
33210+ au_hn_suspend(hdir1);
33211+ if (hdir1 != hdir2)
33212+ au_hn_suspend(hdir2);
33213+
33214+ return d;
33215+}
33216+
33217+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
33218+ struct dentry *d2, struct au_hinode *hdir2)
33219+{
33220+ au_hn_resume(hdir1);
33221+ if (hdir1 != hdir2)
33222+ au_hn_resume(hdir2);
33223+ lockdep_off();
33224+ unlock_rename(d1, d2);
33225+ lockdep_on();
33226+}
33227+
33228+/* ---------------------------------------------------------------------- */
33229+
33230+int vfsub_create(struct inode *dir, struct path *path, int mode, bool want_excl)
33231+{
33232+ int err;
33233+ struct dentry *d;
33234+ struct user_namespace *userns;
33235+
33236+ IMustLock(dir);
33237+
33238+ d = path->dentry;
33239+ path->dentry = d->d_parent;
33240+ err = security_path_mknod(path, d, mode, 0);
33241+ path->dentry = d;
33242+ if (unlikely(err))
33243+ goto out;
33244+ userns = mnt_user_ns(path->mnt);
33245+
33246+ lockdep_off();
33247+ err = vfs_create(userns, dir, path->dentry, mode, want_excl);
33248+ lockdep_on();
33249+ if (!err) {
33250+ struct path tmp = *path;
33251+ int did;
33252+
33253+ vfsub_update_h_iattr(&tmp, &did);
33254+ if (did) {
33255+ tmp.dentry = path->dentry->d_parent;
33256+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
33257+ }
33258+ /*ignore*/
33259+ }
33260+
33261+out:
33262+ return err;
33263+}
33264+
33265+int vfsub_symlink(struct inode *dir, struct path *path, const char *symname)
33266+{
33267+ int err;
33268+ struct dentry *d;
33269+ struct user_namespace *userns;
33270+
33271+ IMustLock(dir);
33272+
33273+ d = path->dentry;
33274+ path->dentry = d->d_parent;
33275+ err = security_path_symlink(path, d, symname);
33276+ path->dentry = d;
33277+ if (unlikely(err))
33278+ goto out;
33279+ userns = mnt_user_ns(path->mnt);
33280+
33281+ lockdep_off();
33282+ err = vfs_symlink(userns, dir, path->dentry, symname);
33283+ lockdep_on();
33284+ if (!err) {
33285+ struct path tmp = *path;
33286+ int did;
33287+
33288+ vfsub_update_h_iattr(&tmp, &did);
33289+ if (did) {
33290+ tmp.dentry = path->dentry->d_parent;
33291+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
33292+ }
33293+ /*ignore*/
33294+ }
33295+
33296+out:
33297+ return err;
33298+}
33299+
33300+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev)
33301+{
33302+ int err;
33303+ struct dentry *d;
33304+ struct user_namespace *userns;
33305+
33306+ IMustLock(dir);
33307+
33308+ d = path->dentry;
33309+ path->dentry = d->d_parent;
33310+ err = security_path_mknod(path, d, mode, new_encode_dev(dev));
33311+ path->dentry = d;
33312+ if (unlikely(err))
33313+ goto out;
33314+ userns = mnt_user_ns(path->mnt);
33315+
33316+ lockdep_off();
33317+ err = vfs_mknod(userns, dir, path->dentry, mode, dev);
33318+ lockdep_on();
33319+ if (!err) {
33320+ struct path tmp = *path;
33321+ int did;
33322+
33323+ vfsub_update_h_iattr(&tmp, &did);
33324+ if (did) {
33325+ tmp.dentry = path->dentry->d_parent;
33326+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
33327+ }
33328+ /*ignore*/
33329+ }
33330+
33331+out:
33332+ return err;
33333+}
33334+
33335+static int au_test_nlink(struct inode *inode)
33336+{
33337+ const unsigned int link_max = UINT_MAX >> 1; /* rough margin */
33338+
33339+ if (!au_test_fs_no_limit_nlink(inode->i_sb)
33340+ || inode->i_nlink < link_max)
33341+ return 0;
33342+ return -EMLINK;
33343+}
33344+
33345+int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path,
33346+ struct inode **delegated_inode)
33347+{
33348+ int err;
33349+ struct dentry *d;
33350+ struct user_namespace *userns;
33351+
33352+ IMustLock(dir);
33353+
33354+ err = au_test_nlink(d_inode(src_dentry));
33355+ if (unlikely(err))
33356+ return err;
33357+
33358+ /* we don't call may_linkat() */
33359+ d = path->dentry;
33360+ path->dentry = d->d_parent;
33361+ err = security_path_link(src_dentry, path, d);
33362+ path->dentry = d;
33363+ if (unlikely(err))
33364+ goto out;
33365+ userns = mnt_user_ns(path->mnt);
33366+
33367+ lockdep_off();
33368+ err = vfs_link(src_dentry, userns, dir, path->dentry, delegated_inode);
33369+ lockdep_on();
33370+ if (!err) {
33371+ struct path tmp = *path;
33372+ int did;
33373+
33374+ /* fuse has different memory inode for the same inumber */
33375+ vfsub_update_h_iattr(&tmp, &did);
33376+ if (did) {
33377+ tmp.dentry = path->dentry->d_parent;
33378+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
33379+ tmp.dentry = src_dentry;
33380+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
33381+ }
33382+ /*ignore*/
33383+ }
33384+
33385+out:
33386+ return err;
33387+}
33388+
33389+int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry,
33390+ struct inode *dir, struct path *path,
33391+ struct inode **delegated_inode, unsigned int flags)
33392+{
33393+ int err;
33394+ struct renamedata rd;
33395+ struct path tmp = {
33396+ .mnt = path->mnt
33397+ };
33398+ struct dentry *d;
33399+
33400+ IMustLock(dir);
33401+ IMustLock(src_dir);
33402+
33403+ d = path->dentry;
33404+ path->dentry = d->d_parent;
33405+ tmp.dentry = src_dentry->d_parent;
33406+ err = security_path_rename(&tmp, src_dentry, path, d, /*flags*/0);
33407+ path->dentry = d;
33408+ if (unlikely(err))
33409+ goto out;
33410+
33411+ rd.old_mnt_userns = mnt_user_ns(path->mnt);
33412+ rd.old_dir = src_dir;
33413+ rd.old_dentry = src_dentry;
33414+ rd.new_mnt_userns = rd.old_mnt_userns;
33415+ rd.new_dir = dir;
33416+ rd.new_dentry = path->dentry;
33417+ rd.delegated_inode = delegated_inode;
33418+ rd.flags = flags;
33419+ lockdep_off();
33420+ err = vfs_rename(&rd);
33421+ lockdep_on();
33422+ if (!err) {
33423+ int did;
33424+
33425+ tmp.dentry = d->d_parent;
33426+ vfsub_update_h_iattr(&tmp, &did);
33427+ if (did) {
33428+ tmp.dentry = src_dentry;
33429+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
33430+ tmp.dentry = src_dentry->d_parent;
33431+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
33432+ }
33433+ /*ignore*/
33434+ }
33435+
33436+out:
33437+ return err;
33438+}
33439+
33440+int vfsub_mkdir(struct inode *dir, struct path *path, int mode)
33441+{
33442+ int err;
33443+ struct dentry *d;
33444+ struct user_namespace *userns;
33445+
33446+ IMustLock(dir);
33447+
33448+ d = path->dentry;
33449+ path->dentry = d->d_parent;
33450+ err = security_path_mkdir(path, d, mode);
33451+ path->dentry = d;
33452+ if (unlikely(err))
33453+ goto out;
33454+ userns = mnt_user_ns(path->mnt);
33455+
33456+ lockdep_off();
33457+ err = vfs_mkdir(userns, dir, path->dentry, mode);
33458+ lockdep_on();
33459+ if (!err) {
33460+ struct path tmp = *path;
33461+ int did;
33462+
33463+ vfsub_update_h_iattr(&tmp, &did);
33464+ if (did) {
33465+ tmp.dentry = path->dentry->d_parent;
33466+ vfsub_update_h_iattr(&tmp, /*did*/NULL);
33467+ }
33468+ /*ignore*/
33469+ }
33470+
33471+out:
33472+ return err;
33473+}
33474+
33475+int vfsub_rmdir(struct inode *dir, struct path *path)
33476+{
33477+ int err;
33478+ struct dentry *d;
33479+ struct user_namespace *userns;
33480+
33481+ IMustLock(dir);
33482+
33483+ d = path->dentry;
33484+ path->dentry = d->d_parent;
33485+ err = security_path_rmdir(path, d);
33486+ path->dentry = d;
33487+ if (unlikely(err))
33488+ goto out;
33489+ userns = mnt_user_ns(path->mnt);
33490+
33491+ lockdep_off();
33492+ err = vfs_rmdir(userns, dir, path->dentry);
33493+ lockdep_on();
33494+ if (!err) {
33495+ struct path tmp = {
33496+ .dentry = path->dentry->d_parent,
33497+ .mnt = path->mnt
33498+ };
33499+
33500+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
33501+ }
33502+
33503+out:
33504+ return err;
33505+}
33506+
33507+/* ---------------------------------------------------------------------- */
33508+
33509+/* todo: support mmap_sem? */
33510+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
33511+ loff_t *ppos)
33512+{
33513+ ssize_t err;
33514+
33515+ lockdep_off();
33516+ err = vfs_read(file, ubuf, count, ppos);
33517+ lockdep_on();
33518+ if (err >= 0)
33519+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
33520+ return err;
33521+}
33522+
33523+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
33524+ loff_t *ppos)
33525+{
33526+ ssize_t err;
33527+
33528+ lockdep_off();
33529+ err = kernel_read(file, kbuf, count, ppos);
33530+ lockdep_on();
33531+ AuTraceErr(err);
33532+ if (err >= 0)
33533+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
33534+ return err;
33535+}
33536+
33537+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
33538+ loff_t *ppos)
33539+{
33540+ ssize_t err;
33541+
33542+ lockdep_off();
33543+ err = vfs_write(file, ubuf, count, ppos);
33544+ lockdep_on();
33545+ if (err >= 0)
33546+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
33547+ return err;
33548+}
33549+
33550+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos)
33551+{
33552+ ssize_t err;
33553+
33554+ lockdep_off();
33555+ err = kernel_write(file, kbuf, count, ppos);
33556+ lockdep_on();
33557+ if (err >= 0)
33558+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
33559+ return err;
33560+}
33561+
33562+int vfsub_flush(struct file *file, fl_owner_t id)
33563+{
33564+ int err;
33565+
33566+ err = 0;
33567+ if (file->f_op->flush) {
33568+ if (!au_test_nfs(file->f_path.dentry->d_sb))
33569+ err = file->f_op->flush(file, id);
33570+ else {
33571+ lockdep_off();
33572+ err = file->f_op->flush(file, id);
33573+ lockdep_on();
33574+ }
33575+ if (!err)
33576+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL);
33577+ /*ignore*/
33578+ }
33579+ return err;
33580+}
33581+
33582+int vfsub_iterate_dir(struct file *file, struct dir_context *ctx)
33583+{
33584+ int err;
33585+
33586+ AuDbg("%pD, ctx{%ps, %llu}\n", file, ctx->actor, ctx->pos);
33587+
33588+ lockdep_off();
33589+ err = iterate_dir(file, ctx);
33590+ lockdep_on();
33591+ if (err >= 0)
33592+ vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
33593+
33594+ return err;
33595+}
33596+
33597+long vfsub_splice_to(struct file *in, loff_t *ppos,
33598+ struct pipe_inode_info *pipe, size_t len,
33599+ unsigned int flags)
33600+{
33601+ long err;
33602+
33603+ lockdep_off();
33604+ err = do_splice_to(in, ppos, pipe, len, flags);
33605+ lockdep_on();
33606+ file_accessed(in);
33607+ if (err >= 0)
33608+ vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/
33609+ return err;
33610+}
33611+
33612+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
33613+ loff_t *ppos, size_t len, unsigned int flags)
33614+{
33615+ long err;
33616+
33617+ lockdep_off();
33618+ err = do_splice_from(pipe, out, ppos, len, flags);
33619+ lockdep_on();
33620+ if (err >= 0)
33621+ vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/
33622+ return err;
33623+}
33624+
33625+int vfsub_fsync(struct file *file, struct path *path, int datasync)
33626+{
33627+ int err;
33628+
33629+ /* file can be NULL */
33630+ lockdep_off();
33631+ err = vfs_fsync(file, datasync);
33632+ lockdep_on();
33633+ if (!err) {
33634+ if (!path) {
33635+ AuDebugOn(!file);
33636+ path = &file->f_path;
33637+ }
33638+ vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/
33639+ }
33640+ return err;
33641+}
33642+
33643+/* cf. open.c:do_sys_truncate() and do_sys_ftruncate() */
33644+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
33645+ struct file *h_file)
33646+{
33647+ int err;
33648+ struct inode *h_inode;
33649+ struct super_block *h_sb;
33650+ struct user_namespace *h_userns;
33651+
33652+ if (!h_file) {
33653+ err = vfsub_truncate(h_path, length);
33654+ goto out;
33655+ }
33656+
33657+ h_inode = d_inode(h_path->dentry);
33658+ h_sb = h_inode->i_sb;
33659+ lockdep_off();
33660+ sb_start_write(h_sb);
33661+ lockdep_on();
33662+ err = security_file_truncate(h_file);
33663+ if (!err) {
33664+ h_userns = mnt_user_ns(h_path->mnt);
33665+ lockdep_off();
33666+ err = do_truncate(h_userns, h_path->dentry, length, attr,
33667+ h_file);
33668+ lockdep_on();
33669+ }
33670+ lockdep_off();
33671+ sb_end_write(h_sb);
33672+ lockdep_on();
33673+
33674+out:
33675+ return err;
33676+}
33677+
33678+/* ---------------------------------------------------------------------- */
33679+
33680+struct au_vfsub_mkdir_args {
33681+ int *errp;
33682+ struct inode *dir;
33683+ struct path *path;
33684+ int mode;
33685+};
33686+
33687+static void au_call_vfsub_mkdir(void *args)
33688+{
33689+ struct au_vfsub_mkdir_args *a = args;
33690+ *a->errp = vfsub_mkdir(a->dir, a->path, a->mode);
33691+}
33692+
33693+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode)
33694+{
33695+ int err, do_sio, wkq_err;
33696+ struct user_namespace *userns;
33697+
33698+ userns = mnt_user_ns(path->mnt);
33699+ do_sio = au_test_h_perm_sio(userns, dir, MAY_EXEC | MAY_WRITE);
33700+ if (!do_sio) {
33701+ lockdep_off();
33702+ err = vfsub_mkdir(dir, path, mode);
33703+ lockdep_on();
33704+ } else {
33705+ struct au_vfsub_mkdir_args args = {
33706+ .errp = &err,
33707+ .dir = dir,
33708+ .path = path,
33709+ .mode = mode
33710+ };
33711+ wkq_err = au_wkq_wait(au_call_vfsub_mkdir, &args);
33712+ if (unlikely(wkq_err))
33713+ err = wkq_err;
33714+ }
33715+
33716+ return err;
33717+}
33718+
33719+struct au_vfsub_rmdir_args {
33720+ int *errp;
33721+ struct inode *dir;
33722+ struct path *path;
33723+};
33724+
33725+static void au_call_vfsub_rmdir(void *args)
33726+{
33727+ struct au_vfsub_rmdir_args *a = args;
33728+ *a->errp = vfsub_rmdir(a->dir, a->path);
33729+}
33730+
33731+int vfsub_sio_rmdir(struct inode *dir, struct path *path)
33732+{
33733+ int err, do_sio, wkq_err;
33734+ struct user_namespace *userns;
33735+
33736+ userns = mnt_user_ns(path->mnt);
33737+ do_sio = au_test_h_perm_sio(userns, dir, MAY_EXEC | MAY_WRITE);
33738+ if (!do_sio) {
33739+ lockdep_off();
33740+ err = vfsub_rmdir(dir, path);
33741+ lockdep_on();
33742+ } else {
33743+ struct au_vfsub_rmdir_args args = {
33744+ .errp = &err,
33745+ .dir = dir,
33746+ .path = path
33747+ };
33748+ wkq_err = au_wkq_wait(au_call_vfsub_rmdir, &args);
33749+ if (unlikely(wkq_err))
33750+ err = wkq_err;
33751+ }
33752+
33753+ return err;
33754+}
33755+
33756+/* ---------------------------------------------------------------------- */
33757+
33758+struct notify_change_args {
33759+ int *errp;
33760+ struct path *path;
33761+ struct iattr *ia;
33762+ struct inode **delegated_inode;
33763+};
33764+
33765+static void call_notify_change(void *args)
33766+{
33767+ struct notify_change_args *a = args;
33768+ struct inode *h_inode;
33769+ struct user_namespace *userns;
33770+
33771+ h_inode = d_inode(a->path->dentry);
33772+ IMustLock(h_inode);
33773+
33774+ *a->errp = -EPERM;
33775+ if (!IS_IMMUTABLE(h_inode) && !IS_APPEND(h_inode)) {
33776+ userns = mnt_user_ns(a->path->mnt);
33777+ lockdep_off();
33778+ *a->errp = notify_change(userns, a->path->dentry, a->ia,
33779+ a->delegated_inode);
33780+ lockdep_on();
33781+ if (!*a->errp)
33782+ vfsub_update_h_iattr(a->path, /*did*/NULL); /*ignore*/
33783+ }
33784+ AuTraceErr(*a->errp);
33785+}
33786+
33787+int vfsub_notify_change(struct path *path, struct iattr *ia,
33788+ struct inode **delegated_inode)
33789+{
33790+ int err;
33791+ struct notify_change_args args = {
33792+ .errp = &err,
33793+ .path = path,
33794+ .ia = ia,
33795+ .delegated_inode = delegated_inode
33796+ };
33797+
33798+ call_notify_change(&args);
33799+
33800+ return err;
33801+}
33802+
33803+int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
33804+ struct inode **delegated_inode)
33805+{
33806+ int err, wkq_err;
33807+ struct notify_change_args args = {
33808+ .errp = &err,
33809+ .path = path,
33810+ .ia = ia,
33811+ .delegated_inode = delegated_inode
33812+ };
33813+
33814+ wkq_err = au_wkq_wait(call_notify_change, &args);
33815+ if (unlikely(wkq_err))
33816+ err = wkq_err;
33817+
33818+ return err;
33819+}
33820+
33821+/* ---------------------------------------------------------------------- */
33822+
33823+struct unlink_args {
33824+ int *errp;
33825+ struct inode *dir;
33826+ struct path *path;
33827+ struct inode **delegated_inode;
33828+};
33829+
33830+static void call_unlink(void *args)
33831+{
33832+ struct unlink_args *a = args;
33833+ struct dentry *d = a->path->dentry;
33834+ struct inode *h_inode;
33835+ struct user_namespace *userns;
33836+ const int stop_sillyrename = (au_test_nfs(d->d_sb)
33837+ && au_dcount(d) == 1);
33838+
33839+ IMustLock(a->dir);
33840+
33841+ a->path->dentry = d->d_parent;
33842+ *a->errp = security_path_unlink(a->path, d);
33843+ a->path->dentry = d;
33844+ if (unlikely(*a->errp))
33845+ return;
33846+
33847+ if (!stop_sillyrename)
33848+ dget(d);
33849+ h_inode = NULL;
33850+ if (d_is_positive(d)) {
33851+ h_inode = d_inode(d);
33852+ ihold(h_inode);
33853+ }
33854+
33855+ userns = mnt_user_ns(a->path->mnt);
33856+ lockdep_off();
33857+ *a->errp = vfs_unlink(userns, a->dir, d, a->delegated_inode);
33858+ lockdep_on();
33859+ if (!*a->errp) {
33860+ struct path tmp = {
33861+ .dentry = d->d_parent,
33862+ .mnt = a->path->mnt
33863+ };
33864+ vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
33865+ }
33866+
33867+ if (!stop_sillyrename)
33868+ dput(d);
33869+ if (h_inode)
33870+ iput(h_inode);
33871+
33872+ AuTraceErr(*a->errp);
33873+}
33874+
33875+/*
33876+ * @dir: must be locked.
33877+ * @path: path whose dentry is the target to unlink.
33878+ */
33879+int vfsub_unlink(struct inode *dir, struct path *path,
33880+ struct inode **delegated_inode, int force)
33881+{
33882+ int err;
33883+ struct unlink_args args = {
33884+ .errp = &err,
33885+ .dir = dir,
33886+ .path = path,
33887+ .delegated_inode = delegated_inode
33888+ };
33889+
33890+ if (!force)
33891+ call_unlink(&args);
33892+ else {
33893+ int wkq_err;
33894+
33895+ wkq_err = au_wkq_wait(call_unlink, &args);
33896+ if (unlikely(wkq_err))
33897+ err = wkq_err;
33898+ }
33899+
33900+ return err;
33901+}
33902diff -urN /usr/share/empty/fs/aufs/vfsub.h linux/fs/aufs/vfsub.h
33903--- /usr/share/empty/fs/aufs/vfsub.h 1970-01-01 01:00:00.000000000 +0100
33904+++ linux/fs/aufs/vfsub.h 2023-02-20 21:05:51.959693785 +0100
33905@@ -0,0 +1,390 @@
33906+/* SPDX-License-Identifier: GPL-2.0 */
33907+/*
33908+ * Copyright (C) 2005-2022 Junjiro R. Okajima
33909+ *
33910+ * This program is free software; you can redistribute it and/or modify
33911+ * it under the terms of the GNU General Public License as published by
33912+ * the Free Software Foundation; either version 2 of the License, or
33913+ * (at your option) any later version.
33914+ *
33915+ * This program is distributed in the hope that it will be useful,
33916+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
33917+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33918+ * GNU General Public License for more details.
33919+ *
33920+ * You should have received a copy of the GNU General Public License
33921+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
33922+ */
33923+
33924+/*
33925+ * sub-routines for VFS
33926+ */
33927+
33928+#ifndef __AUFS_VFSUB_H__
33929+#define __AUFS_VFSUB_H__
33930+
33931+#ifdef __KERNEL__
33932+
33933+#include <linux/fs.h>
33934+#include <linux/mount.h>
33935+#include <linux/posix_acl.h>
33936+#include <linux/xattr.h>
33937+#include "debug.h"
33938+
33939+/* copied from linux/fs/internal.h */
33940+/* todo: BAD approach!! */
33941+extern void __mnt_drop_write(struct vfsmount *);
33942+extern struct file *alloc_empty_file(int, const struct cred *);
33943+
33944+/* ---------------------------------------------------------------------- */
33945+
33946+/* lock subclass for lower inode */
33947+/* default MAX_LOCKDEP_SUBCLASSES(8) is not enough */
33948+/* reduce? gave up. */
33949+enum {
33950+ AuLsc_I_Begin = I_MUTEX_PARENT2, /* 5 */
33951+ AuLsc_I_PARENT, /* lower inode, parent first */
33952+ AuLsc_I_PARENT2, /* copyup dirs */
33953+ AuLsc_I_PARENT3, /* copyup wh */
33954+ AuLsc_I_CHILD,
33955+ AuLsc_I_CHILD2,
33956+ AuLsc_I_End
33957+};
33958+
33959+/* to make debugging easier, do not make them inline functions */
33960+#define MtxMustLock(mtx) AuDebugOn(!mutex_is_locked(mtx))
33961+#define IMustLock(i) AuDebugOn(!inode_is_locked(i))
33962+
33963+/* ---------------------------------------------------------------------- */
33964+
33965+static inline void vfsub_drop_nlink(struct inode *inode)
33966+{
33967+ AuDebugOn(!inode->i_nlink);
33968+ drop_nlink(inode);
33969+}
33970+
33971+static inline void vfsub_dead_dir(struct inode *inode)
33972+{
33973+ AuDebugOn(!S_ISDIR(inode->i_mode));
33974+ inode->i_flags |= S_DEAD;
33975+ clear_nlink(inode);
33976+}
33977+
33978+static inline int vfsub_native_ro(struct inode *inode)
33979+{
33980+ return sb_rdonly(inode->i_sb)
33981+ || IS_RDONLY(inode)
33982+ /* || IS_APPEND(inode) */
33983+ || IS_IMMUTABLE(inode);
33984+}
33985+
33986+#ifdef CONFIG_AUFS_BR_FUSE
33987+int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb);
33988+#else
33989+AuStubInt0(vfsub_test_mntns, struct vfsmount *mnt, struct super_block *h_sb);
33990+#endif
33991+
33992+int vfsub_sync_filesystem(struct super_block *h_sb);
33993+
33994+/* ---------------------------------------------------------------------- */
33995+
33996+int vfsub_update_h_iattr(struct path *h_path, int *did);
33997+struct file *vfsub_dentry_open(struct path *path, int flags);
33998+struct file *vfsub_filp_open(const char *path, int oflags, int mode);
33999+struct au_branch;
34000+struct vfsub_aopen_args {
34001+ struct file *file;
34002+ unsigned int open_flag;
34003+ umode_t create_mode;
34004+ struct au_branch *br;
34005+};
34006+int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
34007+ struct vfsub_aopen_args *args);
34008+int vfsub_kern_path(const char *name, unsigned int flags, struct path *path);
34009+
34010+struct dentry *vfsub_lookup_one_len_unlocked(const char *name,
34011+ struct path *ppath, int len);
34012+struct dentry *vfsub_lookup_one_len(const char *name, struct path *ppath,
34013+ int len);
34014+
34015+struct vfsub_lkup_one_args {
34016+ struct dentry **errp;
34017+ struct qstr *name;
34018+ struct path *ppath;
34019+};
34020+
34021+static inline struct dentry *vfsub_lkup_one(struct qstr *name,
34022+ struct path *ppath)
34023+{
34024+ return vfsub_lookup_one_len(name->name, ppath, name->len);
34025+}
34026+
34027+void vfsub_call_lkup_one(void *args);
34028+
34029+/* ---------------------------------------------------------------------- */
34030+
34031+static inline int vfsub_mnt_want_write(struct vfsmount *mnt)
34032+{
34033+ int err;
34034+
34035+ lockdep_off();
34036+ err = mnt_want_write(mnt);
34037+ lockdep_on();
34038+ return err;
34039+}
34040+
34041+static inline void vfsub_mnt_drop_write(struct vfsmount *mnt)
34042+{
34043+ lockdep_off();
34044+ mnt_drop_write(mnt);
34045+ lockdep_on();
34046+}
34047+
34048+#if 0 /* reserved */
34049+static inline void vfsub_mnt_drop_write_file(struct file *file)
34050+{
34051+ lockdep_off();
34052+ mnt_drop_write_file(file);
34053+ lockdep_on();
34054+}
34055+#endif
34056+
34057+/* ---------------------------------------------------------------------- */
34058+
34059+struct au_hinode;
34060+struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
34061+ struct dentry *d2, struct au_hinode *hdir2);
34062+void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
34063+ struct dentry *d2, struct au_hinode *hdir2);
34064+
34065+int vfsub_create(struct inode *dir, struct path *path, int mode,
34066+ bool want_excl);
34067+int vfsub_symlink(struct inode *dir, struct path *path,
34068+ const char *symname);
34069+int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev);
34070+int vfsub_link(struct dentry *src_dentry, struct inode *dir,
34071+ struct path *path, struct inode **delegated_inode);
34072+int vfsub_rename(struct inode *src_hdir, struct dentry *src_dentry,
34073+ struct inode *hdir, struct path *path,
34074+ struct inode **delegated_inode, unsigned int flags);
34075+int vfsub_mkdir(struct inode *dir, struct path *path, int mode);
34076+int vfsub_rmdir(struct inode *dir, struct path *path);
34077+
34078+/* ---------------------------------------------------------------------- */
34079+
34080+ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
34081+ loff_t *ppos);
34082+ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
34083+ loff_t *ppos);
34084+ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
34085+ loff_t *ppos);
34086+ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count,
34087+ loff_t *ppos);
34088+int vfsub_flush(struct file *file, fl_owner_t id);
34089+int vfsub_iterate_dir(struct file *file, struct dir_context *ctx);
34090+
34091+static inline loff_t vfsub_f_size_read(struct file *file)
34092+{
34093+ return i_size_read(file_inode(file));
34094+}
34095+
34096+static inline unsigned int vfsub_file_flags(struct file *file)
34097+{
34098+ unsigned int flags;
34099+
34100+ spin_lock(&file->f_lock);
34101+ flags = file->f_flags;
34102+ spin_unlock(&file->f_lock);
34103+
34104+ return flags;
34105+}
34106+
34107+static inline int vfsub_file_execed(struct file *file)
34108+{
34109+ /* todo: direct access f_flags */
34110+ return !!(vfsub_file_flags(file) & __FMODE_EXEC);
34111+}
34112+
34113+#if 0 /* reserved */
34114+static inline void vfsub_file_accessed(struct file *h_file)
34115+{
34116+ file_accessed(h_file);
34117+ vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); /*ignore*/
34118+}
34119+#endif
34120+
34121+#if 0 /* reserved */
34122+static inline void vfsub_touch_atime(struct vfsmount *h_mnt,
34123+ struct dentry *h_dentry)
34124+{
34125+ struct path h_path = {
34126+ .dentry = h_dentry,
34127+ .mnt = h_mnt
34128+ };
34129+ touch_atime(&h_path);
34130+ vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
34131+}
34132+#endif
34133+
34134+static inline int vfsub_update_time(struct inode *h_inode,
34135+ struct timespec64 *ts, int flags)
34136+{
34137+ return inode_update_time(h_inode, ts, flags);
34138+ /* no vfsub_update_h_iattr() since we don't have struct path */
34139+}
34140+
34141+#ifdef CONFIG_FS_POSIX_ACL
34142+static inline int vfsub_acl_chmod(struct user_namespace *h_userns,
34143+ struct dentry *h_dentry, umode_t h_mode)
34144+{
34145+ int err;
34146+
34147+ err = posix_acl_chmod(h_userns, h_dentry, h_mode);
34148+ if (err == -EOPNOTSUPP)
34149+ err = 0;
34150+ return err;
34151+}
34152+#else
34153+AuStubInt0(vfsub_acl_chmod, struct user_namespace *h_userns,
34154+ struct dentry *h_dentry, umode_t h_mode);
34155+#endif
34156+
34157+long vfsub_splice_to(struct file *in, loff_t *ppos,
34158+ struct pipe_inode_info *pipe, size_t len,
34159+ unsigned int flags);
34160+long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
34161+ loff_t *ppos, size_t len, unsigned int flags);
34162+
34163+static inline long vfsub_truncate(struct path *path, loff_t length)
34164+{
34165+ long err;
34166+
34167+ lockdep_off();
34168+ err = vfs_truncate(path, length);
34169+ lockdep_on();
34170+ return err;
34171+}
34172+
34173+int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
34174+ struct file *h_file);
34175+int vfsub_fsync(struct file *file, struct path *path, int datasync);
34176+
34177+/*
34178+ * re-use branch fs's ioctl(FICLONE) while aufs itself doesn't support such
34179+ * ioctl.
34180+ */
34181+static inline loff_t vfsub_clone_file_range(struct file *src, struct file *dst,
34182+ loff_t len)
34183+{
34184+ loff_t err;
34185+
34186+ lockdep_off();
34187+ err = vfs_clone_file_range(src, 0, dst, 0, len, /*remap_flags*/0);
34188+ lockdep_on();
34189+
34190+ return err;
34191+}
34192+
34193+/* copy_file_range(2) is a system call */
34194+static inline ssize_t vfsub_copy_file_range(struct file *src, loff_t src_pos,
34195+ struct file *dst, loff_t dst_pos,
34196+ size_t len, unsigned int flags)
34197+{
34198+ ssize_t ssz;
34199+
34200+ lockdep_off();
34201+ ssz = vfs_copy_file_range(src, src_pos, dst, dst_pos, len, flags);
34202+ lockdep_on();
34203+
34204+ return ssz;
34205+}
34206+
34207+/* ---------------------------------------------------------------------- */
34208+
34209+static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin)
34210+{
34211+ loff_t err;
34212+
34213+ lockdep_off();
34214+ err = vfs_llseek(file, offset, origin);
34215+ lockdep_on();
34216+ return err;
34217+}
34218+
34219+/* ---------------------------------------------------------------------- */
34220+
34221+int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode);
34222+int vfsub_sio_rmdir(struct inode *dir, struct path *path);
34223+int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
34224+ struct inode **delegated_inode);
34225+int vfsub_notify_change(struct path *path, struct iattr *ia,
34226+ struct inode **delegated_inode);
34227+int vfsub_unlink(struct inode *dir, struct path *path,
34228+ struct inode **delegated_inode, int force);
34229+
34230+static inline int vfsub_getattr(const struct path *path, struct kstat *st)
34231+{
34232+ return vfs_getattr(path, st, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
34233+}
34234+
34235+/* ---------------------------------------------------------------------- */
34236+
34237+static inline int vfsub_setxattr(struct user_namespace *userns,
34238+ struct dentry *dentry, const char *name,
34239+ const void *value, size_t size, int flags)
34240+{
34241+ int err;
34242+
34243+ lockdep_off();
34244+ err = vfs_setxattr(userns, dentry, name, value, size, flags);
34245+ lockdep_on();
34246+
34247+ return err;
34248+}
34249+
34250+static inline int vfsub_removexattr(struct user_namespace *userns,
34251+ struct dentry *dentry, const char *name)
34252+{ /* vfs_removexattr() bracketed by lockdep_off()/lockdep_on() */
34253+ int err;
34254+
34255+ lockdep_off();
34256+ err = vfs_removexattr(userns, dentry, name);
34257+ lockdep_on();
34258+
34259+ return err; /* vfs_removexattr()'s return value, passed through unchanged */
34260+}
34261+
34262+#ifdef CONFIG_FS_POSIX_ACL
34263+static inline int vfsub_set_acl(struct user_namespace *userns,
34264+ struct dentry *dentry, const char *name,
34265+ struct posix_acl *acl)
34266+{ /* vfs_set_acl() bracketed by lockdep_off()/lockdep_on() */
34267+ int err;
34268+
34269+ lockdep_off();
34270+ err = vfs_set_acl(userns, dentry, name, acl);
34271+ lockdep_on();
34272+
34273+ return err;
34274+}
34275+
34276+static inline int vfsub_remove_acl(struct user_namespace *userns,
34277+ struct dentry *dentry, const char *name)
34278+{ /* vfs_remove_acl() bracketed by lockdep_off()/lockdep_on() */
34279+ int err;
34280+
34281+ lockdep_off();
34282+ err = vfs_remove_acl(userns, dentry, name);
34283+ lockdep_on();
34284+
34285+ return err;
34286+}
34287+#else /* !CONFIG_FS_POSIX_ACL: same names provided via AuStubInt0() (presumably stubs returning 0 -- see its definition) */
34288+AuStubInt0(vfsub_set_acl, struct user_namespace *userns, struct dentry *dentry,
34289+ const char *name, struct posix_acl *acl);
34290+AuStubInt0(vfsub_remove_acl, struct user_namespace *userns,
34291+ struct dentry *dentry, const char *name);
34292+#endif /* CONFIG_FS_POSIX_ACL */
34293+
34294+#endif /* __KERNEL__ */
34295+#endif /* __AUFS_VFSUB_H__ */
34296diff -urN /usr/share/empty/fs/aufs/wbr_policy.c linux/fs/aufs/wbr_policy.c
34297--- /usr/share/empty/fs/aufs/wbr_policy.c 1970-01-01 01:00:00.000000000 +0100
34298+++ linux/fs/aufs/wbr_policy.c 2022-11-05 23:02:18.969222617 +0100
34299@@ -0,0 +1,830 @@
34300+// SPDX-License-Identifier: GPL-2.0
34301+/*
34302+ * Copyright (C) 2005-2022 Junjiro R. Okajima
34303+ *
34304+ * This program is free software; you can redistribute it and/or modify
34305+ * it under the terms of the GNU General Public License as published by
34306+ * the Free Software Foundation; either version 2 of the License, or
34307+ * (at your option) any later version.
34308+ *
34309+ * This program is distributed in the hope that it will be useful,
34310+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
34311+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34312+ * GNU General Public License for more details.
34313+ *
34314+ * You should have received a copy of the GNU General Public License
34315+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
34316+ */
34317+
34318+/*
34319+ * policies for selecting one among multiple writable branches
34320+ */
34321+
34322+#include <linux/statfs.h>
34323+#include "aufs.h"
34324+
34325+/* subset of cpup_attr(): apply @h_src's mode, uid, gid and inode flags to @h_path */
34326+static noinline_for_stack
34327+int au_cpdown_attr(struct path *h_path, struct dentry *h_src)
34328+{
34329+ int err, sbits;
34330+ struct iattr ia;
34331+ struct inode *h_isrc;
34332+
34333+ h_isrc = d_inode(h_src);
34334+ ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID;
34335+ ia.ia_mode = h_isrc->i_mode;
34336+ ia.ia_uid = h_isrc->i_uid;
34337+ ia.ia_gid = h_isrc->i_gid;
34338+ sbits = !!(ia.ia_mode & (S_ISUID | S_ISGID)); /* 1 when the source mode has setuid or setgid */
34339+ au_cpup_attr_flags(d_inode(h_path->dentry), h_isrc->i_flags);
34340+ /* no delegation since it is just created */
34341+ err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
34342+
34343+ /* is this nfs only? second pass re-applies the mode alone when set-id bits are present */
34344+ if (!err && sbits && au_test_nfs(h_path->dentry->d_sb)) {
34345+ ia.ia_valid = ATTR_FORCE | ATTR_MODE;
34346+ ia.ia_mode = h_isrc->i_mode;
34347+ err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
34348+ }
34349+
34350+ return err; /* result of the last vfsub_sio_notify_change() call */
34351+}
34352+
34353+#define AuCpdown_PARENT_OPQ 1 /* AuCpdown_*: bits of the unsigned int flag word that au_cpdown_dir() receives via @arg */
34354+#define AuCpdown_WHED (1 << 1)
34355+#define AuCpdown_MADE_DIR (1 << 2)
34356+#define AuCpdown_DIROPQ (1 << 3)
34357+#define au_ftest_cpdown(flags, name) ((flags) & AuCpdown_##name) /* @name is the suffix after AuCpdown_ */
34358+#define au_fset_cpdown(flags, name) \
34359+ do { (flags) |= AuCpdown_##name; } while (0)
34360+#define au_fclr_cpdown(flags, name) \
34361+ do { (flags) &= ~AuCpdown_##name; } while (0)
34362+
34363+static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst,
34364+ unsigned int *flags)
34365+{ /* call au_diropq_create() for @dentry on @bdst; on success set DIROPQ in *flags */
34366+ int err;
34367+ struct dentry *opq_dentry;
34368+
34369+ opq_dentry = au_diropq_create(dentry, bdst);
34370+ err = PTR_ERR(opq_dentry);
34371+ if (IS_ERR(opq_dentry))
34372+ goto out;
34373+ dput(opq_dentry); /* the returned dentry itself is not needed here */
34374+ au_fset_cpdown(*flags, DIROPQ);
34375+
34376+out:
34377+ return err; /* NOTE(review): on success this is PTR_ERR() of a valid pointer; the caller only tests it with unlikely(err) -- confirm au_diropq_create()'s contract */
34378+}
34379+
34380+static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent,
34381+ struct inode *dir, aufs_bindex_t bdst)
34382+{ /* look up the whiteout for @dentry's name under @h_parent on @bdst and unlink it if it exists */
34383+ int err;
34384+ struct path h_path;
34385+ struct au_branch *br;
34386+
34387+ br = au_sbr(dentry->d_sb, bdst);
34388+ h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
34389+ err = PTR_ERR(h_path.dentry);
34390+ if (IS_ERR(h_path.dentry))
34391+ goto out;
34392+
34393+ err = 0; /* a negative lookup result is not an error */
34394+ if (d_is_positive(h_path.dentry)) {
34395+ h_path.mnt = au_br_mnt(br);
34396+ err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path,
34397+ dentry);
34398+ }
34399+ dput(h_path.dentry);
34400+
34401+out:
34402+ return err;
34403+}
34404+
34405+static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst,
34406+ struct au_pin *pin,
34407+ struct dentry *h_parent, void *arg)
34408+{ /* callback handed to au_cp_dirs(): make @dentry's directory on branch @bdst under @h_parent; @arg is the AuCpdown flag word */
34409+ int err, rerr;
34410+ aufs_bindex_t bopq, btop;
34411+ struct path h_path;
34412+ struct dentry *parent;
34413+ struct inode *h_dir, *h_inode, *inode, *dir;
34414+ unsigned int *flags = arg;
34415+
34416+ btop = au_dbtop(dentry);
34417+ /* dentry is di-locked */
34418+ parent = dget_parent(dentry);
34419+ dir = d_inode(parent);
34420+ h_dir = d_inode(h_parent);
34421+ AuDebugOn(h_dir != au_h_iptr(dir, bdst));
34422+ IMustLock(h_dir);
34423+
34424+ err = au_lkup_neg(dentry, bdst, /*wh*/0);
34425+ if (unlikely(err < 0))
34426+ goto out;
34427+ h_path.dentry = au_h_dptr(dentry, bdst);
34428+ h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst);
34429+ err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path, 0755); /* provisional mode; au_cpdown_attr() below applies the source's mode */
34430+ if (unlikely(err))
34431+ goto out_put;
34432+ au_fset_cpdown(*flags, MADE_DIR);
34433+
34434+ bopq = au_dbdiropq(dentry);
34435+ au_fclr_cpdown(*flags, WHED); /* WHED and DIROPQ are recomputed for this dentry */
34436+ au_fclr_cpdown(*flags, DIROPQ);
34437+ if (au_dbwh(dentry) == bdst)
34438+ au_fset_cpdown(*flags, WHED);
34439+ if (!au_ftest_cpdown(*flags, PARENT_OPQ) && bopq <= bdst)
34440+ au_fset_cpdown(*flags, PARENT_OPQ); /* never cleared in this function, so it persists across calls sharing @arg */
34441+ h_inode = d_inode(h_path.dentry);
34442+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
34443+ if (au_ftest_cpdown(*flags, WHED)) {
34444+ err = au_cpdown_dir_opq(dentry, bdst, flags);
34445+ if (unlikely(err)) {
34446+ inode_unlock(h_inode);
34447+ goto out_dir;
34448+ }
34449+ }
34450+
34451+ err = au_cpdown_attr(&h_path, au_h_dptr(dentry, btop)); /* attributes come from the dentry's top branch */
34452+ inode_unlock(h_inode);
34453+ if (unlikely(err))
34454+ goto out_opq;
34455+
34456+ if (au_ftest_cpdown(*flags, WHED)) {
34457+ err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst);
34458+ if (unlikely(err))
34459+ goto out_opq;
34460+ }
34461+
34462+ inode = d_inode(dentry);
34463+ if (au_ibbot(inode) < bdst)
34464+ au_set_ibbot(inode, bdst);
34465+ au_set_h_iptr(inode, bdst, au_igrab(h_inode),
34466+ au_hi_flags(inode, /*isdir*/1));
34467+ au_fhsm_wrote(dentry->d_sb, bdst, /*force*/0);
34468+ goto out; /* success */
34469+
34470+ /* revert */
34471+out_opq:
34472+ if (au_ftest_cpdown(*flags, DIROPQ)) {
34473+ inode_lock_nested(h_inode, AuLsc_I_CHILD);
34474+ rerr = au_diropq_remove(dentry, bdst);
34475+ inode_unlock(h_inode);
34476+ if (unlikely(rerr)) {
34477+ AuIOErr("failed removing diropq for %pd b%d (%d)\n",
34478+ dentry, bdst, rerr);
34479+ err = -EIO;
34480+ goto out; /* skips out_dir and out_put: the new directory is left in place */
34481+ }
34482+ }
34483+out_dir:
34484+ if (au_ftest_cpdown(*flags, MADE_DIR)) {
34485+ rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path);
34486+ if (unlikely(rerr)) {
34487+ AuIOErr("failed removing %pd b%d (%d)\n",
34488+ dentry, bdst, rerr);
34489+ err = -EIO;
34490+ }
34491+ }
34492+out_put:
34493+ au_set_h_dptr(dentry, bdst, NULL); /* drop the entry set up by au_lkup_neg() above */
34494+ if (au_dbbot(dentry) == bdst)
34495+ au_update_dbbot(dentry);
34496+out:
34497+ dput(parent);
34498+ return err;
34499+}
34500+
34501+int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst)
34502+{ /* run au_cp_dirs() with au_cpdown_dir() as the callback and a zeroed AuCpdown flag word */
34503+ int err;
34504+ unsigned int flags;
34505+
34506+ flags = 0;
34507+ err = au_cp_dirs(dentry, bdst, au_cpdown_dir, &flags);
34508+
34509+ return err;
34510+}
34511+
34512+/* ---------------------------------------------------------------------- */
34513+
34514+/* policies for create */
34515+
34516+int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex)
34517+{ /* returns the smallest of @bindex and every non-negative au_dbdiropq() among the collected dentries, or a negative error */
34518+ int err, i, j, ndentry;
34519+ aufs_bindex_t bopq;
34520+ struct au_dcsub_pages dpages;
34521+ struct au_dpage *dpage;
34522+ struct dentry **dentries, *parent, *d;
34523+
34524+ err = au_dpages_init(&dpages, GFP_NOFS);
34525+ if (unlikely(err))
34526+ goto out;
34527+ parent = dget_parent(dentry);
34528+ err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/0); /* presumably gathers @parent's ancestors -- confirm in dcsub.c */
34529+ if (unlikely(err))
34530+ goto out_free;
34531+
34532+ err = bindex; /* start from the caller's candidate and only ever lower it */
34533+ for (i = 0; i < dpages.ndpage; i++) {
34534+ dpage = dpages.dpages + i;
34535+ dentries = dpage->dentries;
34536+ ndentry = dpage->ndentry;
34537+ for (j = 0; j < ndentry; j++) {
34538+ d = dentries[j];
34539+ di_read_lock_parent2(d, !AuLock_IR);
34540+ bopq = au_dbdiropq(d);
34541+ di_read_unlock(d, !AuLock_IR);
34542+ if (bopq >= 0 && bopq < err)
34543+ err = bopq;
34544+ }
34545+ }
34546+
34547+out_free:
34548+ dput(parent);
34549+ au_dpages_free(&dpages);
34550+out:
34551+ return err;
34552+}
34553+
34554+static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex)
34555+{ /* scan branch indices from @bindex down to 0; return the first that is not read-only */
34556+ for (; bindex >= 0; bindex--)
34557+ if (!au_br_rdonly(au_sbr(sb, bindex)))
34558+ return bindex;
34559+ return -EROFS; /* none found (also when @bindex is negative on entry) */
34560+}
34561+
34562+/* top down parent: the dentry's top branch if writable, else the first writable branch holding the parent */
34563+static int au_wbr_create_tdp(struct dentry *dentry,
34564+ unsigned int flags __maybe_unused)
34565+{
34566+ int err;
34567+ aufs_bindex_t btop, bindex;
34568+ struct super_block *sb;
34569+ struct dentry *parent, *h_parent;
34570+
34571+ sb = dentry->d_sb;
34572+ btop = au_dbtop(dentry);
34573+ err = btop;
34574+ if (!au_br_rdonly(au_sbr(sb, btop)))
34575+ goto out; /* top branch is writable: use it */
34576+
34577+ err = -EROFS;
34578+ parent = dget_parent(dentry);
34579+ for (bindex = au_dbtop(parent); bindex < btop; bindex++) {
34580+ h_parent = au_h_dptr(parent, bindex);
34581+ if (!h_parent || d_is_negative(h_parent))
34582+ continue; /* parent does not exist on this branch */
34583+
34584+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
34585+ err = bindex;
34586+ break;
34587+ }
34588+ }
34589+ dput(parent);
34590+
34591+ /* bottom up here */
34592+ if (unlikely(err < 0)) {
34593+ err = au_wbr_bu(sb, btop - 1);
34594+ if (err >= 0)
34595+ err = au_wbr_nonopq(dentry, err);
34596+ }
34597+
34598+out:
34599+ AuDbg("b%d\n", err);
34600+ return err; /* branch index, or a negative error such as -EROFS */
34601+}
34602+
34603+/* ---------------------------------------------------------------------- */
34604+
34605+/* an exception for the policy other than tdp: returns a branch index, or a negative value when no exception applies */
34606+static int au_wbr_create_exp(struct dentry *dentry)
34607+{
34608+ int err;
34609+ aufs_bindex_t bwh, bdiropq;
34610+ struct dentry *parent;
34611+
34612+ err = -1; /* default: no exception */
34613+ bwh = au_dbwh(dentry);
34614+ parent = dget_parent(dentry);
34615+ bdiropq = au_dbdiropq(parent);
34616+ if (bwh >= 0) {
34617+ if (bdiropq >= 0)
34618+ err = min(bdiropq, bwh);
34619+ else
34620+ err = bwh;
34621+ AuDbg("%d\n", err);
34622+ } else if (bdiropq >= 0) {
34623+ err = bdiropq;
34624+ AuDbg("%d\n", err);
34625+ }
34626+ dput(parent);
34627+
34628+ if (err >= 0)
34629+ err = au_wbr_nonopq(dentry, err); /* may lower the index or return an error */
34630+
34631+ if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err)))
34632+ err = -1; /* chosen branch is read-only: fall back to the regular policy */
34633+
34634+ AuDbg("%d\n", err);
34635+ return err;
34636+}
34637+
34638+/* ---------------------------------------------------------------------- */
34639+
34640+/* round robin: seed the shared counter si_wbr_rr_next */
34641+static int au_wbr_create_init_rr(struct super_block *sb)
34642+{
34643+ int err;
34644+
34645+ err = au_wbr_bu(sb, au_sbbot(sb)); /* writable branch found scanning up from the bottom, or -EROFS */
34646+ atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */
34647+ /* smp_mb(); */
34648+
34649+ AuDbg("b%d\n", err);
34650+ return err;
34651+}
34652+
34653+static int au_wbr_create_rr(struct dentry *dentry, unsigned int flags)
34654+{ /* pick a writable branch from si_wbr_rr_next modulo the branch count, unless au_wbr_create_exp() already names one */
34655+ int err, nbr;
34656+ unsigned int u;
34657+ aufs_bindex_t bindex, bbot;
34658+ struct super_block *sb;
34659+ atomic_t *next;
34660+
34661+ err = au_wbr_create_exp(dentry);
34662+ if (err >= 0)
34663+ goto out;
34664+
34665+ sb = dentry->d_sb;
34666+ next = &au_sbi(sb)->si_wbr_rr_next;
34667+ bbot = au_sbbot(sb);
34668+ nbr = bbot + 1;
34669+ for (bindex = 0; bindex <= bbot; bindex++) { /* bindex only bounds the number of attempts */
34670+ if (!au_ftest_wbr(flags, DIR)) {
34671+ err = atomic_dec_return(next) + 1; /* value before the decrement */
34672+ /* modulo for 0 is meaningless */
34673+ if (unlikely(!err))
34674+ err = atomic_dec_return(next) + 1;
34675+ } else
34676+ err = atomic_read(next); /* DIR flag set: read without advancing the counter */
34677+ AuDbg("%d\n", err);
34678+ u = err; /* convert to unsigned so the modulo below cannot go negative */
34679+ err = u % nbr;
34680+ AuDbg("%d\n", err);
34681+ if (!au_br_rdonly(au_sbr(sb, err)))
34682+ break;
34683+ err = -EROFS;
34684+ }
34685+
34686+ if (err >= 0)
34687+ err = au_wbr_nonopq(dentry, err);
34688+
34689+out:
34690+ AuDbg("%d\n", err);
34691+ return err; /* branch index, or negative (-EROFS when every attempt hit a read-only branch) */
34692+}
34693+
34694+/* ---------------------------------------------------------------------- */
34695+
34696+/* most free space: refresh mfs_bindex, mfs_jiffy and mfsrr_bytes from vfs_statfs() of the writable branches */
34697+static void au_mfs(struct dentry *dentry, struct dentry *parent)
34698+{
34699+ struct super_block *sb;
34700+ struct au_branch *br;
34701+ struct au_wbr_mfs *mfs;
34702+ struct dentry *h_parent;
34703+ aufs_bindex_t bindex, bbot;
34704+ int err;
34705+ unsigned long long b, bavail;
34706+ struct path h_path;
34707+ /* reduce the stack usage */
34708+ struct kstatfs *st;
34709+
34710+ st = kmalloc(sizeof(*st), GFP_NOFS);
34711+ if (unlikely(!st)) {
34712+ AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM);
34713+ return; /* the previous mfs values stay untouched */
34714+ }
34715+
34716+ bavail = 0;
34717+ sb = dentry->d_sb;
34718+ mfs = &au_sbi(sb)->si_wbr_mfs;
34719+ MtxMustLock(&mfs->mfs_lock); /* both callers in this file take mfs_lock before calling */
34720+ mfs->mfs_bindex = -EROFS; /* stays -EROFS if no branch qualifies below */
34721+ mfs->mfsrr_bytes = 0;
34722+ if (!parent) {
34723+ bindex = 0; /* no parent given: consider every branch */
34724+ bbot = au_sbbot(sb);
34725+ } else {
34726+ bindex = au_dbtop(parent);
34727+ bbot = au_dbtaildir(parent);
34728+ }
34729+
34730+ for (; bindex <= bbot; bindex++) {
34731+ if (parent) {
34732+ h_parent = au_h_dptr(parent, bindex);
34733+ if (!h_parent || d_is_negative(h_parent))
34734+ continue; /* parent does not exist on this branch */
34735+ }
34736+ br = au_sbr(sb, bindex);
34737+ if (au_br_rdonly(br))
34738+ continue;
34739+
34740+ /* sb->s_root for NFS is unreliable */
34741+ h_path.mnt = au_br_mnt(br);
34742+ h_path.dentry = h_path.mnt->mnt_root;
34743+ err = vfs_statfs(&h_path, st);
34744+ if (unlikely(err)) {
34745+ AuWarn1("failed statfs, b%d, %d\n", bindex, err);
34746+ continue;
34747+ }
34748+
34749+ /* when the available size is equal, select the lower one */
34750+ BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail)
34751+ || sizeof(b) < sizeof(st->f_bsize));
34752+ b = st->f_bavail * st->f_bsize;
34753+ br->br_wbr->wbr_bytes = b; /* cached per branch; read later by the tdmfs/pmfs/pmfsrr policies */
34754+ if (b >= bavail) { /* ">=" lets a later (larger-index) branch win ties */
34755+ bavail = b;
34756+ mfs->mfs_bindex = bindex;
34757+ mfs->mfs_jiffy = jiffies;
34758+ }
34759+ }
34760+
34761+ mfs->mfsrr_bytes = bavail;
34762+ AuDbg("b%d\n", mfs->mfs_bindex);
34763+ au_kfree_rcu(st);
34764+}
34765+
34766+static int au_wbr_create_mfs(struct dentry *dentry, unsigned int flags)
34767+{ /* most-free-space policy: returns a branch index or a negative error; au_wbr_create_exp() takes precedence */
34768+ int err;
34769+ struct dentry *parent;
34770+ struct super_block *sb;
34771+ struct au_wbr_mfs *mfs;
34772+
34773+ err = au_wbr_create_exp(dentry);
34774+ if (err >= 0)
34775+ goto out;
34776+
34777+ sb = dentry->d_sb;
34778+ parent = NULL; /* NULL makes au_mfs() consider every branch */
34779+ if (au_ftest_wbr(flags, PARENT))
34780+ parent = dget_parent(dentry);
34781+ mfs = &au_sbi(sb)->si_wbr_mfs;
34782+ mutex_lock(&mfs->mfs_lock);
34783+ if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
34784+ || mfs->mfs_bindex < 0
34785+ || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex)))
34786+ au_mfs(dentry, parent); /* cached choice is stale, unset or read-only: recompute */
34787+ err = mfs->mfs_bindex; /* snapshot under mfs_lock: au_mfs() resets it to -EROFS while recomputing */
34788+ mutex_unlock(&mfs->mfs_lock);
34789+ dput(parent);
34790+
34791+ if (err >= 0)
34792+ err = au_wbr_nonopq(dentry, err);
34793+
34794+out:
34795+ AuDbg("b%d\n", err);
34796+ return err;
34797+}
34798+
34799+static int au_wbr_create_init_mfs(struct super_block *sb)
34800+{ /* initialise mfs_lock and reset the cached choice; always returns 0 */
34801+ struct au_wbr_mfs *mfs;
34802+
34803+ mfs = &au_sbi(sb)->si_wbr_mfs;
34804+ mutex_init(&mfs->mfs_lock);
34805+ mfs->mfs_jiffy = 0;
34806+ mfs->mfs_bindex = -EROFS; /* negative index makes the create policies call au_mfs() on first use */
34807+
34808+ return 0;
34809+}
34810+
34811+static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused)
34812+{ /* counterpart of au_wbr_create_init_mfs(): destroy mfs_lock; always returns 0 */
34813+ mutex_destroy(&au_sbi(sb)->si_wbr_mfs.mfs_lock);
34814+ return 0;
34815+}
34816+
34817+/* ---------------------------------------------------------------------- */
34818+
34819+/* top down regardless parent, and then mfs: first writable branch above the watermark, else the mfs branch */
34820+static int au_wbr_create_tdmfs(struct dentry *dentry,
34821+ unsigned int flags __maybe_unused)
34822+{
34823+ int err;
34824+ aufs_bindex_t bwh, btail, bindex, bfound, bmfs;
34825+ unsigned long long watermark;
34826+ struct super_block *sb;
34827+ struct au_wbr_mfs *mfs;
34828+ struct au_branch *br;
34829+ struct dentry *parent;
34830+
34831+ sb = dentry->d_sb;
34832+ mfs = &au_sbi(sb)->si_wbr_mfs;
34833+ mutex_lock(&mfs->mfs_lock);
34834+ if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
34835+ || mfs->mfs_bindex < 0)
34836+ au_mfs(dentry, /*parent*/NULL);
34837+ watermark = mfs->mfsrr_watermark; /* both values are snapshotted while mfs_lock is held */
34838+ bmfs = mfs->mfs_bindex;
34839+ mutex_unlock(&mfs->mfs_lock);
34840+
34841+ /* another style of au_wbr_create_exp() */
34842+ bwh = au_dbwh(dentry);
34843+ parent = dget_parent(dentry);
34844+ btail = au_dbtaildir(parent);
34845+ if (bwh >= 0 && bwh < btail)
34846+ btail = bwh;
34847+
34848+ err = au_wbr_nonopq(dentry, btail);
34849+ if (unlikely(err < 0))
34850+ goto out;
34851+ btail = err; /* lowest branch index the scan below may choose */
34852+ bfound = -1;
34853+ for (bindex = 0; bindex <= btail; bindex++) {
34854+ br = au_sbr(sb, bindex);
34855+ if (au_br_rdonly(br))
34856+ continue;
34857+ if (br->br_wbr->wbr_bytes > watermark) { /* wbr_bytes is the value cached by au_mfs() */
34858+ bfound = bindex;
34859+ break;
34860+ }
34861+ }
34862+ err = bfound;
34863+ if (err < 0)
34864+ err = bmfs; /* nothing above the watermark: fall back to the mfs choice (may itself be negative) */
34865+
34866+out:
34867+ dput(parent);
34868+ AuDbg("b%d\n", err);
34869+ return err;
34870+}
34871+
34872+/* ---------------------------------------------------------------------- */
34873+
34874+/* most free space and then round robin: switch to rr when mfsrr_bytes is below mfsrr_watermark */
34875+static int au_wbr_create_mfsrr(struct dentry *dentry, unsigned int flags)
34876+{
34877+ int err;
34878+ struct au_wbr_mfs *mfs;
34879+
34880+ err = au_wbr_create_mfs(dentry, flags);
34881+ if (err >= 0) {
34882+ mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs;
34883+ mutex_lock(&mfs->mfs_lock);
34884+ if (mfs->mfsrr_bytes < mfs->mfsrr_watermark)
34885+ err = au_wbr_create_rr(dentry, flags); /* runs with mfs_lock held; au_wbr_create_rr() as shown does not take it */
34886+ mutex_unlock(&mfs->mfs_lock);
34887+ }
34888+
34889+ AuDbg("b%d\n", err);
34890+ return err;
34891+}
34892+
34893+static int au_wbr_create_init_mfsrr(struct super_block *sb)
34894+{ /* initialise both the mfs and the rr state; returns the rr initialiser's result */
34895+ int err;
34896+
34897+ au_wbr_create_init_mfs(sb); /* ignore */
34898+ err = au_wbr_create_init_rr(sb);
34899+
34900+ return err;
34901+}
34902+
34903+/* ---------------------------------------------------------------------- */
34904+
34905+/* top down parent and most free space: start from the tdp choice, then prefer a parent-holding branch with more free bytes */
34906+static int au_wbr_create_pmfs(struct dentry *dentry, unsigned int flags)
34907+{
34908+ int err, e2;
34909+ unsigned long long b;
34910+ aufs_bindex_t bindex, btop, bbot;
34911+ struct super_block *sb;
34912+ struct dentry *parent, *h_parent;
34913+ struct au_branch *br;
34914+
34915+ err = au_wbr_create_tdp(dentry, flags);
34916+ if (unlikely(err < 0))
34917+ goto out;
34918+ parent = dget_parent(dentry);
34919+ btop = au_dbtop(parent);
34920+ bbot = au_dbtaildir(parent);
34921+ if (btop == bbot)
34922+ goto out_parent; /* success */
34923+
34924+ e2 = au_wbr_create_mfs(dentry, flags); /* only its sign is used below */
34925+ if (e2 < 0)
34926+ goto out_parent; /* success */
34927+
34928+ /* when the available size is equal, select upper one */
34929+ sb = dentry->d_sb;
34930+ br = au_sbr(sb, err);
34931+ b = br->br_wbr->wbr_bytes;
34932+ AuDbg("b%d, %llu\n", err, b);
34933+
34934+ for (bindex = btop; bindex <= bbot; bindex++) {
34935+ h_parent = au_h_dptr(parent, bindex);
34936+ if (!h_parent || d_is_negative(h_parent))
34937+ continue; /* parent does not exist on this branch */
34938+
34939+ br = au_sbr(sb, bindex);
34940+ if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) { /* strict ">" keeps the earlier branch on ties */
34941+ b = br->br_wbr->wbr_bytes;
34942+ err = bindex;
34943+ AuDbg("b%d, %llu\n", err, b);
34944+ }
34945+ }
34946+
34947+ if (err >= 0)
34948+ err = au_wbr_nonopq(dentry, err);
34949+
34950+out_parent:
34951+ dput(parent);
34952+out:
34953+ AuDbg("b%d\n", err);
34954+ return err;
34955+}
34956+
34957+/* ---------------------------------------------------------------------- */
34958+
34959+/*
34960+ * - top down parent
34961+ * - most free space with parent
34962+ * - most free space round-robin regardless parent (when the chosen branch is below the watermark)
34963+ */
34964+static int au_wbr_create_pmfsrr(struct dentry *dentry, unsigned int flags)
34965+{
34966+ int err;
34967+ unsigned long long watermark;
34968+ struct super_block *sb;
34969+ struct au_branch *br;
34970+ struct au_wbr_mfs *mfs;
34971+
34972+ err = au_wbr_create_pmfs(dentry, flags | AuWbr_PARENT);
34973+ if (unlikely(err < 0))
34974+ goto out;
34975+
34976+ sb = dentry->d_sb;
34977+ br = au_sbr(sb, err);
34978+ mfs = &au_sbi(sb)->si_wbr_mfs;
34979+ mutex_lock(&mfs->mfs_lock);
34980+ watermark = mfs->mfsrr_watermark; /* snapshot under mfs_lock */
34981+ mutex_unlock(&mfs->mfs_lock);
34982+ if (br->br_wbr->wbr_bytes < watermark)
34983+ /* regardless the parent dir */
34984+ err = au_wbr_create_mfsrr(dentry, flags); /* original @flags, without the forced AuWbr_PARENT */
34985+
34986+out:
34987+ AuDbg("b%d\n", err);
34988+ return err;
34989+}
34990+
34991+/* ---------------------------------------------------------------------- */
34992+
34993+/* policies for copyup */
34994+
34995+/* top down parent: same selection as the create policy au_wbr_create_tdp() */
34996+static int au_wbr_copyup_tdp(struct dentry *dentry)
34997+{
34998+ return au_wbr_create_tdp(dentry, /*flags, anything is ok*/0);
34999+}
35000+
35001+/* bottom up parent: scan upward from the dentry's top branch for a writable branch holding the parent */
35002+static int au_wbr_copyup_bup(struct dentry *dentry)
35003+{
35004+ int err;
35005+ aufs_bindex_t bindex, btop;
35006+ struct dentry *parent, *h_parent;
35007+ struct super_block *sb;
35008+
35009+ err = -EROFS;
35010+ sb = dentry->d_sb;
35011+ parent = dget_parent(dentry);
35012+ btop = au_dbtop(parent); /* note: btop is the parent's top branch, not the dentry's */
35013+ for (bindex = au_dbtop(dentry); bindex >= btop; bindex--) {
35014+ h_parent = au_h_dptr(parent, bindex);
35015+ if (!h_parent || d_is_negative(h_parent))
35016+ continue; /* parent does not exist on this branch */
35017+
35018+ if (!au_br_rdonly(au_sbr(sb, bindex))) {
35019+ err = bindex;
35020+ break;
35021+ }
35022+ }
35023+ dput(parent);
35024+
35025+ /* bottom up here */
35026+ if (unlikely(err < 0))
35027+ err = au_wbr_bu(sb, btop - 1); /* continue above the parent's top branch */
35028+
35029+ AuDbg("b%d\n", err);
35030+ return err;
35031+}
35032+
35033+/* bottom up: first writable branch scanning from @btop toward index 0 */
35034+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t btop)
35035+{
35036+ int err;
35037+
35038+ err = au_wbr_bu(dentry->d_sb, btop);
35039+ AuDbg("b%d\n", err);
35040+ if (err > btop) /* NOTE(review): au_wbr_bu() as shown returns <= @btop or -EROFS, so this looks unreachable -- confirm intent */
35041+ err = au_wbr_nonopq(dentry, err);
35042+
35043+ AuDbg("b%d\n", err);
35044+ return err;
35045+}
35046+
35047+static int au_wbr_copyup_bu(struct dentry *dentry)
35048+{ /* copyup-policy entry point: au_wbr_do_copyup_bu() starting at the dentry's top branch */
35049+ int err;
35050+ aufs_bindex_t btop;
35051+
35052+ btop = au_dbtop(dentry);
35053+ err = au_wbr_do_copyup_bu(dentry, btop);
35054+ return err;
35055+}
35056+
35057+/* ---------------------------------------------------------------------- */
35058+
35059+struct au_wbr_copyup_operations au_wbr_copyup_ops[] = { /* copyup policy table, indexed by AuWbrCopyup_* */
35060+ [AuWbrCopyup_TDP] = {
35061+ .copyup = au_wbr_copyup_tdp
35062+ },
35063+ [AuWbrCopyup_BUP] = {
35064+ .copyup = au_wbr_copyup_bup
35065+ },
35066+ [AuWbrCopyup_BU] = {
35067+ .copyup = au_wbr_copyup_bu
35068+ }
35069+};
35070+
35071+struct au_wbr_create_operations au_wbr_create_ops[] = { /* create policy table, indexed by AuWbrCreate_*; each *V entry repeats its non-V twin */
35072+ [AuWbrCreate_TDP] = {
35073+ .create = au_wbr_create_tdp /* no .init/.fin set for this policy */
35074+ },
35075+ [AuWbrCreate_RR] = {
35076+ .create = au_wbr_create_rr,
35077+ .init = au_wbr_create_init_rr /* no .fin set for this policy */
35078+ },
35079+ [AuWbrCreate_MFS] = {
35080+ .create = au_wbr_create_mfs,
35081+ .init = au_wbr_create_init_mfs,
35082+ .fin = au_wbr_create_fin_mfs
35083+ },
35084+ [AuWbrCreate_MFSV] = {
35085+ .create = au_wbr_create_mfs,
35086+ .init = au_wbr_create_init_mfs,
35087+ .fin = au_wbr_create_fin_mfs
35088+ },
35089+ [AuWbrCreate_MFSRR] = {
35090+ .create = au_wbr_create_mfsrr,
35091+ .init = au_wbr_create_init_mfsrr,
35092+ .fin = au_wbr_create_fin_mfs
35093+ },
35094+ [AuWbrCreate_MFSRRV] = {
35095+ .create = au_wbr_create_mfsrr,
35096+ .init = au_wbr_create_init_mfsrr,
35097+ .fin = au_wbr_create_fin_mfs
35098+ },
35099+ [AuWbrCreate_TDMFS] = {
35100+ .create = au_wbr_create_tdmfs,
35101+ .init = au_wbr_create_init_mfs,
35102+ .fin = au_wbr_create_fin_mfs
35103+ },
35104+ [AuWbrCreate_TDMFSV] = {
35105+ .create = au_wbr_create_tdmfs,
35106+ .init = au_wbr_create_init_mfs,
35107+ .fin = au_wbr_create_fin_mfs
35108+ },
35109+ [AuWbrCreate_PMFS] = {
35110+ .create = au_wbr_create_pmfs,
35111+ .init = au_wbr_create_init_mfs,
35112+ .fin = au_wbr_create_fin_mfs
35113+ },
35114+ [AuWbrCreate_PMFSV] = {
35115+ .create = au_wbr_create_pmfs,
35116+ .init = au_wbr_create_init_mfs,
35117+ .fin = au_wbr_create_fin_mfs
35118+ },
35119+ [AuWbrCreate_PMFSRR] = {
35120+ .create = au_wbr_create_pmfsrr,
35121+ .init = au_wbr_create_init_mfsrr,
35122+ .fin = au_wbr_create_fin_mfs
35123+ },
35124+ [AuWbrCreate_PMFSRRV] = {
35125+ .create = au_wbr_create_pmfsrr,
35126+ .init = au_wbr_create_init_mfsrr,
35127+ .fin = au_wbr_create_fin_mfs
35128+ }
35129+};
35130diff -urN /usr/share/empty/fs/aufs/whout.c linux/fs/aufs/whout.c
35131--- /usr/share/empty/fs/aufs/whout.c 1970-01-01 01:00:00.000000000 +0100
35132+++ linux/fs/aufs/whout.c 2022-11-05 23:02:18.972555950 +0100
35133@@ -0,0 +1,1072 @@
35134+// SPDX-License-Identifier: GPL-2.0
35135+/*
35136+ * Copyright (C) 2005-2022 Junjiro R. Okajima
35137+ *
35138+ * This program is free software; you can redistribute it and/or modify
35139+ * it under the terms of the GNU General Public License as published by
35140+ * the Free Software Foundation; either version 2 of the License, or
35141+ * (at your option) any later version.
35142+ *
35143+ * This program is distributed in the hope that it will be useful,
35144+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
35145+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35146+ * GNU General Public License for more details.
35147+ *
35148+ * You should have received a copy of the GNU General Public License
35149+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
35150+ */
35151+
35152+/*
35153+ * whiteout for logical deletion and opaque directory
35154+ */
35155+
35156+#include "aufs.h"
35157+
35158+#define WH_MASK 0444 /* presumably the permission bits used for whiteout files -- its users are outside this view */
35159+
35160+/*
35161+ * If a directory contains this file, then it is opaque. We start with the
35162+ * .wh. flag so that it is blocked by lookup.
35163+ */
35164+static struct qstr diropq_name = QSTR_INIT(AUFS_WH_DIROPQ,
35165+ sizeof(AUFS_WH_DIROPQ) - 1); /* "- 1": length excludes the literal's terminating NUL */
35166+
35167+/*
35168+ * generate a whiteout name (AUFS_WH_PFX + original name), which is NOT NUL-terminated.
35169+ * @wh: whiteout qstr to fill in (both name and len are set)
35170+ * @name: original qstr, i.e. d_name (name and len)
35171+ * returns zero on success, otherwise -ENAMETOOLONG or -ENOMEM.
35172+ * on success wh->name is kmalloc-ed and should be freed by kfree().
35173+ * wh->name is left NULL when the allocation fails.
35174+ */
35175+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name)
35176+{
35177+ char *p;
35178+
35179+ if (unlikely(name->len > PATH_MAX - AUFS_WH_PFX_LEN))
35180+ return -ENAMETOOLONG; /* @wh is not touched on this path */
35181+
35182+ wh->len = name->len + AUFS_WH_PFX_LEN;
35183+ p = kmalloc(wh->len, GFP_NOFS); /* exactly wh->len bytes: no room for a terminator */
35184+ wh->name = p;
35185+ if (p) {
35186+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
35187+ memcpy(p + AUFS_WH_PFX_LEN, name->name, name->len);
35188+ /* smp_mb(); */
35189+ return 0;
35190+ }
35191+ return -ENOMEM;
35192+}
35193+
35194+/* ---------------------------------------------------------------------- */
35195+
35196+/*
35197+ * test if the @wh_name exists under @h_ppath: returns 1 if so, 0 if not, negative on error.
35198+ * @try_sio specifies the necessity of super-io.
35199+ */
35200+int au_wh_test(struct user_namespace *h_userns, struct path *h_ppath,
35201+ struct qstr *wh_name, int try_sio)
35202+{
35203+ int err;
35204+ struct dentry *wh_dentry;
35205+
35206+ if (!try_sio)
35207+ wh_dentry = vfsub_lkup_one(wh_name, h_ppath);
35208+ else
35209+ wh_dentry = au_sio_lkup_one(h_userns, wh_name, h_ppath);
35210+ err = PTR_ERR(wh_dentry);
35211+ if (IS_ERR(wh_dentry)) {
35212+ if (err == -ENAMETOOLONG)
35213+ err = 0; /* a too-long name is reported as "no whiteout" */
35214+ goto out;
35215+ }
35216+
35217+ err = 0;
35218+ if (d_is_negative(wh_dentry))
35219+ goto out_wh; /* success */
35220+
35221+ err = 1; /* a regular file with this name counts as a whiteout */
35222+ if (d_is_reg(wh_dentry))
35223+ goto out_wh; /* success */
35224+
35225+ err = -EIO; /* the name exists but is not a regular file */
35226+ AuIOErr("%pd Invalid whiteout entry type 0%o.\n",
35227+ wh_dentry, d_inode(wh_dentry)->i_mode);
35228+
35229+out_wh:
35230+ dput(wh_dentry);
35231+out:
35232+ return err;
35233+}
35234+
35235+/*
35236+ * test if the @h_path->dentry sets opaque or not, via au_wh_test() on diropq_name.
35237+ */
35238+int au_diropq_test(struct user_namespace *h_userns, struct path *h_path)
35239+{
35240+ int err;
35241+ struct inode *h_dir;
35242+
35243+ h_dir = d_inode(h_path->dentry);
35244+ err = au_wh_test(h_userns, h_path, &diropq_name,
35245+ au_test_h_perm_sio(h_userns, h_dir, MAY_EXEC)); /* result is passed as @try_sio */
35246+ return err;
35247+}
35248+
35249+/*
35250+ * returns a negative dentry whose name is unique and temporary (or an ERR_PTR from the lookup).
35251+ */
35252+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
35253+ struct qstr *prefix)
35254+{
35255+ struct dentry *dentry;
35256+ int i;
35257+ char defname[NAME_MAX - AUFS_MAX_NAMELEN + DNAME_INLINE_LEN + 1],
35258+ *name, *p;
35259+ /* strict atomic_t is unnecessary here */
35260+ static unsigned short cnt;
35261+ struct qstr qs;
35262+ struct path h_ppath;
35263+ struct user_namespace *h_userns;
35264+
35265+ BUILD_BUG_ON(sizeof(cnt) * 2 > AUFS_WH_TMP_LEN); /* two hex digits per byte of cnt must fit */
35266+
35267+ name = defname;
35268+ qs.len = sizeof(defname) - DNAME_INLINE_LEN + prefix->len - 1;
35269+ if (unlikely(prefix->len > DNAME_INLINE_LEN)) { /* prefix too long for defname[]: use a heap buffer */
35270+ dentry = ERR_PTR(-ENAMETOOLONG);
35271+ if (unlikely(qs.len > NAME_MAX))
35272+ goto out;
35273+ dentry = ERR_PTR(-ENOMEM);
35274+ name = kmalloc(qs.len + 1, GFP_NOFS);
35275+ if (unlikely(!name))
35276+ goto out;
35277+ }
35278+
35279+ /* doubly whiteout-ed */
35280+ memcpy(name, AUFS_WH_PFX AUFS_WH_PFX, AUFS_WH_PFX_LEN * 2);
35281+ p = name + AUFS_WH_PFX_LEN * 2;
35282+ memcpy(p, prefix->name, prefix->len);
35283+ p += prefix->len;
35284+ *p++ = '.'; /* @p now points at the slot for the hex counter */
35285+ AuDebugOn(name + qs.len + 1 - p <= AUFS_WH_TMP_LEN);
35286+
35287+ h_ppath.dentry = h_parent;
35288+ h_ppath.mnt = au_br_mnt(br);
35289+ h_userns = au_br_userns(br);
35290+ qs.name = name;
35291+ for (i = 0; i < 3; i++) { /* try three consecutive counter values */
35292+ sprintf(p, "%.*x", AUFS_WH_TMP_LEN, cnt++);
35293+ dentry = au_sio_lkup_one(h_userns, &qs, &h_ppath);
35294+ if (IS_ERR(dentry) || d_is_negative(dentry))
35295+ goto out_name; /* lookup error or an unused name: hand it to the caller */
35296+ dput(dentry);
35297+ }
35298+ /* pr_warn("could not get random name\n"); */
35299+ dentry = ERR_PTR(-EEXIST);
35300+ AuDbg("%.*s\n", AuLNPair(&qs));
35301+ BUG(); /* reached only when all three candidate names already exist */
35302+
35303+out_name:
35304+ if (name != defname)
35305+ au_kfree_try_rcu(name);
35306+out:
35307+ AuTraceErrPtr(dentry);
35308+ return dentry;
35309+}
35310+
35311+/*
35312+ * rename the @h_dentry on @br to the whiteouted temporary name obtained from au_whtmp_lkup().
35313+ */
35314+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br)
35315+{
35316+ int err;
35317+ struct path h_path = {
35318+ .mnt = au_br_mnt(br)
35319+ };
35320+ struct inode *h_dir, *delegated;
35321+ struct dentry *h_parent;
35322+
35323+ h_parent = h_dentry->d_parent; /* dir inode is locked */
35324+ h_dir = d_inode(h_parent);
35325+ IMustLock(h_dir);
35326+
35327+ h_path.dentry = au_whtmp_lkup(h_parent, br, &h_dentry->d_name);
35328+ err = PTR_ERR(h_path.dentry);
35329+ if (IS_ERR(h_path.dentry))
35330+ goto out;
35331+
35332+ /* under the same dir, no need to lock_rename() */
35333+ delegated = NULL;
35334+ err = vfsub_rename(h_dir, h_dentry, h_dir, &h_path, &delegated,
35335+ /*flags*/0);
35336+ AuTraceErr(err);
35337+ if (unlikely(err == -EWOULDBLOCK)) {
35338+ pr_warn("cannot retry for NFSv4 delegation"
35339+ " for an internal rename\n");
35340+ iput(delegated); /* no retry: just release the inode handed back via @delegated */
35341+ }
35342+ dput(h_path.dentry);
35343+
35344+out:
35345+ AuTraceErr(err);
35346+ return err;
35347+}
35348+
35349+/* ---------------------------------------------------------------------- */
35350+/*
35351+ * functions for removing a whiteout
35352+ */
35353+
35354+static int do_unlink_wh(struct inode *h_dir, struct path *h_path)
35355+{
35356+ int err, force;
35357+ struct inode *delegated;
35358+
35359+ /*
35360+ * force superio when @h_dir is sticky and the caller's fsuid is not
35361+ * the whiteout's owner. this may be a violation of unix fs semantics.
35362+ */
35363+ force = (h_dir->i_mode & S_ISVTX)
35364+ && !uid_eq(current_fsuid(), d_inode(h_path->dentry)->i_uid);
35365+ delegated = NULL;
35366+ err = vfsub_unlink(h_dir, h_path, &delegated, force);
35367+ if (unlikely(err == -EWOULDBLOCK)) {
35368+ pr_warn("cannot retry for NFSv4 delegation"
35369+ " for an internal unlink\n");
35370+ iput(delegated);
35371+ }
35372+ return err; /* vfsub_unlink() result, -EWOULDBLOCK included */
35373+}
35374+
35375+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
35376+ struct dentry *dentry)
35377+{
35378+ int err;
35379+
35380+ err = do_unlink_wh(h_dir, h_path); /* unlink the whiteout @h_path */
35381+ if (!err && dentry) /* @dentry may be NULL */
35382+ au_set_dbwh(dentry, -1); /* on success, reset its dbwh to -1 */
35383+
35384+ return err;
35385+}
35386+
35387+static int unlink_wh_name(struct path *h_ppath, struct qstr *wh)
35388+{
35389+ int err;
35390+ struct path h_path;
35391+
35392+ err = 0; /* stays 0 when @wh is not a regular file */
35393+ h_path.dentry = vfsub_lkup_one(wh, h_ppath);
35394+ if (IS_ERR(h_path.dentry))
35395+ err = PTR_ERR(h_path.dentry);
35396+ else {
35397+ if (d_is_reg(h_path.dentry)) { /* only a regular file is unlinked */
35398+ h_path.mnt = h_ppath->mnt;
35399+ err = do_unlink_wh(d_inode(h_ppath->dentry), &h_path);
35400+ }
35401+ dput(h_path.dentry); /* release the lookup result */
35402+ }
35403+
35404+ return err;
35405+}
35406+
35407+/* ---------------------------------------------------------------------- */
35408+/*
35409+ * initialize/clean whiteout for a branch
35410+ */
35411+
35412+static void au_wh_clean(struct inode *h_dir, struct path *whpath,
35413+ const int isdir)
35414+{
35415+ int err;
35416+ struct inode *delegated;
35417+
35418+ if (d_is_negative(whpath->dentry))
35419+ return; /* nothing to remove */
35420+
35421+ if (isdir) /* @isdir selects rmdir instead of unlink */
35422+ err = vfsub_rmdir(h_dir, whpath);
35423+ else {
35424+ delegated = NULL;
35425+ err = vfsub_unlink(h_dir, whpath, &delegated, /*force*/0);
35426+ if (unlikely(err == -EWOULDBLOCK)) {
35427+ pr_warn("cannot retry for NFSv4 delegation"
35428+ " for an internal unlink\n");
35429+ iput(delegated);
35430+ }
35431+ }
35432+ if (unlikely(err)) /* best effort: a failure is only logged */
35433+ pr_warn("failed removing %pd (%d), ignored.\n",
35434+ whpath->dentry, err);
35435+}
35436+
35437+static int test_linkable(struct dentry *h_root)
35438+{
35439+ struct inode *h_dir = d_inode(h_root);
35440+
35441+ if (h_dir->i_op->link)
35442+ return 0; /* the inode operations provide ->link() */
35443+
35444+ pr_err("%pd (%s) doesn't support link(2), use noplink and rw+nolwh\n",
35445+ h_root, au_sbtype(h_root->d_sb));
35446+ return -ENOSYS; /* the branch doesn't have its ->link() */
35447+}
35448+
35449+/* todo: should this mkdir be done in /sbin/mount.aufs helper? */
35450+static int au_whdir(struct inode *h_dir, struct path *path)
35451+{
35452+ int err;
35453+
35454+ err = -EEXIST; /* returned when a non-dir already exists */
35455+ if (d_is_negative(path->dentry)) {
35456+ int mode = 0700;
35457+
35458+ if (au_test_nfs(path->dentry->d_sb))
35459+ mode |= 0111; /* 0711 instead of 0700 when au_test_nfs() is true */
35460+ err = vfsub_mkdir(h_dir, path, mode);
35461+ } else if (d_is_dir(path->dentry))
35462+ err = 0; /* already a directory, used as is */
35463+ else
35464+ pr_err("unknown %pd exists\n", path->dentry);
35465+
35466+ return err;
35467+}
35468+
35469+struct au_wh_base { /* one entry per AuBrWh_* index in au_wh_init() */
35470+ const struct qstr *name; /* name passed to au_wh_lkup() */
35471+ struct dentry *dentry; /* result of that lookup, or NULL */
35472+};
35473+
35474+static void au_wh_init_ro(struct inode *h_dir, struct au_wh_base base[],
35475+ struct path *h_path)
35476+{
35477+ h_path->dentry = base[AuBrWh_BASE].dentry; /* removed as a file */
35478+ au_wh_clean(h_dir, h_path, /*isdir*/0);
35479+ h_path->dentry = base[AuBrWh_PLINK].dentry; /* removed as a dir */
35480+ au_wh_clean(h_dir, h_path, /*isdir*/1);
35481+ h_path->dentry = base[AuBrWh_ORPH].dentry; /* removed as a dir */
35482+ au_wh_clean(h_dir, h_path, /*isdir*/1);
35483+}
35484+
35485+/*
35486+ * returns tri-state,
35487+ * minus: error, caller should print the message
35488+ * zero: success
35489+ * plus: error, caller should NOT print the message
35490+ */
35491+static int au_wh_init_rw_nolink(struct dentry *h_root, struct au_wbr *wbr,
35492+ int do_plink, struct au_wh_base base[],
35493+ struct path *h_path)
35494+{
35495+ int err;
35496+ struct inode *h_dir;
35497+
35498+ h_dir = d_inode(h_root);
35499+ h_path->dentry = base[AuBrWh_BASE].dentry;
35500+ au_wh_clean(h_dir, h_path, /*isdir*/0); /* the base file is removed here */
35501+ h_path->dentry = base[AuBrWh_PLINK].dentry;
35502+ if (do_plink) {
35503+ err = test_linkable(h_root);
35504+ if (unlikely(err)) {
35505+ err = 1; /* positive: test_linkable() already printed */
35506+ goto out;
35507+ }
35508+
35509+ err = au_whdir(h_dir, h_path);
35510+ if (unlikely(err))
35511+ goto out;
35512+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry); /* own ref for @wbr */
35513+ } else
35514+ au_wh_clean(h_dir, h_path, /*isdir*/1); /* no plink, remove the dir */
35515+ h_path->dentry = base[AuBrWh_ORPH].dentry;
35516+ err = au_whdir(h_dir, h_path);
35517+ if (unlikely(err))
35518+ goto out;
35519+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry); /* own ref for @wbr */
35520+
35521+out:
35522+ return err;
35523+}
35524+
35525+/*
35526+ * for the moment, aufs supports the branch filesystem which does not support
35527+ * link(2). testing on FAT which does not support i_op->setattr() fully either,
35528+ * copyup failed. finally, such filesystem will not be used as the writable
35529+ * branch.
35530+ *
35531+ * returns tri-state, see above.
35532+ */
35533+static int au_wh_init_rw(struct dentry *h_root, struct au_wbr *wbr,
35534+ int do_plink, struct au_wh_base base[],
35535+ struct path *h_path)
35536+{
35537+ int err;
35538+ struct inode *h_dir;
35539+
35540+ WbrWhMustWriteLock(wbr);
35541+
35542+ err = test_linkable(h_root);
35543+ if (unlikely(err)) {
35544+ err = 1; /* positive: test_linkable() already printed */
35545+ goto out;
35546+ }
35547+
35548+ /*
35549+ * todo: should this create be done in /sbin/mount.aufs helper?
35550+ */
35551+ err = -EEXIST; /* returned when a non-regular base exists */
35552+ h_dir = d_inode(h_root);
35553+ if (d_is_negative(base[AuBrWh_BASE].dentry)) {
35554+ h_path->dentry = base[AuBrWh_BASE].dentry;
35555+ err = vfsub_create(h_dir, h_path, WH_MASK, /*want_excl*/true);
35556+ } else if (d_is_reg(base[AuBrWh_BASE].dentry))
35557+ err = 0; /* an existing regular file is used as is */
35558+ else
35559+ pr_err("unknown %pd2 exists\n", base[AuBrWh_BASE].dentry);
35560+ if (unlikely(err))
35561+ goto out;
35562+
35563+ h_path->dentry = base[AuBrWh_PLINK].dentry;
35564+ if (do_plink) {
35565+ err = au_whdir(h_dir, h_path);
35566+ if (unlikely(err))
35567+ goto out;
35568+ wbr->wbr_plink = dget(base[AuBrWh_PLINK].dentry); /* own ref for @wbr */
35569+ } else
35570+ au_wh_clean(h_dir, h_path, /*isdir*/1); /* no plink, remove the dir */
35571+ wbr->wbr_whbase = dget(base[AuBrWh_BASE].dentry); /* own ref for @wbr */
35572+
35573+ h_path->dentry = base[AuBrWh_ORPH].dentry;
35574+ err = au_whdir(h_dir, h_path);
35575+ if (unlikely(err))
35576+ goto out;
35577+ wbr->wbr_orph = dget(base[AuBrWh_ORPH].dentry); /* own ref for @wbr */
35578+
35579+out:
35580+ return err;
35581+}
35582+
35583+/*
35584+ * initialize the whiteout base file/dir for @br.
35585+ */
35586+int au_wh_init(struct au_branch *br, struct super_block *sb)
35587+{
35588+ int err, i;
35589+ const unsigned char do_plink
35590+ = !!au_opt_test(au_mntflags(sb), PLINK);
35591+ struct inode *h_dir;
35592+ struct path path = br->br_path; /* local copy, .dentry is rewritten by the helpers */
35593+ struct dentry *h_root = path.dentry;
35594+ struct au_wbr *wbr = br->br_wbr; /* may be NULL, see the tests below */
35595+ static const struct qstr base_name[] = {
35596+ [AuBrWh_BASE] = QSTR_INIT(AUFS_BASE_NAME,
35597+ sizeof(AUFS_BASE_NAME) - 1),
35598+ [AuBrWh_PLINK] = QSTR_INIT(AUFS_PLINKDIR_NAME,
35599+ sizeof(AUFS_PLINKDIR_NAME) - 1),
35600+ [AuBrWh_ORPH] = QSTR_INIT(AUFS_ORPHDIR_NAME,
35601+ sizeof(AUFS_ORPHDIR_NAME) - 1)
35602+ };
35603+ struct au_wh_base base[] = {
35604+ [AuBrWh_BASE] = {
35605+ .name = base_name + AuBrWh_BASE,
35606+ .dentry = NULL
35607+ },
35608+ [AuBrWh_PLINK] = {
35609+ .name = base_name + AuBrWh_PLINK,
35610+ .dentry = NULL
35611+ },
35612+ [AuBrWh_ORPH] = {
35613+ .name = base_name + AuBrWh_ORPH,
35614+ .dentry = NULL
35615+ }
35616+ };
35617+
35618+ if (wbr)
35619+ WbrWhMustWriteLock(wbr);
35620+
35621+ for (i = 0; i < AuBrWh_Last; i++) {
35622+ /* doubly whiteouted */
35623+ struct dentry *d;
35624+
35625+ d = au_wh_lkup(h_root, (void *)base[i].name, br);
35626+ err = PTR_ERR(d);
35627+ if (IS_ERR(d))
35628+ goto out; /* entries not looked up yet are still NULL */
35629+
35630+ base[i].dentry = d;
35631+ AuDebugOn(wbr
35632+ && wbr->wbr_wh[i]
35633+ && wbr->wbr_wh[i] != base[i].dentry);
35634+ }
35635+
35636+ if (wbr)
35637+ for (i = 0; i < AuBrWh_Last; i++) {
35638+ dput(wbr->wbr_wh[i]); /* release the currently recorded dentry */
35639+ wbr->wbr_wh[i] = NULL;
35640+ }
35641+
35642+ err = 0;
35643+ if (!au_br_writable(br->br_perm)) {
35644+ h_dir = d_inode(h_root);
35645+ au_wh_init_ro(h_dir, base, &path);
35646+ } else if (!au_br_wh_linkable(br->br_perm)) {
35647+ err = au_wh_init_rw_nolink(h_root, wbr, do_plink, base, &path);
35648+ if (err > 0)
35649+ goto out; /* positive: already reported, returned as is */
35650+ else if (err)
35651+ goto out_err;
35652+ } else {
35653+ err = au_wh_init_rw(h_root, wbr, do_plink, base, &path);
35654+ if (err > 0)
35655+ goto out; /* positive: already reported, returned as is */
35656+ else if (err)
35657+ goto out_err;
35658+ }
35659+ goto out; /* success */
35660+
35661+out_err:
35662+ pr_err("an error(%d) on the writable branch %pd(%s)\n",
35663+ err, h_root, au_sbtype(h_root->d_sb));
35664+out:
35665+ for (i = 0; i < AuBrWh_Last; i++)
35666+ dput(base[i].dentry); /* lookup refs; @wbr got its own via dget() */
35667+ return err;
35668+}
35669+
35670+/* ---------------------------------------------------------------------- */
35671+/*
35672+ * whiteouts are usually all hard links to the whiteout base.
35673+ * when its link count reaches the ceiling, a new whiteout base is created
35674+ * asynchronously.
35675+ */
35676+
35677+struct reinit_br_wh { /* argument of the reinit_br_wh() work function */
35678+ struct super_block *sb;
35679+ struct au_branch *br; /* br_count is held while the work is queued */
35680+};
35681+
35682+static void reinit_br_wh(void *arg)
35683+{
35684+ int err;
35685+ aufs_bindex_t bindex;
35686+ struct path h_path;
35687+ struct reinit_br_wh *a = arg;
35688+ struct au_wbr *wbr;
35689+ struct inode *dir, *delegated;
35690+ struct dentry *h_root;
35691+ struct au_hinode *hdir;
35692+
35693+ err = 0;
35694+ wbr = a->br->br_wbr;
35695+ /* big aufs lock */
35696+ si_noflush_write_lock(a->sb);
35697+ if (!au_br_writable(a->br->br_perm))
35698+ goto out; /* the branch is not writable now */
35699+ bindex = au_br_index(a->sb, a->br->br_id);
35700+ if (unlikely(bindex < 0))
35701+ goto out; /* au_br_index() failed */
35702+
35703+ di_read_lock_parent(a->sb->s_root, AuLock_IR);
35704+ dir = d_inode(a->sb->s_root);
35705+ hdir = au_hi(dir, bindex);
35706+ h_root = au_h_dptr(a->sb->s_root, bindex);
35707+ AuDebugOn(h_root != au_br_dentry(a->br));
35708+
35709+ au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
35710+ wbr_wh_write_lock(wbr);
35711+ err = au_h_verify(wbr->wbr_whbase, au_opt_udba(a->sb), hdir->hi_inode,
35712+ h_root, a->br);
35713+ if (!err) {
35714+ h_path.dentry = wbr->wbr_whbase; /* unlink the old whbase */
35715+ h_path.mnt = au_br_mnt(a->br);
35716+ delegated = NULL;
35717+ err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated,
35718+ /*force*/0);
35719+ if (unlikely(err == -EWOULDBLOCK)) {
35720+ pr_warn("cannot retry for NFSv4 delegation"
35721+ " for an internal unlink\n");
35722+ iput(delegated);
35723+ }
35724+ } else {
35725+ pr_warn("%pd is moved, ignored\n", wbr->wbr_whbase);
35726+ err = 0; /* a verify failure is not treated as an error */
35727+ }
35728+ dput(wbr->wbr_whbase);
35729+ wbr->wbr_whbase = NULL;
35730+ if (!err)
35731+ err = au_wh_init(a->br, a->sb); /* set up the whiteout base again */
35732+ wbr_wh_write_unlock(wbr);
35733+ au_hn_inode_unlock(hdir);
35734+ di_read_unlock(a->sb->s_root, AuLock_IR);
35735+ if (!err)
35736+ au_fhsm_wrote(a->sb, bindex, /*force*/0);
35737+
35738+out:
35739+ if (wbr)
35740+ atomic_dec(&wbr->wbr_wh_running); /* incremented by kick_reinit_br_wh() */
35741+ au_lcnt_dec(&a->br->br_count); /* incremented by kick_reinit_br_wh() */
35742+ si_write_unlock(a->sb);
35743+ au_nwt_done(&au_sbi(a->sb)->si_nowait);
35744+ au_kfree_rcu(a); /* allocated by kick_reinit_br_wh() */
35745+ if (unlikely(err))
35746+ AuIOErr("err %d\n", err);
35747+}
35748+
35749+static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br)
35750+{
35751+ int do_dec, wkq_err;
35752+ struct reinit_br_wh *arg;
35753+
35754+ do_dec = 1;
35755+ if (atomic_inc_return(&br->br_wbr->wbr_wh_running) != 1)
35756+ goto out; /* wbr_wh_running was already non-zero */
35757+
35758+ /* ignore ENOMEM */
35759+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
35760+ if (arg) {
35761+ /*
35762+ * dec(wh_running), kfree(arg) and dec(br_count)
35763+ * in reinit function
35764+ */
35765+ arg->sb = sb;
35766+ arg->br = br;
35767+ au_lcnt_inc(&br->br_count);
35768+ wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb, /*flags*/0);
35769+ if (unlikely(wkq_err)) { /* not queued, undo everything here */
35770+ atomic_dec(&br->br_wbr->wbr_wh_running);
35771+ au_lcnt_dec(&br->br_count);
35772+ au_kfree_rcu(arg);
35773+ }
35774+ do_dec = 0; /* the counter is handled above or by reinit_br_wh() */
35775+ }
35776+
35777+out:
35778+ if (do_dec)
35779+ atomic_dec(&br->br_wbr->wbr_wh_running);
35780+}
35781+
35782+/* ---------------------------------------------------------------------- */
35783+
35784+/*
35785+ * create the whiteout @wh: link it to wbr_whbase, or create a new file.
35786+ */
35787+static int link_or_create_wh(struct super_block *sb, aufs_bindex_t bindex,
35788+ struct dentry *wh)
35789+{
35790+ int err;
35791+ struct path h_path = {
35792+ .dentry = wh
35793+ };
35794+ struct au_branch *br;
35795+ struct au_wbr *wbr;
35796+ struct dentry *h_parent;
35797+ struct inode *h_dir, *delegated;
35798+
35799+ h_parent = wh->d_parent; /* dir inode is locked */
35800+ h_dir = d_inode(h_parent);
35801+ IMustLock(h_dir);
35802+
35803+ br = au_sbr(sb, bindex);
35804+ h_path.mnt = au_br_mnt(br);
35805+ wbr = br->br_wbr;
35806+ wbr_wh_read_lock(wbr);
35807+ if (wbr->wbr_whbase) {
35808+ delegated = NULL;
35809+ err = vfsub_link(wbr->wbr_whbase, h_dir, &h_path, &delegated);
35810+ if (unlikely(err == -EWOULDBLOCK)) {
35811+ pr_warn("cannot retry for NFSv4 delegation"
35812+ " for an internal link\n");
35813+ iput(delegated);
35814+ }
35815+ if (!err || err != -EMLINK)
35816+ goto out; /* linked, or failed with other than -EMLINK */
35817+
35818+ /* link count full. re-initialize br_whbase. */
35819+ kick_reinit_br_wh(sb, br);
35820+ }
35821+
35822+ /* no whbase or -EMLINK: create a file, and return this error */
35823+ err = vfsub_create(h_dir, &h_path, WH_MASK, /*want_excl*/true);
35824+ if (!err)
35825+ au_fhsm_wrote(sb, bindex, /*force*/0);
35826+
35827+out:
35828+ wbr_wh_read_unlock(wbr);
35829+ return err;
35830+}
35831+
35832+/* ---------------------------------------------------------------------- */
35833+
35834+/*
35835+ * create or remove the diropq.
35836+ */
35837+static struct dentry *do_diropq(struct dentry *dentry, aufs_bindex_t bindex,
35838+ unsigned int flags)
35839+{
35840+ struct dentry *opq_dentry;
35841+ struct super_block *sb;
35842+ struct au_branch *br;
35843+ struct path h_path;
35844+ int err;
35845+
35846+ sb = dentry->d_sb;
35847+ br = au_sbr(sb, bindex);
35848+ h_path.dentry = au_h_dptr(dentry, bindex);
35849+ h_path.mnt = au_br_mnt(br);
35850+ opq_dentry = vfsub_lkup_one(&diropq_name, &h_path);
35851+ if (IS_ERR(opq_dentry))
35852+ goto out; /* the lookup error is returned as is */
35853+
35854+ if (au_ftest_diropq(flags, CREATE)) {
35855+ err = link_or_create_wh(sb, bindex, opq_dentry);
35856+ if (!err) {
35857+ au_set_dbdiropq(dentry, bindex);
35858+ goto out; /* success, the diropq dentry is returned */
35859+ }
35860+ } else {
35861+ h_path.dentry = opq_dentry;
35862+ err = do_unlink_wh(au_h_iptr(d_inode(dentry), bindex), &h_path);
35863+ if (!err)
35864+ au_set_dbdiropq(dentry, -1);
35865+ }
35866+ dput(opq_dentry);
35867+ opq_dentry = ERR_PTR(err); /* err is 0 after a successful removal */
35868+
35869+out:
35870+ return opq_dentry;
35871+}
35872+
35873+struct do_diropq_args { /* argument pack for call_do_diropq() */
35874+ struct dentry **errp; /* where the do_diropq() result is stored */
35875+ struct dentry *dentry;
35876+ aufs_bindex_t bindex;
35877+ unsigned int flags;
35878+};
35879+
35880+static void call_do_diropq(void *args) /* passed to au_wkq_wait() */
35881+{
35882+ struct do_diropq_args *a = args;
35883+ *a->errp = do_diropq(a->dentry, a->bindex, a->flags);
35884+}
35885+
35886+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
35887+ unsigned int flags)
35888+{
35889+ struct dentry *diropq, *h_dentry;
35890+ struct user_namespace *h_userns;
35891+
35892+ h_userns = au_sbr_userns(dentry->d_sb, bindex);
35893+ h_dentry = au_h_dptr(dentry, bindex);
35894+ if (!au_test_h_perm_sio(h_userns, d_inode(h_dentry),
35895+ MAY_EXEC | MAY_WRITE))
35896+ diropq = do_diropq(dentry, bindex, flags); /* direct call */
35897+ else {
35898+ int wkq_err;
35899+ struct do_diropq_args args = {
35900+ .errp = &diropq,
35901+ .dentry = dentry,
35902+ .bindex = bindex,
35903+ .flags = flags
35904+ };
35905+
35906+ wkq_err = au_wkq_wait(call_do_diropq, &args); /* run it via the workqueue */
35907+ if (unlikely(wkq_err))
35908+ diropq = ERR_PTR(wkq_err); /* the workqueue error wins */
35909+ }
35910+
35911+ return diropq; /* see do_diropq() for the possible values */
35912+}
35913+
35914+/* ---------------------------------------------------------------------- */
35915+
35916+/*
35917+ * lookup whiteout dentry.
35918+ * @h_parent: lower parent dentry which must exist and be locked
35919+ * @base_name: name of dentry which will be whiteouted
35920+ * returns dentry for whiteout.
35921+ */
35922+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
35923+ struct au_branch *br)
35924+{
35925+ int err;
35926+ struct qstr wh_name;
35927+ struct dentry *wh_dentry;
35928+ struct path h_path;
35929+
35930+ err = au_wh_name_alloc(&wh_name, base_name);
35931+ wh_dentry = ERR_PTR(err); /* returned as is when the name alloc fails */
35932+ if (!err) {
35933+ h_path.dentry = h_parent;
35934+ h_path.mnt = au_br_mnt(br);
35935+ wh_dentry = vfsub_lkup_one(&wh_name, &h_path);
35936+ au_kfree_try_rcu(wh_name.name); /* the name is needed for the lookup only */
35937+ }
35938+ return wh_dentry; /* lookup result, may be an error pointer */
35939+}
35940+
35941+/*
35942+ * link/create a whiteout for @dentry on @bindex.
35943+ */
35944+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
35945+ struct dentry *h_parent)
35946+{
35947+ struct dentry *wh_dentry;
35948+ struct super_block *sb;
35949+ int err;
35950+
35951+ sb = dentry->d_sb;
35952+ wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, au_sbr(sb, bindex));
35953+ if (!IS_ERR(wh_dentry) && d_is_negative(wh_dentry)) { /* not existing yet */
35954+ err = link_or_create_wh(sb, bindex, wh_dentry);
35955+ if (!err) {
35956+ au_set_dbwh(dentry, bindex);
35957+ au_fhsm_wrote(sb, bindex, /*force*/0);
35958+ } else {
35959+ dput(wh_dentry);
35960+ wh_dentry = ERR_PTR(err);
35961+ }
35962+ }
35963+
35964+ return wh_dentry; /* an error or an already positive dentry is returned as is */
35965+}
35966+
35967+/* ---------------------------------------------------------------------- */
35968+
35969+/* Delete all whiteouts listed in @whlist for branch @bindex under @h_path. */
35970+static int del_wh_children(struct path *h_path, struct au_nhash *whlist,
35971+ aufs_bindex_t bindex)
35972+{
35973+ int err;
35974+ unsigned long ul, n;
35975+ struct qstr wh_name;
35976+ char *p;
35977+ struct hlist_head *head;
35978+ struct au_vdir_wh *pos;
35979+ struct au_vdir_destr *str;
35980+
35981+ err = -ENOMEM;
35982+ p = (void *)__get_free_page(GFP_NOFS); /* one page as the name buffer */
35983+ wh_name.name = p;
35984+ if (unlikely(!wh_name.name))
35985+ goto out;
35986+
35987+ err = 0;
35988+ memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN); /* the prefix is written once */
35989+ p += AUFS_WH_PFX_LEN; /* each entry's name is copied right after it */
35990+ n = whlist->nh_num;
35991+ head = whlist->nh_head;
35992+ for (ul = 0; !err && ul < n; ul++, head++) { /* stops at the first error */
35993+ hlist_for_each_entry(pos, head, wh_hash) {
35994+ if (pos->wh_bindex != bindex)
35995+ continue; /* recorded for another branch */
35996+
35997+ str = &pos->wh_str;
35998+ /* NOTE(review): the buffer is one page; assumes PAGE_SIZE >= PATH_MAX */
35999+ if (str->len + AUFS_WH_PFX_LEN <= PATH_MAX) {
36000+ memcpy(p, str->name, str->len);
36001+ wh_name.len = AUFS_WH_PFX_LEN + str->len;
36002+ err = unlink_wh_name(h_path, &wh_name);
36003+ if (!err)
36004+ continue;
36005+ break;
36006+ }
36007+ AuIOErr("whiteout name too long %.*s\n",
36008+ str->len, str->name);
36009+ err = -EIO;
36010+ break;
36011+ }
36012+ }
36013+ free_page((unsigned long)wh_name.name);
36014+
36015+out:
36016+ return err;
36017+}
36017+
36018+struct del_wh_children_args { /* argument pack for call_del_wh_children() */
36019+ int *errp; /* where the del_wh_children() result is stored */
36020+ struct path *h_path;
36021+ struct au_nhash *whlist;
36022+ aufs_bindex_t bindex;
36023+};
36024+
36025+static void call_del_wh_children(void *args) /* passed to au_wkq_wait() */
36026+{
36027+ struct del_wh_children_args *a = args;
36028+ *a->errp = del_wh_children(a->h_path, a->whlist, a->bindex);
36029+}
36030+
36031+/* ---------------------------------------------------------------------- */
36032+
36033+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp)
36034+{
36035+ struct au_whtmp_rmdir *whtmp;
36036+ int err;
36037+ unsigned int rdhash;
36038+
36039+ SiMustAnyLock(sb);
36040+
36041+ whtmp = kzalloc(sizeof(*whtmp), gfp); /* zeroed: ->dir, ->br, ->wh_dentry are NULL */
36042+ if (unlikely(!whtmp)) {
36043+ whtmp = ERR_PTR(-ENOMEM);
36044+ goto out;
36045+ }
36046+
36047+ /* no estimation for dir size */
36048+ rdhash = au_sbi(sb)->si_rdhash;
36049+ if (!rdhash)
36050+ rdhash = AUFS_RDHASH_DEF; /* si_rdhash is 0, use the default */
36051+ err = au_nhash_alloc(&whtmp->whlist, rdhash, gfp);
36052+ if (unlikely(err)) {
36053+ au_kfree_rcu(whtmp);
36054+ whtmp = ERR_PTR(err);
36055+ }
36056+
36057+out:
36058+ return whtmp; /* a valid pointer or an error pointer, never NULL */
36059+}
36060+
36061+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp)
36062+{
36063+ if (whtmp->br) /* set by au_whtmp_kick_rmdir() only */
36064+ au_lcnt_dec(&whtmp->br->br_count);
36065+ dput(whtmp->wh_dentry);
36066+ iput(whtmp->dir);
36067+ au_nhash_wh_free(&whtmp->whlist);
36068+ au_kfree_rcu(whtmp); /* @whtmp must not be used after this */
36069+}
36070+
36071+/*
36072+ * rmdir the whiteouted temporary named dir @wh_dentry.
36073+ * @whlist: whiteouted children.
36074+ */
36075+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
36076+ struct dentry *wh_dentry, struct au_nhash *whlist)
36077+{
36078+ int err;
36079+ unsigned int h_nlink;
36080+ struct path wh_path;
36081+ struct inode *wh_inode, *h_dir;
36082+ struct au_branch *br;
36083+ struct user_namespace *h_userns;
36084+
36085+ h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */
36086+ IMustLock(h_dir);
36087+
36088+ br = au_sbr(dir->i_sb, bindex);
36089+ wh_path.dentry = wh_dentry;
36090+ wh_path.mnt = au_br_mnt(br);
36091+ h_userns = au_br_userns(br);
36092+ wh_inode = d_inode(wh_dentry);
36093+ inode_lock_nested(wh_inode, AuLsc_I_CHILD);
36094+
36095+ /*
36096+ * someone else might change some whiteouts while we were sleeping.
36097+ * it means this whlist may have an obsoleted entry.
36098+ */
36099+ if (!au_test_h_perm_sio(h_userns, wh_inode, MAY_EXEC | MAY_WRITE))
36100+ err = del_wh_children(&wh_path, whlist, bindex); /* direct call */
36101+ else {
36102+ int wkq_err;
36103+ struct del_wh_children_args args = {
36104+ .errp = &err,
36105+ .h_path = &wh_path,
36106+ .whlist = whlist,
36107+ .bindex = bindex
36108+ };
36109+
36110+ wkq_err = au_wkq_wait(call_del_wh_children, &args);
36111+ if (unlikely(wkq_err))
36112+ err = wkq_err; /* the workqueue error wins */
36113+ }
36114+ inode_unlock(wh_inode);
36115+
36116+ if (!err) {
36117+ h_nlink = h_dir->i_nlink;
36118+ err = vfsub_rmdir(h_dir, &wh_path);
36119+ /* some fs doesn't change the parent nlink in some cases */
36120+ h_nlink -= h_dir->i_nlink; /* non-zero if rmdir changed the parent nlink */
36121+ }
36122+
36123+ if (!err) {
36124+ if (au_ibtop(dir) == bindex) {
36125+ /* todo: dir->i_mutex is necessary */
36126+ au_cpup_attr_timesizes(dir);
36127+ if (h_nlink)
36128+ vfsub_drop_nlink(dir);
36129+ }
36130+ return 0; /* success */
36131+ }
36132+
36133+ pr_warn("failed removing %pd(%d), ignored\n", wh_dentry, err);
36134+ return err; /* logged as ignored, but still returned to the caller */
36135+}
36136+
36137+static void call_rmdir_whtmp(void *args)
36138+{
36139+ int err;
36140+ aufs_bindex_t bindex;
36141+ struct au_whtmp_rmdir *a = args;
36142+ struct super_block *sb;
36143+ struct dentry *h_parent;
36144+ struct inode *h_dir;
36145+ struct au_hinode *hdir;
36146+
36147+ /* rmdir by nfsd may cause deadlock with this i_mutex */
36148+ /* inode_lock(a->dir); */
36149+ err = -EROFS; /* reported by both early exits below */
36150+ sb = a->dir->i_sb;
36151+ si_read_lock(sb, !AuLock_FLUSH);
36152+ if (!au_br_writable(a->br->br_perm))
36153+ goto out;
36154+ bindex = au_br_index(sb, a->br->br_id);
36155+ if (unlikely(bindex < 0))
36156+ goto out;
36157+
36158+ err = -EIO; /* NOTE(review): overwritten below before any use */
36159+ ii_write_lock_parent(a->dir);
36160+ h_parent = dget_parent(a->wh_dentry);
36161+ h_dir = d_inode(h_parent);
36162+ hdir = au_hi(a->dir, bindex);
36163+ err = vfsub_mnt_want_write(au_br_mnt(a->br));
36164+ if (unlikely(err))
36165+ goto out_mnt;
36166+ au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
36167+ err = au_h_verify(a->wh_dentry, au_opt_udba(sb), h_dir, h_parent,
36168+ a->br);
36169+ if (!err)
36170+ err = au_whtmp_rmdir(a->dir, bindex, a->wh_dentry, &a->whlist);
36171+ au_hn_inode_unlock(hdir);
36172+ vfsub_mnt_drop_write(au_br_mnt(a->br));
36173+
36174+out_mnt:
36175+ dput(h_parent);
36176+ ii_write_unlock(a->dir);
36177+out:
36178+ /* inode_unlock(a->dir); */
36179+ au_whtmp_rmdir_free(a); /* drops br_count, wh_dentry and dir, frees @a */
36180+ si_read_unlock(sb); /* @sb was saved above, @a is gone here */
36181+ au_nwt_done(&au_sbi(sb)->si_nowait);
36182+ if (unlikely(err))
36183+ AuIOErr("err %d\n", err);
36184+}
36185+
36186+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
36187+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args)
36188+{
36189+ int wkq_err;
36190+ struct super_block *sb;
36191+
36192+ IMustLock(dir);
36193+
36194+ /* all post-process will be done in call_rmdir_whtmp(). */
36195+ sb = dir->i_sb;
36196+ args->dir = au_igrab(dir);
36197+ args->br = au_sbr(sb, bindex);
36198+ au_lcnt_inc(&args->br->br_count);
36199+ args->wh_dentry = dget(wh_dentry);
36200+ wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, sb, /*flags*/0);
36201+ if (unlikely(wkq_err)) {
36202+ pr_warn("rmdir error %pd (%d), ignored\n", wh_dentry, wkq_err);
36203+ au_whtmp_rmdir_free(args); /* not queued: drop the refs and free @args */
36204+ }
36205+}
36206diff -urN /usr/share/empty/fs/aufs/whout.h linux/fs/aufs/whout.h
36207--- /usr/share/empty/fs/aufs/whout.h 1970-01-01 01:00:00.000000000 +0100
36208+++ linux/fs/aufs/whout.h 2022-11-05 23:02:18.972555950 +0100
36209@@ -0,0 +1,87 @@
36210+/* SPDX-License-Identifier: GPL-2.0 */
36211+/*
36212+ * Copyright (C) 2005-2022 Junjiro R. Okajima
36213+ *
36214+ * This program is free software; you can redistribute it and/or modify
36215+ * it under the terms of the GNU General Public License as published by
36216+ * the Free Software Foundation; either version 2 of the License, or
36217+ * (at your option) any later version.
36218+ *
36219+ * This program is distributed in the hope that it will be useful,
36220+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
36221+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36222+ * GNU General Public License for more details.
36223+ *
36224+ * You should have received a copy of the GNU General Public License
36225+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
36226+ */
36227+
36228+/*
36229+ * whiteout for logical deletion and opaque directory
36230+ */
36231+
36232+#ifndef __AUFS_WHOUT_H__
36233+#define __AUFS_WHOUT_H__
36234+
36235+#ifdef __KERNEL__
36236+
36237+#include "dir.h"
36238+
36239+/* functions defined in whout.c */
36240+int au_wh_name_alloc(struct qstr *wh, const struct qstr *name);
36241+int au_wh_test(struct user_namespace *h_userns, struct path *h_ppath,
36242+ struct qstr *wh_name, int try_sio);
36243+int au_diropq_test(struct user_namespace *h_userns, struct path *h_path);
36244+struct au_branch; /* forward declaration, used via pointers only here */
36245+struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
36246+ struct qstr *prefix);
36247+int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br);
36248+int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
36249+ struct dentry *dentry);
36250+int au_wh_init(struct au_branch *br, struct super_block *sb);
36251+
36252+/* diropq flags; the macros take the name without the AuDiropq_ prefix */
36253+#define AuDiropq_CREATE 1 /* create the diropq, otherwise remove it */
36254+#define au_ftest_diropq(flags, name) ((flags) & AuDiropq_##name)
36255+#define au_fset_diropq(flags, name) \
36256+ do { (flags) |= AuDiropq_##name; } while (0)
36257+#define au_fclr_diropq(flags, name) \
36258+ do { (flags) &= ~AuDiropq_##name; } while (0)
36259+
36260+struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
36261+ unsigned int flags); /* @flags: AuDiropq_* */
36262+struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
36263+ struct au_branch *br);
36264+struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
36265+ struct dentry *h_parent);
36266+
36267+/* real rmdir for the whiteout-ed dir; argument of the deferred rmdir */
36268+struct au_whtmp_rmdir {
36269+ struct inode *dir; /* released by iput() in au_whtmp_rmdir_free() */
36270+ struct au_branch *br; /* its br_count is dropped in au_whtmp_rmdir_free() */
36271+ struct dentry *wh_dentry; /* released by dput() in au_whtmp_rmdir_free() */
36272+ struct au_nhash whlist; /* whiteouted children, see au_whtmp_rmdir() */
36273+};
36274+
36275+struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp);
36276+void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp);
36277+int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
36278+ struct dentry *wh_dentry, struct au_nhash *whlist);
36279+void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
36280+ struct dentry *wh_dentry, struct au_whtmp_rmdir *args);
36281+
36282+/* ---------------------------------------------------------------------- */
36283+
36284+static inline struct dentry *au_diropq_create(struct dentry *dentry,
36285+ aufs_bindex_t bindex)
36286+{
36287+ return au_diropq_sio(dentry, bindex, AuDiropq_CREATE); /* CREATE flag set */
36288+}
36289+
36290+static inline int au_diropq_remove(struct dentry *dentry, aufs_bindex_t bindex)
36291+{
36292+ return PTR_ERR(au_diropq_sio(dentry, bindex, !AuDiropq_CREATE)); /* flags 0 */
36293+}
36294+
36295+#endif /* __KERNEL__ */
36296+#endif /* __AUFS_WHOUT_H__ */
36297diff -urN /usr/share/empty/fs/aufs/wkq.c linux/fs/aufs/wkq.c
36298--- /usr/share/empty/fs/aufs/wkq.c 1970-01-01 01:00:00.000000000 +0100
36299+++ linux/fs/aufs/wkq.c 2022-11-05 23:02:18.972555950 +0100
36300@@ -0,0 +1,372 @@
36301+// SPDX-License-Identifier: GPL-2.0
36302+/*
36303+ * Copyright (C) 2005-2022 Junjiro R. Okajima
36304+ *
36305+ * This program is free software; you can redistribute it and/or modify
36306+ * it under the terms of the GNU General Public License as published by
36307+ * the Free Software Foundation; either version 2 of the License, or
36308+ * (at your option) any later version.
36309+ *
36310+ * This program is distributed in the hope that it will be useful,
36311+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
36312+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36313+ * GNU General Public License for more details.
36314+ *
36315+ * You should have received a copy of the GNU General Public License
36316+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
36317+ */
36318+
36319+/*
36320+ * workqueue for asynchronous/super-io operations
36321+ * todo: try new credential scheme
36322+ */
36323+
36324+#include <linux/module.h>
36325+#include "aufs.h"
36326+
36327+/* internal workqueue named AUFS_WKQ_NAME, private to this file */
36328+
36329+static struct workqueue_struct *au_wkq;
36330+
36331+struct au_wkinfo {
36332+ struct work_struct wk;
36333+ struct kobject *kobj;
36334+
36335+ unsigned int flags; /* see wkq.h */
36336+
36337+ au_wkq_func_t func;
36338+ void *args;
36339+
36340+#ifdef CONFIG_LOCKDEP
36341+ int dont_check; /* lockdep_recursing() result, see au_wkq_lockdep_alloc() */
36342+ struct held_lock **hlock; /* NULL-terminated array, or NULL */
36343+#endif
36344+
36345+ struct completion *comp;
36346+};
36347+
36348+/* ---------------------------------------------------------------------- */
36349+/*
36350+ * Aufs passes some operations to the workqueue such as the internal copyup.
36351+ * This scheme looks rather unnatural for LOCKDEP debugging feature, since the
36352+ * job run by workqueue depends upon the locks acquired in the other task.
36353+ * Delegating a small operation to the workqueue, aufs passes its lockdep
36354+ * information too. And the job in the workqueue restores the info in order to
36355+ * pretend as if it acquired those locks. This is just to make LOCKDEP work
36356+ * correctly and expectedly.
36357+ */
36358+
36359+#ifndef CONFIG_LOCKDEP /* presumably empty stubs; AuStub* are defined elsewhere */
36360+AuStubInt0(au_wkq_lockdep_alloc, struct au_wkinfo *wkinfo);
36361+AuStubVoid(au_wkq_lockdep_free, struct au_wkinfo *wkinfo);
36362+AuStubVoid(au_wkq_lockdep_pre, struct au_wkinfo *wkinfo);
36363+AuStubVoid(au_wkq_lockdep_post, struct au_wkinfo *wkinfo);
36364+AuStubVoid(au_wkq_lockdep_init, struct au_wkinfo *wkinfo);
36365+#else
36366+static void au_wkq_lockdep_init(struct au_wkinfo *wkinfo)
36367+{
36368+ wkinfo->hlock = NULL; /* nothing captured yet */
36369+ wkinfo->dont_check = 0;
36370+}
36371+
36372+/*
36373+ * 1: matched, @key is recorded for one of the lock names in a[]
36374+ * 0: unmatched
36375+ */
36376+static int au_wkq_lockdep_test(struct lock_class_key *key, const char *name)
36377+{
36378+ static DEFINE_SPINLOCK(spin);
36379+ static struct {
36380+ char *name;
36381+ struct lock_class_key *key;
36382+ } a[] = {
36383+ { .name = "&sbinfo->si_rwsem" },
36384+ { .name = "&finfo->fi_rwsem" },
36385+ { .name = "&dinfo->di_rwsem" },
36386+ { .name = "&iinfo->ii_rwsem" }
36387+ };
36388+ static int set; /* number of a[] entries whose key has been recorded */
36389+ int i;
36390+
36391+ /* lockless read from 'set.' see below */
36392+ if (set == ARRAY_SIZE(a)) { /* all keys are known, compare only */
36393+ for (i = 0; i < ARRAY_SIZE(a); i++)
36394+ if (a[i].key == key)
36395+ goto match;
36396+ goto unmatch;
36397+ }
36398+
36399+ spin_lock(&spin);
36400+ if (set)
36401+ for (i = 0; i < ARRAY_SIZE(a); i++)
36402+ if (a[i].key == key) {
36403+ spin_unlock(&spin);
36404+ goto match;
36405+ }
36406+ for (i = 0; i < ARRAY_SIZE(a); i++) {
36407+ if (a[i].key) {
36408+ if (unlikely(a[i].key == key)) { /* rare but possible */
36409+ spin_unlock(&spin);
36410+ goto match;
36411+ } else
36412+ continue;
36413+ }
36414+ if (strstr(a[i].name, name)) { /* @name occurs within a[i].name */
36415+ /*
36416+ * the order of these three lines is important for the
36417+ * lockless read above.
36418+ */
36419+ a[i].key = key;
36420+ spin_unlock(&spin);
36421+ set++; /* NOTE(review): incremented after the unlock — confirm intended */
36422+ /* AuDbg("%d, %s\n", set, name); */
36423+ goto match;
36424+ }
36425+ }
36426+ spin_unlock(&spin);
36427+ goto unmatch;
36428+
36429+match:
36430+ return 1;
36431+unmatch:
36432+ return 0;
36433+}
36434+/* remember in wkinfo->hlock which of the locks held by current belong to the aufs rwsem classes; returns 0 or -ENOMEM */
36435+static int au_wkq_lockdep_alloc(struct au_wkinfo *wkinfo)
36436+{
36437+ int err, n;
36438+ struct task_struct *curr;
36439+ struct held_lock **hl, *held_locks, *p;
36440+
36441+ err = 0;
36442+ curr = current;
36443+ wkinfo->dont_check = lockdep_recursing(curr);
36444+ if (wkinfo->dont_check)
36445+ goto out;
36446+ n = curr->lockdep_depth;
36447+ if (!n)
36448+ goto out; /* nothing is held, hlock is left as the caller initialised it */
36449+
36450+ err = -ENOMEM;
36451+ wkinfo->hlock = kmalloc_array(n + 1, sizeof(*wkinfo->hlock), GFP_NOFS); /* +1 for the NULL terminator */
36452+ if (unlikely(!wkinfo->hlock))
36453+ goto out;
36454+
36455+ err = 0;
36456+#if 0 /* left for debugging */
36457+ if (0 && au_debug_test())
36458+ lockdep_print_held_locks(curr);
36459+#endif
36460+ held_locks = curr->held_locks;
36461+ hl = wkinfo->hlock;
36462+ while (n--) {
36463+ p = held_locks++;
36464+ if (au_wkq_lockdep_test(p->instance->key, p->instance->name))
36465+ *hl++ = p; /* stores a pointer into curr->held_locks[], not a copy */
36466+ }
36467+ *hl = NULL;
36468+
36469+out:
36470+ return err;
36471+}
36472+/* free the array set up by au_wkq_lockdep_alloc(); hlock can still be NULL here, presumably au_kfree_try_rcu() accepts that -- confirm */
36473+static void au_wkq_lockdep_free(struct au_wkinfo *wkinfo)
36474+{
36475+ au_kfree_try_rcu(wkinfo->hlock);
36476+}
36477+/* run before the job: lockdep_off() when dont_check is set, then report every recorded rwsem to lockdep as acquired here, for read or for write according to p->read */
36478+static void au_wkq_lockdep_pre(struct au_wkinfo *wkinfo)
36479+{
36480+ struct held_lock *p, **hl = wkinfo->hlock;
36481+ int subclass;
36482+
36483+ if (wkinfo->dont_check)
36484+ lockdep_off();
36485+ if (!hl)
36486+ return;
36487+ while ((p = *hl++)) { /* assignment */
36488+ subclass = lockdep_hlock_class(p)->subclass;
36489+ /* AuDbg("%s, %d\n", p->instance->name, subclass); */
36490+ if (p->read)
36491+ rwsem_acquire_read(p->instance, subclass, 0,
36492+ /*p->acquire_ip*/_RET_IP_);
36493+ else
36494+ rwsem_acquire(p->instance, subclass, 0,
36495+ /*p->acquire_ip*/_RET_IP_);
36496+ }
36497+}
36498+/* run after the job, mirroring au_wkq_lockdep_pre(): lockdep_on() when dont_check is set, then rwsem_release() for every recorded lock */
36499+static void au_wkq_lockdep_post(struct au_wkinfo *wkinfo)
36500+{
36501+ struct held_lock *p, **hl = wkinfo->hlock;
36502+
36503+ if (wkinfo->dont_check)
36504+ lockdep_on();
36505+ if (!hl)
36506+ return;
36507+ while ((p = *hl++)) /* assignment */
36508+ rwsem_release(p->instance, /*p->acquire_ip*/_RET_IP_);
36509+}
36510+#endif
36511+/* work handler: run wkinfo->func(wkinfo->args) between the lockdep pre/post hooks, then wake the waiter (WAIT) or release what au_wkq_nowait() took */
36512+static void wkq_func(struct work_struct *wk)
36513+{
36514+ struct au_wkinfo *wkinfo = container_of(wk, struct au_wkinfo, wk);
36515+
36516+ AuDebugOn(!uid_eq(current_fsuid(), GLOBAL_ROOT_UID));
36517+ AuDebugOn(rlimit(RLIMIT_FSIZE) != RLIM_INFINITY);
36518+
36519+ au_wkq_lockdep_pre(wkinfo);
36520+ wkinfo->func(wkinfo->args);
36521+ au_wkq_lockdep_post(wkinfo);
36522+ if (au_ftest_wkq(wkinfo->flags, WAIT))
36523+ complete(wkinfo->comp); /* the waiter, au_wkq_do_wait(), owns wkinfo (on its stack) */
36524+ else {
36525+ kobject_put(wkinfo->kobj); /* pairs with kobject_get() in au_wkq_nowait() */
36526+ module_put(THIS_MODULE); /* todo: ?? */
36527+ au_kfree_rcu(wkinfo); /* allocated by kmalloc() in au_wkq_nowait() */
36528+ }
36529+}
36530+
36531+/*
36532+ * Since struct completion is large, try allocating it dynamically.
36533+ * Note that @name is not used: the declared pointer is always called 'comp'. */
36534+#define AuWkqCompDeclare(name) struct completion *comp = NULL
36535+/* allocate and initialise a completion, store it in both *comp and wkinfo->comp; returns 0 or -ENOMEM (*comp is NULL then) */
36536+static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
36537+{
36538+ *comp = kmalloc(sizeof(**comp), GFP_NOFS);
36539+ if (*comp) {
36540+ init_completion(*comp);
36541+ wkinfo->comp = *comp;
36542+ return 0;
36543+ }
36544+ return -ENOMEM;
36545+}
36546+/* release a completion obtained from au_wkq_comp_alloc() */
36547+static void au_wkq_comp_free(struct completion *comp)
36548+{
36549+ au_kfree_rcu(comp);
36550+}
36551+/* queue @wkinfo: a WAIT job goes to the aufs workqueue au_wkq as an on-stack work, any other job goes through schedule_work() */
36552+static void au_wkq_run(struct au_wkinfo *wkinfo)
36553+{
36554+ if (au_ftest_wkq(wkinfo->flags, NEST)) {
36555+ if (au_wkq_test()) { /* we are already running inside a workqueue worker */
36556+ AuWarn1("wkq from wkq, unless silly-rename on NFS,"
36557+ " due to a dead dir by UDBA,"
36558+ " or async xino write?\n");
36559+ AuDebugOn(au_ftest_wkq(wkinfo->flags, WAIT));
36560+ }
36561+ } else
36562+ au_dbg_verify_kthread();
36563+
36564+ if (au_ftest_wkq(wkinfo->flags, WAIT)) {
36565+ INIT_WORK_ONSTACK(&wkinfo->wk, wkq_func);
36566+ queue_work(au_wkq, &wkinfo->wk);
36567+ } else {
36568+ INIT_WORK(&wkinfo->wk, wkq_func);
36569+ schedule_work(&wkinfo->wk);
36570+ }
36571+}
36572+
36573+/*
36574+ * Run @func(@args) in the workqueue and sleep until it has finished.
36575+ * Returns 0 or -ENOMEM. NOTE(review): @flags is expected to contain
36576+ * AuWkq_WAIT, otherwise wkq_func() would free the on-stack wkinfo -- confirm.
36577+ * Be careful, a deadlock is easy: A locks then waits, B's job wants the lock.
36578+ */
36579+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args)
36580+{
36581+ int err;
36582+ AuWkqCompDeclare(comp);
36583+ struct au_wkinfo wkinfo = {
36584+ .flags = flags,
36585+ .func = func,
36586+ .args = args
36587+ };
36588+
36589+ err = au_wkq_comp_alloc(&wkinfo, &comp);
36590+ if (unlikely(err))
36591+ goto out;
36592+ err = au_wkq_lockdep_alloc(&wkinfo);
36593+ if (unlikely(err))
36594+ goto out_comp;
36595+ if (!err) { /* always true here, the error case jumped to out_comp above */
36596+ au_wkq_run(&wkinfo);
36597+ /* no timeout, no interrupt */
36598+ wait_for_completion(wkinfo.comp);
36599+ }
36600+ au_wkq_lockdep_free(&wkinfo);
36601+
36602+out_comp:
36603+ au_wkq_comp_free(comp);
36604+out:
36605+ destroy_work_on_stack(&wkinfo.wk); /* NOTE(review): also reached on the early error paths, i.e. before INIT_WORK_ONSTACK() ran in au_wkq_run() */
36606+ return err;
36607+}
36608+
36609+/*
36610+ * Queue @func(@args) and return without waiting; returns 0 or -ENOMEM.
36611+ * Note: dget/dput() in func for aufs dentries are not supported. It will be a
36612+ * problem in a concurrent umounting. */
36613+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
36614+ unsigned int flags)
36615+{
36616+ int err;
36617+ struct au_wkinfo *wkinfo;
36618+
36619+ atomic_inc(&au_sbi(sb)->si_nowait.nw_len); /* undone below only on failure; presumably @func calls au_nwt_done() on success -- confirm */
36620+
36621+ /*
36622+ * wkq_func() must free this wkinfo.
36623+ * it highly depends upon the implementation of workqueue.
36624+ */
36625+ err = 0;
36626+ wkinfo = kmalloc(sizeof(*wkinfo), GFP_NOFS);
36627+ if (wkinfo) {
36628+ wkinfo->kobj = &au_sbi(sb)->si_kobj;
36629+ wkinfo->flags = flags & ~AuWkq_WAIT; /* never WAIT here, so wkq_func() takes its freeing branch */
36630+ wkinfo->func = func;
36631+ wkinfo->args = args;
36632+ wkinfo->comp = NULL;
36633+ au_wkq_lockdep_init(wkinfo);
36634+ kobject_get(wkinfo->kobj); /* dropped by kobject_put() in wkq_func() */
36635+ __module_get(THIS_MODULE); /* todo: ?? */
36636+
36637+ au_wkq_run(wkinfo);
36638+ } else {
36639+ err = -ENOMEM;
36640+ au_nwt_done(&au_sbi(sb)->si_nowait);
36641+ }
36642+
36643+ return err;
36644+}
36645+
36646+/* ---------------------------------------------------------------------- */
36647+/* initialise @nwt: zero pending tasks and an empty wait queue */
36648+void au_nwt_init(struct au_nowait_tasks *nwt)
36649+{
36650+ atomic_set(&nwt->nw_len, 0);
36651+ /* smp_mb(); */ /* atomic_set */
36652+ init_waitqueue_head(&nwt->nw_wq);
36653+}
36654+/* destroy the aufs workqueue created by au_wkq_init() */
36655+void au_wkq_fin(void)
36656+{
36657+ destroy_workqueue(au_wkq);
36658+}
36659+/* init-time: create the aufs workqueue au_wkq; returns 0 or a negative errno */
36660+int __init au_wkq_init(void)
36661+{
36662+ int err;
36663+
36664+ err = 0;
36665+ au_wkq = alloc_workqueue(AUFS_WKQ_NAME, 0, WQ_DFL_ACTIVE);
36666+ if (IS_ERR(au_wkq)) /* both failure conventions, ERR_PTR and NULL, are handled */
36667+ err = PTR_ERR(au_wkq);
36668+ else if (!au_wkq)
36669+ err = -ENOMEM;
36670+
36671+ return err;
36672+}
36673diff -urN /usr/share/empty/fs/aufs/wkq.h linux/fs/aufs/wkq.h
36674--- /usr/share/empty/fs/aufs/wkq.h 1970-01-01 01:00:00.000000000 +0100
36675+++ linux/fs/aufs/wkq.h 2022-11-05 23:02:18.972555950 +0100
36676@@ -0,0 +1,89 @@
36677+/* SPDX-License-Identifier: GPL-2.0 */
36678+/*
36679+ * Copyright (C) 2005-2022 Junjiro R. Okajima
36680+ *
36681+ * This program is free software; you can redistribute it and/or modify
36682+ * it under the terms of the GNU General Public License as published by
36683+ * the Free Software Foundation; either version 2 of the License, or
36684+ * (at your option) any later version.
36685+ *
36686+ * This program is distributed in the hope that it will be useful,
36687+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
36688+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36689+ * GNU General Public License for more details.
36690+ *
36691+ * You should have received a copy of the GNU General Public License
36692+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
36693+ */
36694+
36695+/*
36696+ * workqueue for asynchronous/super-io operations
36697+ * todo: try new credentials management scheme
36698+ */
36699+
36700+#ifndef __AUFS_WKQ_H__
36701+#define __AUFS_WKQ_H__
36702+
36703+#ifdef __KERNEL__
36704+
36705+#include <linux/wait.h>
36706+
36707+struct super_block;
36708+
36709+/* ---------------------------------------------------------------------- */
36710+
36711+/*
36712+ * in the next operation, wait for the 'nowait' tasks in system-wide workqueue
36713+ */
36714+struct au_nowait_tasks {
36715+ atomic_t nw_len; /* pending tasks: incremented in au_wkq_nowait(), decremented in au_nwt_done() */
36716+ wait_queue_head_t nw_wq; /* au_nwt_flush() sleeps here until nw_len is zero */
36717+};
36718+
36719+/* ---------------------------------------------------------------------- */
36720+
36721+typedef void (*au_wkq_func_t)(void *args); /* job callback; wkq_func() calls it with the opaque @args given at submission */
36722+
36723+/* wkq flags; the helper macros take the suffix only, e.g. au_ftest_wkq(flags, WAIT) */
36724+#define AuWkq_WAIT 1 /* the submitter sleeps until the job has completed */
36725+#define AuWkq_NEST (1 << 1) /* submitting from inside a workqueue worker is tolerated, with a warning */
36726+#define au_ftest_wkq(flags, name) ((flags) & AuWkq_##name)
36727+#define au_fset_wkq(flags, name) \
36728+ do { (flags) |= AuWkq_##name; } while (0)
36729+#define au_fclr_wkq(flags, name) \
36730+ do { (flags) &= ~AuWkq_##name; } while (0)
36731+
36732+/* wkq.c */
36733+int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args); /* run @func and wait for it */
36734+int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
36735+ unsigned int flags); /* queue @func and return immediately */
36736+void au_nwt_init(struct au_nowait_tasks *nwt);
36737+int __init au_wkq_init(void);
36738+void au_wkq_fin(void);
36739+
36740+/* ---------------------------------------------------------------------- */
36741+/* non-zero when the current task has PF_WQ_WORKER set */
36742+static inline int au_wkq_test(void)
36743+{
36744+ return current->flags & PF_WQ_WORKER;
36745+}
36746+/* au_wkq_do_wait() with just the WAIT flag: run @func(@args) in the workqueue and wait for it */
36747+static inline int au_wkq_wait(au_wkq_func_t func, void *args)
36748+{
36749+ return au_wkq_do_wait(AuWkq_WAIT, func, args);
36750+}
36751+/* one 'nowait' task is finished: decrement nw_len and wake every waiter when it reaches zero */
36752+static inline void au_nwt_done(struct au_nowait_tasks *nwt)
36753+{
36754+ if (atomic_dec_and_test(&nwt->nw_len))
36755+ wake_up_all(&nwt->nw_wq);
36756+}
36757+/* sleep until nw_len is zero, i.e. no 'nowait' task is pending; always returns 0 */
36758+static inline int au_nwt_flush(struct au_nowait_tasks *nwt)
36759+{
36760+ wait_event(nwt->nw_wq, !atomic_read(&nwt->nw_len));
36761+ return 0;
36762+}
36763+
36764+#endif /* __KERNEL__ */
36765+#endif /* __AUFS_WKQ_H__ */
36766diff -urN /usr/share/empty/fs/aufs/xattr.c linux/fs/aufs/xattr.c
36767--- /usr/share/empty/fs/aufs/xattr.c 1970-01-01 01:00:00.000000000 +0100
36768+++ linux/fs/aufs/xattr.c 2023-02-20 21:05:51.959693785 +0100
36769@@ -0,0 +1,365 @@
36770+// SPDX-License-Identifier: GPL-2.0
36771+/*
36772+ * Copyright (C) 2014-2022 Junjiro R. Okajima
36773+ *
36774+ * This program is free software; you can redistribute it and/or modify
36775+ * it under the terms of the GNU General Public License as published by
36776+ * the Free Software Foundation; either version 2 of the License, or
36777+ * (at your option) any later version.
36778+ *
36779+ * This program is distributed in the hope that it will be useful,
36780+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
36781+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36782+ * GNU General Public License for more details.
36783+ *
36784+ * You should have received a copy of the GNU General Public License
36785+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
36786+ */
36787+
36788+/*
36789+ * handling xattr functions
36790+ */
36791+
36792+#include <linux/fs.h>
36793+#include <linux/posix_acl_xattr.h>
36794+#include <linux/xattr.h>
36795+#include "aufs.h"
36796+/* decide whether the error @err hit for the xattr @name may be ignored under @ignore_flags; returns 0 when ignored, @err unchanged otherwise */
36797+static int au_xattr_ignore(int err, char *name, unsigned int ignore_flags)
36798+{
36799+ if (!ignore_flags)
36800+ goto out;
36801+ switch (err) {
36802+ case -ENOMEM:
36803+ case -EDQUOT:
36804+ goto out; /* these two are never ignored */
36805+ }
36806+
36807+ if ((ignore_flags & AuBrAttr_ICEX) == AuBrAttr_ICEX) { /* every bit of AuBrAttr_ICEX is set: ignore regardless of the name */
36808+ err = 0;
36809+ goto out;
36810+ }
36811+
36812+#define cmp(brattr, prefix) do { \
36813+ if (!strncmp(name, XATTR_##prefix##_PREFIX, \
36814+ XATTR_##prefix##_PREFIX_LEN)) { \
36815+ if (ignore_flags & AuBrAttr_ICEX_##brattr) \
36816+ err = 0; \
36817+ goto out; \
36818+ } \
36819+ } while (0)
36820+
36821+ cmp(SEC, SECURITY); /* the first prefix that matches @name decides and jumps to out */
36822+ cmp(SYS, SYSTEM);
36823+ cmp(TR, TRUSTED);
36824+ cmp(USR, USER);
36825+#undef cmp
36826+
36827+ if (ignore_flags & AuBrAttr_ICEX_OTH) /* @name matched none of the four prefixes */
36828+ err = 0;
36829+
36830+out:
36831+ return err;
36832+}
36833+/* internal flag, one bit above AuBrAttr_ICEX_OTH: au_cpup_xattr() ORs it into ignore_flags, au_do_cpup_xattr() tests it on -EOPNOTSUPP */
36834+static const int au_xattr_out_of_list = AuBrAttr_ICEX_OTH << 1;
36835+/* copy the single xattr @name from @h_src to @h_dst; @buf is the value buffer handed to vfs_getxattr_alloc() and is left to the caller to free; returns 0 or a negative errno */
36836+static int au_do_cpup_xattr(struct path *h_dst, struct path *h_src,
36837+ char *name, char **buf, unsigned int ignore_flags,
36838+ unsigned int verbose)
36839+{
36840+ int err, is_acl;
36841+ ssize_t ssz;
36842+ struct inode *h_idst;
36843+ struct dentry *h_dst_dentry, *h_src_dentry;
36844+ struct user_namespace *h_dst_userns, *h_src_userns;
36845+ struct posix_acl *acl;
36846+
36847+ is_acl = !!is_posix_acl_xattr(name);
36848+ h_src_userns = mnt_user_ns(h_src->mnt);
36849+ h_src_dentry = h_src->dentry;
36850+ if (is_acl) { /* POSIX ACLs are read and written as struct posix_acl, not as raw bytes */
36851+ acl = vfs_get_acl(h_src_userns, h_src_dentry, name);
36852+ AuDebugOn(!acl);
36853+ if (unlikely(IS_ERR(acl))) {
36854+ err = PTR_ERR(acl);
36855+ if (err == -ENODATA)
36856+ err = 0;
36857+ else if (err == -EOPNOTSUPP
36858+ && au_test_nfs_noacl(d_inode(h_src_dentry)))
36859+ err = 0;
36860+ else if (verbose || au_debug_test())
36861+ pr_err("%s, err %d\n", name, err);
36862+ goto out;
36863+ }
36864+ } else {
36865+ ssz = vfs_getxattr_alloc(h_src_userns, h_src_dentry, name, buf,
36866+ 0, GFP_NOFS);
36867+ if (unlikely(ssz <= 0)) { /* error, or an empty value: nothing is set on the destination */
36868+ err = ssz;
36869+ if (err == -ENODATA)
36870+ err = 0;
36871+ else if (err == -EOPNOTSUPP
36872+ && (ignore_flags & au_xattr_out_of_list))
36873+ err = 0;
36874+ else if (err && (verbose || au_debug_test()))
36875+ pr_err("%s, err %d\n", name, err);
36876+ goto out;
36877+ }
36878+ }
36879+
36880+ /* unlock the destination inode temporarily, it is locked again after the set */
36881+ h_dst_userns = mnt_user_ns(h_dst->mnt);
36882+ h_dst_dentry = h_dst->dentry;
36883+ h_idst = d_inode(h_dst_dentry);
36884+ inode_unlock(h_idst);
36885+ if (is_acl) {
36886+ err = vfsub_set_acl(h_dst_userns, h_dst_dentry, name, acl);
36887+ posix_acl_release(acl);
36888+ } else
36889+ err = vfsub_setxattr(h_dst_userns, h_dst_dentry, name, *buf,
36890+ ssz, /*flags*/0);
36891+ inode_lock_nested(h_idst, AuLsc_I_CHILD2);
36892+ if (unlikely(err)) {
36893+ if (verbose || au_debug_test())
36894+ pr_err("%s, err %d\n", name, err);
36895+ err = au_xattr_ignore(err, name, ignore_flags); /* may turn the error into 0 according to @ignore_flags */
36896+ }
36897+
36898+out:
36899+ return err;
36900+}
36901+
36902+int au_cpup_xattr(struct path *h_dst, struct path *h_src, int ignore_flags,
36903+ unsigned int verbose)
36904+{
36905+ int err, unlocked;
36906+ ssize_t ssz;
36907+ struct dentry *h_dst_dentry, *h_src_dentry;
36908+ struct inode *h_isrc, *h_idst;
36909+ char *value, *p, *o, *e;
36910+
36911+ /* try stopping to update the source inode while we are referencing */
36912+ /* there should not be the parent-child relationship between them */
36913+ h_dst_dentry = h_dst->dentry;
36914+ h_idst = d_inode(h_dst_dentry);
36915+ h_src_dentry = h_src->dentry;
36916+ h_isrc = d_inode(h_src_dentry);
36917+ inode_unlock(h_idst);
36918+ inode_lock_shared_nested(h_isrc, AuLsc_I_CHILD);
36919+ inode_lock_nested(h_idst, AuLsc_I_CHILD2);
36920+ unlocked = 0;
36921+
36922+ /* some filesystems don't list POSIX ACL, for example tmpfs */
36923+ ssz = vfs_listxattr(h_src_dentry, NULL, 0);
36924+ err = ssz;
36925+ if (unlikely(err < 0)) {
36926+ AuTraceErr(err);
36927+ if (err == -ENODATA
36928+ || err == -EOPNOTSUPP)
36929+ err = 0; /* ignore */
36930+ goto out;
36931+ }
36932+
36933+ err = 0;
36934+ p = NULL;
36935+ o = NULL;
36936+ if (ssz) {
36937+ err = -ENOMEM;
36938+ p = kmalloc(ssz, GFP_NOFS);
36939+ o = p;
36940+ if (unlikely(!p))
36941+ goto out;
36942+ err = vfs_listxattr(h_src_dentry, p, ssz);
36943+ }
36944+ inode_unlock_shared(h_isrc);
36945+ unlocked = 1;
36946+ AuDbg("err %d, ssz %zd\n", err, ssz);
36947+ if (unlikely(err < 0))
36948+ goto out_free;
36949+
36950+ err = 0;
36951+ e = p + ssz;
36952+ value = NULL;
36953+ ignore_flags |= au_xattr_out_of_list;
36954+ while (!err && p < e) {
36955+ err = au_do_cpup_xattr(h_dst, h_src, p, &value, ignore_flags,
36956+ verbose);
36957+ p += strlen(p) + 1;
36958+ }
36959+ au_kfree_try_rcu(value);
36960+
36961+out_free:
36962+ au_kfree_try_rcu(o);
36963+out:
36964+ if (!unlocked)
36965+ inode_unlock_shared(h_isrc);
36966+ AuTraceErr(err);
36967+ return err;
36968+}
36969+
36970+/* ---------------------------------------------------------------------- */
36971+/* non-zero when si_pid_test(@sb) reports a re-entry (only with SMACK or SELinux enabled), in which case au_lgxattr() skips its own locking; 0 otherwise */
36972+static int au_smack_reentering(struct super_block *sb)
36973+{
36974+#if IS_ENABLED(CONFIG_SECURITY_SMACK) || IS_ENABLED(CONFIG_SECURITY_SELINUX)
36975+ /*
36976+ * as a part of lookup, smack_d_instantiate() is called, and it calls
36977+ * i_op->getxattr(). ouch.
36978+ */
36979+ return si_pid_test(sb);
36980+#else
36981+ return 0;
36982+#endif
36983+}
36984+
36985+enum { /* request types for struct au_lgxattr */
36986+ AU_XATTR_LIST, /* au_lgxattr() calls vfs_listxattr() */
36987+ AU_XATTR_GET /* au_lgxattr() calls vfs_getxattr() */
36988+};
36989+
36990+struct au_lgxattr { /* argument bundle for au_lgxattr(): one list or one get request */
36991+ int type; /* AU_XATTR_LIST or AU_XATTR_GET, selects the member of u */
36992+ union {
36993+ struct {
36994+ char *list;
36995+ size_t size;
36996+ } list; /* buffer and size passed to vfs_listxattr() */
36997+ struct {
36998+ const char *name;
36999+ void *value;
37000+ size_t size;
37001+ } get; /* name, buffer and size passed to vfs_getxattr() */
37002+ } u;
37003+};
37004+/* common body of listxattr and getxattr: resolve the lower path of @dentry and forward the request in @arg to it; returns what the vfs call returned, or a negative errno */
37005+static ssize_t au_lgxattr(struct dentry *dentry, struct inode *inode,
37006+ struct au_lgxattr *arg)
37007+{
37008+ ssize_t err;
37009+ int reenter;
37010+ struct path h_path;
37011+ struct super_block *sb;
37012+
37013+ sb = dentry->d_sb;
37014+ reenter = au_smack_reentering(sb);
37015+ if (!reenter) { /* on re-entry no lock is taken here and none is released below */
37016+ err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
37017+ if (unlikely(err))
37018+ goto out;
37019+ }
37020+ err = au_h_path_getattr(dentry, inode, /*force*/1, &h_path, reenter); /* presumably also takes the di read lock released at out_di -- confirm */
37021+ if (unlikely(err))
37022+ goto out_si;
37023+ if (unlikely(!h_path.dentry))
37024+ /* illegally overlapped or something */
37025+ goto out_di; /* pretending success */
37026+
37027+ /* always topmost entry only */
37028+ switch (arg->type) {
37029+ case AU_XATTR_LIST:
37030+ err = vfs_listxattr(h_path.dentry,
37031+ arg->u.list.list, arg->u.list.size);
37032+ break;
37033+ case AU_XATTR_GET:
37034+ AuDebugOn(d_is_negative(h_path.dentry));
37035+ err = vfs_getxattr(mnt_user_ns(h_path.mnt), h_path.dentry,
37036+ arg->u.get.name, arg->u.get.value,
37037+ arg->u.get.size);
37038+ break;
37039+ }
37040+
37041+out_di:
37042+ if (!reenter)
37043+ di_read_unlock(dentry, AuLock_IR);
37044+out_si:
37045+ if (!reenter)
37046+ si_read_unlock(sb);
37047+out:
37048+ AuTraceErr(err);
37049+ return err;
37050+}
37051+/* listxattr entry point: wrap @list/@size into an AU_XATTR_LIST request and hand it to au_lgxattr(), with no inode */
37052+ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size)
37053+{
37054+ struct au_lgxattr arg = {
37055+ .type = AU_XATTR_LIST,
37056+ .u.list = {
37057+ .list = list,
37058+ .size = size
37059+ },
37060+ };
37061+
37062+ return au_lgxattr(dentry, /*inode*/NULL, &arg);
37063+}
37064+/* wrap @name/@value/@size into an AU_XATTR_GET request and hand it to au_lgxattr() */
37065+static ssize_t au_getxattr(struct dentry *dentry, struct inode *inode,
37066+ const char *name, void *value, size_t size)
37067+{
37068+ struct au_lgxattr arg = {
37069+ .type = AU_XATTR_GET,
37070+ .u.get = {
37071+ .name = name,
37072+ .value = value,
37073+ .size = size
37074+ },
37075+ };
37076+
37077+ return au_lgxattr(dentry, inode, &arg);
37078+}
37079+/* wrap the arguments into an AU_XATTR_SET request and hand it to au_sxattr(); struct au_sxattr and au_sxattr() are defined outside this file -- see their definitions for the locking and the return values */
37080+static int au_setxattr(struct dentry *dentry, struct inode *inode,
37081+ const char *name, const void *value, size_t size,
37082+ int flags)
37083+{
37084+ struct au_sxattr arg = {
37085+ .type = AU_XATTR_SET,
37086+ .u.set = {
37087+ .name = name,
37088+ .value = value,
37089+ .size = size,
37090+ .flags = flags
37091+ },
37092+ };
37093+
37094+ return au_sxattr(dentry, inode, &arg);
37095+}
37096+
37097+/* ---------------------------------------------------------------------- */
37098+/* xattr_handler ->get: @handler is unused, the call is forwarded to au_getxattr() (its ssize_t result is returned as int) */
37099+static int au_xattr_get(const struct xattr_handler *handler,
37100+ struct dentry *dentry, struct inode *inode,
37101+ const char *name, void *buffer, size_t size)
37102+{
37103+ return au_getxattr(dentry, inode, name, buffer, size);
37104+}
37105+/* xattr_handler ->set: @handler and @userns are unused, the call is forwarded to au_setxattr() */
37106+static int au_xattr_set(const struct xattr_handler *handler,
37107+ struct user_namespace *userns,
37108+ struct dentry *dentry, struct inode *inode,
37109+ const char *name, const void *value, size_t size,
37110+ int flags)
37111+{
37112+ return au_setxattr(dentry, inode, name, value, size, flags);
37113+}
37114+/* handler with an empty name and an empty prefix, presumably so that it matches every xattr name -- confirm against the xattr handler lookup */
37115+static const struct xattr_handler au_xattr_handler = {
37116+ .name = "",
37117+ .prefix = "",
37118+ .get = au_xattr_get,
37119+ .set = au_xattr_set
37120+};
37121+/* NULL-terminated handler table installed by au_xattr_init(); the POSIX ACL handlers are present only with CONFIG_FS_POSIX_ACL */
37122+static const struct xattr_handler *au_xattr_handlers[] = {
37123+#ifdef CONFIG_FS_POSIX_ACL
37124+ &posix_acl_access_xattr_handler,
37125+ &posix_acl_default_xattr_handler,
37126+#endif
37127+ &au_xattr_handler, /* must be last */
37128+ NULL
37129+};
37130+/* install the aufs xattr handler table on @sb */
37131+void au_xattr_init(struct super_block *sb)
37132+{
37133+ sb->s_xattr = au_xattr_handlers;
37134+}
37135diff -urN /usr/share/empty/fs/aufs/xino.c linux/fs/aufs/xino.c
37136--- /usr/share/empty/fs/aufs/xino.c 1970-01-01 01:00:00.000000000 +0100
37137+++ linux/fs/aufs/xino.c 2022-11-05 23:02:18.972555950 +0100
37138@@ -0,0 +1,1926 @@
37139+// SPDX-License-Identifier: GPL-2.0
37140+/*
37141+ * Copyright (C) 2005-2022 Junjiro R. Okajima
37142+ *
37143+ * This program is free software; you can redistribute it and/or modify
37144+ * it under the terms of the GNU General Public License as published by
37145+ * the Free Software Foundation; either version 2 of the License, or
37146+ * (at your option) any later version.
37147+ *
37148+ * This program is distributed in the hope that it will be useful,
37149+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
37150+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
37151+ * GNU General Public License for more details.
37152+ *
37153+ * You should have received a copy of the GNU General Public License
37154+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
37155+ */
37156+
37157+/*
37158+ * external inode number translation table and bitmap
37159+ *
37160+ * things to consider
37161+ * - the lifetime
37162+ * + au_xino object
37163+ * + XINO files (xino, xib, xigen)
37164+ * + dynamic debugfs entries (xiN)
37165+ * + static debugfs entries (xib, xigen)
37166+ * + static sysfs entry (xi_path)
37167+ * - several entry points to handle them.
37168+ * + mount(2) without xino option (default)
37169+ * + mount(2) with xino option
37170+ * + mount(2) with noxino option
37171+ * + umount(2)
37172+ * + remount with add/del branches
37173+ * + remount with xino/noxino options
37174+ */
37175+
37176+#include <linux/seq_file.h>
37177+#include <linux/statfs.h>
37178+#include "aufs.h"
37179+/* linear search over the branches @btop..@bbot (inclusive) for one whose au_sbr_sb() equals @h_sb; returns its index or -1 */
37180+static aufs_bindex_t sbr_find_shared(struct super_block *sb, aufs_bindex_t btop,
37181+ aufs_bindex_t bbot,
37182+ struct super_block *h_sb)
37183+{
37184+ /* todo: try binary-search if the branches are many */
37185+ for (; btop <= bbot; btop++)
37186+ if (h_sb == au_sbr_sb(sb, btop))
37187+ return btop;
37188+ return -1;
37189+}
37190+
37191+/*
37192+ * find another branch which is on the same filesystem as the specified
37193+ * branch{@btgt}. search until @bbot. returns its index, or -1 if none.
37194+ */
37195+static aufs_bindex_t is_sb_shared(struct super_block *sb, aufs_bindex_t btgt,
37196+ aufs_bindex_t bbot)
37197+{
37198+ aufs_bindex_t bindex;
37199+ struct super_block *tgt_sb;
37200+
37201+ tgt_sb = au_sbr_sb(sb, btgt);
37202+ bindex = sbr_find_shared(sb, /*btop*/0, btgt - 1, tgt_sb); /* the branches before @btgt first */
37203+ if (bindex < 0)
37204+ bindex = sbr_find_shared(sb, btgt + 1, bbot, tgt_sb); /* then the ones after it */
37205+
37206+ return bindex;
37207+}
37208+
37209+/* ---------------------------------------------------------------------- */
37210+
37211+/*
37212+ * stop unnecessary notify events at creating xino files
37213+ */
37214+/* return the index of the branch whose root dentry (au_br_dentry()) is the parent of @dentry, or -1 when no branch matches */
37215+aufs_bindex_t au_xi_root(struct super_block *sb, struct dentry *dentry)
37216+{
37217+ aufs_bindex_t bfound, bindex, bbot;
37218+ struct dentry *parent;
37219+ struct au_branch *br;
37220+
37221+ bfound = -1;
37222+ parent = dentry->d_parent; /* safe d_parent access */
37223+ bbot = au_sbbot(sb);
37224+ for (bindex = 0; bindex <= bbot; bindex++) {
37225+ br = au_sbr(sb, bindex);
37226+ if (au_br_dentry(br) == parent) {
37227+ bfound = bindex;
37228+ break;
37229+ }
37230+ }
37231+
37232+ AuDbg("bfound b%d\n", bfound);
37233+ return bfound;
37234+}
37235+/* what au_xino_lock_dir() locked, for au_xino_unlock_dir(): either hdir alone, or parent plus dir with hdir == NULL */
37236+struct au_xino_lock_dir {
37237+ struct au_hinode *hdir; /* set when the xino file sits directly in a branch root */
37238+ struct dentry *parent; /* otherwise: the referenced parent dentry ... */
37239+ struct inode *dir; /* ... and its locked inode */
37240+};
37241+/* take a reference on the parent of @dentry and lock the parent's inode with the lockdep subclass @lsc; the caller does inode_unlock() and dput() */
37242+static struct dentry *au_dget_parent_lock(struct dentry *dentry,
37243+ unsigned int lsc)
37244+{
37245+ struct dentry *parent;
37246+ struct inode *dir;
37247+
37248+ parent = dget_parent(dentry);
37249+ dir = d_inode(parent);
37250+ inode_lock_nested(dir, lsc);
37251+#if 0 /* it should not happen */
37252+ spin_lock(&dentry->d_lock);
37253+ if (unlikely(dentry->d_parent != parent)) {
37254+ spin_unlock(&dentry->d_lock);
37255+ inode_unlock(dir);
37256+ dput(parent);
37257+ parent = NULL;
37258+ goto out;
37259+ }
37260+ spin_unlock(&dentry->d_lock);
37261+
37262+out:
37263+#endif
37264+ return parent;
37265+}
37266+/* lock the directory holding the xino file @xipath and record in @ldir how it was locked; undone by au_xino_unlock_dir() */
37267+static void au_xino_lock_dir(struct super_block *sb, struct path *xipath,
37268+ struct au_xino_lock_dir *ldir)
37269+{
37270+ aufs_bindex_t bindex;
37271+
37272+ ldir->hdir = NULL;
37273+ bindex = au_xi_root(sb, xipath->dentry);
37274+ if (bindex >= 0) {
37275+ /* rw branch root */
37276+ ldir->hdir = au_hi(d_inode(sb->s_root), bindex);
37277+ au_hn_inode_lock_nested(ldir->hdir, AuLsc_I_PARENT);
37278+ } else {
37279+ /* other */
37280+ ldir->parent = au_dget_parent_lock(xipath->dentry,
37281+ AuLsc_I_PARENT);
37282+ ldir->dir = d_inode(ldir->parent);
37283+ }
37284+}
37285+/* undo au_xino_lock_dir(): unlock hdir if that was used, otherwise unlock the parent inode and drop the parent dentry */
37286+static void au_xino_unlock_dir(struct au_xino_lock_dir *ldir)
37287+{
37288+ if (ldir->hdir)
37289+ au_hn_inode_unlock(ldir->hdir);
37290+ else {
37291+ inode_unlock(ldir->dir);
37292+ dput(ldir->parent);
37293+ }
37294+}
37295+
37296+/* ---------------------------------------------------------------------- */
37297+
37298+/*
37299+ * create a new xino file at @fpath (must not exist yet), unlink it right away while keeping it open, and return the open file or an ERR_PTR. @silent suppresses the error messages; a non-zero @wbrtop skips the static mutex.
37300+ */
37301+struct file *au_xino_create(struct super_block *sb, char *fpath, int silent,
37302+ int wbrtop)
37303+{
37304+ struct file *file;
37305+ struct dentry *h_parent, *d;
37306+ struct inode *h_dir, *inode;
37307+ int err;
37308+ static DEFINE_MUTEX(mtx);
37309+
37310+ /*
37311+ * at mount-time, and the xino file is the default path,
37312+ * hnotify is disabled so we have no notify events to ignore.
37313+ * when a user specified the xino, we cannot get au_hdir to be ignored.
37314+ */
37315+ if (!wbrtop)
37316+ mutex_lock(&mtx);
37317+ file = vfsub_filp_open(fpath, O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
37318+ /* | __FMODE_NONOTIFY */,
37319+ 0666);
37320+ if (IS_ERR(file)) {
37321+ if (!wbrtop)
37322+ mutex_unlock(&mtx);
37323+ if (!silent)
37324+ pr_err("open %s(%ld)\n", fpath, PTR_ERR(file));
37325+ return file;
37326+ }
37327+
37328+ /* keep file count */
37329+ err = 0;
37330+ d = file->f_path.dentry;
37331+ h_parent = au_dget_parent_lock(d, AuLsc_I_PARENT);
37332+ if (!wbrtop)
37333+ mutex_unlock(&mtx);
37334+ /* mnt_want_write() is unnecessary here */
37335+ h_dir = d_inode(h_parent);
37336+ inode = file_inode(file);
37337+ /* no delegation since it is just created */
37338+ if (inode->i_nlink)
37339+ err = vfsub_unlink(h_dir, &file->f_path, /*delegated*/NULL,
37340+ /*force*/0);
37341+ inode_unlock(h_dir);
37342+ dput(h_parent);
37343+ if (unlikely(err)) {
37344+ if (!silent)
37345+ pr_err("unlink %s(%d)\n", fpath, err);
37346+ goto out;
37347+ }
37348+
37349+ err = -EINVAL;
37350+ if (unlikely(sb && sb == d->d_sb)) { /* the xino file must not live on the aufs mount @sb itself */
37351+ if (!silent)
37352+ pr_err("%s must be outside\n", fpath);
37353+ goto out;
37354+ }
37355+ if (unlikely(au_test_fs_bad_xino(d->d_sb))) {
37356+ if (!silent)
37357+ pr_err("xino doesn't support %s(%s)\n",
37358+ fpath, au_sbtype(d->d_sb));
37359+ goto out;
37360+ }
37361+ return file; /* success */
37362+
37363+out:
37364+ fput(file);
37365+ file = ERR_PTR(err);
37366+ return file;
37367+}
37368+
37369+/*
37370+ * create a new xinofile at the same place/path as @base, unlink it while keeping it open, and optionally fill it with the contents of @copy_src. returns the open file or an ERR_PTR.
37371+ */
37372+struct file *au_xino_create2(struct super_block *sb, struct path *base,
37373+ struct file *copy_src)
37374+{
37375+ struct file *file;
37376+ struct dentry *dentry;
37377+ struct inode *dir, *delegated;
37378+ struct qstr *name;
37379+ struct path ppath, path;
37380+ int err, do_unlock;
37381+ struct au_xino_lock_dir ldir;
37382+
37383+ do_unlock = 1; /* cleared once the dir has been unlocked after the unlink */
37384+ au_xino_lock_dir(sb, base, &ldir);
37385+ dentry = base->dentry;
37386+ ppath.dentry = dentry->d_parent; /* dir inode is locked */
37387+ ppath.mnt = base->mnt;
37388+ dir = d_inode(ppath.dentry);
37389+ IMustLock(dir);
37390+
37391+ name = &dentry->d_name;
37392+ path.dentry = vfsub_lookup_one_len(name->name, &ppath, name->len);
37393+ if (IS_ERR(path.dentry)) {
37394+ file = (void *)path.dentry;
37395+ pr_err("%pd lookup err %ld\n", dentry, PTR_ERR(path.dentry));
37396+ goto out;
37397+ }
37398+
37399+ /* no need to mnt_want_write() since we call dentry_open() later */
37400+ err = vfs_create(mnt_user_ns(base->mnt), dir, path.dentry, 0666, NULL);
37401+ if (unlikely(err)) {
37402+ file = ERR_PTR(err);
37403+ pr_err("%pd create err %d\n", dentry, err);
37404+ goto out_dput;
37405+ }
37406+
37407+ path.mnt = base->mnt;
37408+ file = vfsub_dentry_open(&path,
37409+ O_RDWR | O_CREAT | O_EXCL | O_LARGEFILE
37410+ /* | __FMODE_NONOTIFY */);
37411+ if (IS_ERR(file)) {
37412+ pr_err("%pd open err %ld\n", dentry, PTR_ERR(file));
37413+ goto out_dput; /* NOTE(review): the file created just above is not unlinked on this path */
37414+ }
37415+
37416+ delegated = NULL;
37417+ err = vfsub_unlink(dir, &file->f_path, &delegated, /*force*/0);
37418+ au_xino_unlock_dir(&ldir);
37419+ do_unlock = 0;
37420+ if (unlikely(err == -EWOULDBLOCK)) {
37421+ pr_warn("cannot retry for NFSv4 delegation"
37422+ " for an internal unlink\n");
37423+ iput(delegated);
37424+ }
37425+ if (unlikely(err)) {
37426+ pr_err("%pd unlink err %d\n", dentry, err);
37427+ goto out_fput;
37428+ }
37429+
37430+ if (copy_src) {
37431+ /* no one can touch copy_src xino */
37432+ err = au_copy_file(file, copy_src, vfsub_f_size_read(copy_src));
37433+ if (unlikely(err)) {
37434+ pr_err("%pd copy err %d\n", dentry, err);
37435+ goto out_fput;
37436+ }
37437+ }
37438+ goto out_dput; /* success */
37439+
37440+out_fput:
37441+ fput(file);
37442+ file = ERR_PTR(err);
37443+out_dput:
37444+ dput(path.dentry);
37445+out:
37446+ if (do_unlock)
37447+ au_xino_unlock_dir(&ldir);
37448+ return file;
37449+}
37450+/* return the first non-NULL entry of xi->xi_file[], or NULL when every entry is NULL or the array is empty; no reference is taken */
37451+struct file *au_xino_file1(struct au_xino *xi)
37452+{
37453+ struct file *file;
37454+ unsigned int u, nfile;
37455+
37456+ file = NULL;
37457+ nfile = xi->xi_nfile;
37458+ for (u = 0; u < nfile; u++) {
37459+ file = xi->xi_file[u];
37460+ if (file)
37461+ break;
37462+ }
37463+
37464+ return file;
37465+}
37466+/* store @file (may be NULL) in xi->xi_file[@idx], growing the array when @idx is beyond it; takes its own reference on @file and drops the one of the entry being replaced. returns 0 or -ENOMEM */
37467+static int au_xino_file_set(struct au_xino *xi, int idx, struct file *file)
37468+{
37469+ int err;
37470+ struct file *f;
37471+ void *p;
37472+
37473+ if (file)
37474+ get_file(file); /* the reference owned by the array slot */
37475+
37476+ err = 0;
37477+ f = NULL;
37478+ if (idx < xi->xi_nfile) {
37479+ f = xi->xi_file[idx];
37480+ if (f)
37481+ fput(f);
37482+ } else {
37483+ p = au_kzrealloc(xi->xi_file,
37484+ sizeof(*xi->xi_file) * xi->xi_nfile,
37485+ sizeof(*xi->xi_file) * (idx + 1),
37486+ GFP_NOFS, /*may_shrink*/0);
37487+ if (p) {
37488+ MtxMustLock(&xi->xi_mtx);
37489+ xi->xi_file = p;
37490+ xi->xi_nfile = idx + 1;
37491+ } else {
37492+ err = -ENOMEM;
37493+ if (file)
37494+ fput(file); /* give back the reference taken at the top */
37495+ goto out;
37496+ }
37497+ }
37498+ xi->xi_file[idx] = file;
37499+
37500+out:
37501+ return err;
37502+}
37503+
37504+/*
37505+ * create a new xino file via au_xino_create2() and store it in @xinew->xi at @xinew->idx. if @xinew->xi is not set, then create new xigen file -- but that case is not implemented yet and hits BUG(). on success the returned file is kept alive only by the reference au_xino_file_set() took.
37506+ */
37507+struct file *au_xi_new(struct super_block *sb, struct au_xi_new *xinew)
37508+{
37509+ struct file *file;
37510+ int err;
37511+
37512+ SiMustAnyLock(sb);
37513+
37514+ file = au_xino_create2(sb, xinew->base, xinew->copy_src);
37515+ if (IS_ERR(file)) {
37516+ err = PTR_ERR(file);
37517+ pr_err("%s[%d], err %d\n",
37518+ xinew->xi ? "xino" : "xigen",
37519+ xinew->idx, err);
37520+ goto out;
37521+ }
37522+
37523+ if (xinew->xi)
37524+ err = au_xino_file_set(xinew->xi, xinew->idx, file);
37525+ else {
37526+ BUG();
37527+ /* todo: make xigen file an array */
37528+ /* err = au_xigen_file_set(sb, xinew->idx, file); */
37529+ }
37530+ fput(file); /* drop the reference obtained from au_xino_create2() */
37531+ if (unlikely(err))
37532+ file = ERR_PTR(err);
37533+
37534+out:
37535+ return file;
37536+}
37537+
37538+/* ---------------------------------------------------------------------- */
37539+
37540+/*
37541+ * truncate xino files
37542+ */
37543+static int au_xino_do_trunc(struct super_block *sb, aufs_bindex_t bindex,
37544+ int idx, struct kstatfs *st)
37545+{
37546+ int err;
37547+ blkcnt_t blocks;
37548+ struct file *file, *new_xino;
37549+ struct au_xi_new xinew = {
37550+ .idx = idx
37551+ };
37552+
37553+ err = 0;
37554+ xinew.xi = au_sbr(sb, bindex)->br_xino;
37555+ file = au_xino_file(xinew.xi, idx);
37556+ if (!file)
37557+ goto out;
37558+
37559+ xinew.base = &file->f_path;
37560+ err = vfs_statfs(xinew.base, st);
37561+ if (unlikely(err)) {
37562+ AuErr1("statfs err %d, ignored\n", err);
37563+ err = 0;
37564+ goto out;
37565+ }
37566+
37567+ blocks = file_inode(file)->i_blocks;
37568+ pr_info("begin truncating xino(b%d-%d), ib%llu, %llu/%llu free blks\n",
37569+ bindex, idx, (u64)blocks, st->f_bfree, st->f_blocks);
37570+
37571+ xinew.copy_src = file;
37572+ new_xino = au_xi_new(sb, &xinew);
37573+ if (IS_ERR(new_xino)) {
37574+ err = PTR_ERR(new_xino);
37575+ pr_err("xino(b%d-%d), err %d, ignored\n", bindex, idx, err);
37576+ goto out;
37577+ }
37578+
37579+ err = vfs_statfs(&new_xino->f_path, st);
37580+ if (!err)
37581+ pr_info("end truncating xino(b%d-%d), ib%llu, %llu/%llu free blks\n",
37582+ bindex, idx, (u64)file_inode(new_xino)->i_blocks,
37583+ st->f_bfree, st->f_blocks);
37584+ else {
37585+ AuErr1("statfs err %d, ignored\n", err);
37586+ err = 0;
37587+ }
37588+
37589+out:
37590+ return err;
37591+}
37592+
37593+int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex, int idx_begin)
37594+{
37595+ int err, i;
37596+ unsigned long jiffy;
37597+ aufs_bindex_t bbot;
37598+ struct kstatfs *st;
37599+ struct au_branch *br;
37600+ struct au_xino *xi;
37601+
37602+ err = -ENOMEM;
37603+ st = kmalloc(sizeof(*st), GFP_NOFS);
37604+ if (unlikely(!st))
37605+ goto out;
37606+
37607+ err = -EINVAL;
37608+ bbot = au_sbbot(sb);
37609+ if (unlikely(bindex < 0 || bbot < bindex))
37610+ goto out_st;
37611+
37612+ err = 0;
37613+ jiffy = jiffies;
37614+ br = au_sbr(sb, bindex);
37615+ xi = br->br_xino;
37616+ for (i = idx_begin; !err && i < xi->xi_nfile; i++)
37617+ err = au_xino_do_trunc(sb, bindex, i, st);
37618+ if (!err)
37619+ au_sbi(sb)->si_xino_jiffy = jiffy;
37620+
37621+out_st:
37622+ au_kfree_rcu(st);
37623+out:
37624+ return err;
37625+}
37626+
37627+struct xino_do_trunc_args {
37628+ struct super_block *sb;
37629+ struct au_branch *br;
37630+ int idx;
37631+};
37632+
37633+static void xino_do_trunc(void *_args)
37634+{
37635+ struct xino_do_trunc_args *args = _args;
37636+ struct super_block *sb;
37637+ struct au_branch *br;
37638+ struct inode *dir;
37639+ int err, idx;
37640+ aufs_bindex_t bindex;
37641+
37642+ err = 0;
37643+ sb = args->sb;
37644+ dir = d_inode(sb->s_root);
37645+ br = args->br;
37646+ idx = args->idx;
37647+
37648+ si_noflush_write_lock(sb);
37649+ ii_read_lock_parent(dir);
37650+ bindex = au_br_index(sb, br->br_id);
37651+ err = au_xino_trunc(sb, bindex, idx);
37652+ ii_read_unlock(dir);
37653+ if (unlikely(err))
37654+ pr_warn("err b%d, (%d)\n", bindex, err);
37655+ atomic_dec(&br->br_xino->xi_truncating);
37656+ au_lcnt_dec(&br->br_count);
37657+ si_write_unlock(sb);
37658+ au_nwt_done(&au_sbi(sb)->si_nowait);
37659+ au_kfree_rcu(args);
37660+}
37661+
37662+/*
37663+ * returns the index in the xi_file array whose corresponding file is necessary
37664+ * to truncate, or -1 which means no need to truncate.
37665+ */
37666+static int xino_trunc_test(struct super_block *sb, struct au_branch *br)
37667+{
37668+ int err;
37669+ unsigned int u;
37670+ struct kstatfs st;
37671+ struct au_sbinfo *sbinfo;
37672+ struct au_xino *xi;
37673+ struct file *file;
37674+
37675+ /* todo: si_xino_expire and the ratio should be customizable */
37676+ sbinfo = au_sbi(sb);
37677+ if (time_before(jiffies,
37678+ sbinfo->si_xino_jiffy + sbinfo->si_xino_expire))
37679+ return -1;
37680+
37681+ /* truncation border */
37682+ xi = br->br_xino;
37683+ for (u = 0; u < xi->xi_nfile; u++) {
37684+ file = au_xino_file(xi, u);
37685+ if (!file)
37686+ continue;
37687+
37688+ err = vfs_statfs(&file->f_path, &st);
37689+ if (unlikely(err)) {
37690+ AuErr1("statfs err %d, ignored\n", err);
37691+ return -1;
37692+ }
37693+ if (div64_u64(st.f_bfree * 100, st.f_blocks)
37694+ >= AUFS_XINO_DEF_TRUNC)
37695+ return u;
37696+ }
37697+
37698+ return -1;
37699+}
37700+
37701+static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
37702+{
37703+ int idx;
37704+ struct xino_do_trunc_args *args;
37705+ int wkq_err;
37706+
37707+ idx = xino_trunc_test(sb, br);
37708+ if (idx < 0)
37709+ return;
37710+
37711+ if (atomic_inc_return(&br->br_xino->xi_truncating) > 1)
37712+ goto out;
37713+
37714+ /* lock and kfree() will be called in trunc_xino() */
37715+ args = kmalloc(sizeof(*args), GFP_NOFS);
37716+ if (unlikely(!args)) {
37717+ AuErr1("no memory\n");
37718+ goto out;
37719+ }
37720+
37721+ au_lcnt_inc(&br->br_count);
37722+ args->sb = sb;
37723+ args->br = br;
37724+ args->idx = idx;
37725+ wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0);
37726+ if (!wkq_err)
37727+ return; /* success */
37728+
37729+ pr_err("wkq %d\n", wkq_err);
37730+ au_lcnt_dec(&br->br_count);
37731+ au_kfree_rcu(args);
37732+
37733+out:
37734+ atomic_dec(&br->br_xino->xi_truncating);
37735+}
37736+
37737+/* ---------------------------------------------------------------------- */
37738+
37739+struct au_xi_calc {
37740+ int idx;
37741+ loff_t pos;
37742+};
37743+
37744+static void au_xi_calc(struct super_block *sb, ino_t h_ino,
37745+ struct au_xi_calc *calc)
37746+{
37747+ loff_t maxent;
37748+
37749+ maxent = au_xi_maxent(sb);
37750+ calc->idx = div64_u64_rem(h_ino, maxent, &calc->pos);
37751+ calc->pos *= sizeof(ino_t);
37752+}
37753+
37754+static int au_xino_do_new_async(struct super_block *sb, struct au_branch *br,
37755+ struct au_xi_calc *calc)
37756+{
37757+ int err;
37758+ struct file *file;
37759+ struct au_xino *xi = br->br_xino;
37760+ struct au_xi_new xinew = {
37761+ .xi = xi
37762+ };
37763+
37764+ SiMustAnyLock(sb);
37765+
37766+ err = 0;
37767+ if (!xi)
37768+ goto out;
37769+
37770+ mutex_lock(&xi->xi_mtx);
37771+ file = au_xino_file(xi, calc->idx);
37772+ if (file)
37773+ goto out_mtx;
37774+
37775+ file = au_xino_file(xi, /*idx*/-1);
37776+ AuDebugOn(!file);
37777+ xinew.idx = calc->idx;
37778+ xinew.base = &file->f_path;
37779+ /* xinew.copy_src = NULL; */
37780+ file = au_xi_new(sb, &xinew);
37781+ if (IS_ERR(file))
37782+ err = PTR_ERR(file);
37783+
37784+out_mtx:
37785+ mutex_unlock(&xi->xi_mtx);
37786+out:
37787+ return err;
37788+}
37789+
37790+struct au_xino_do_new_async_args {
37791+ struct super_block *sb;
37792+ struct au_branch *br;
37793+ struct au_xi_calc calc;
37794+ ino_t ino;
37795+};
37796+
37797+struct au_xi_writing {
37798+ struct hlist_bl_node node;
37799+ ino_t h_ino, ino;
37800+};
37801+
37802+static int au_xino_do_write(struct file *file, struct au_xi_calc *calc,
37803+ ino_t ino);
37804+
37805+static void au_xino_call_do_new_async(void *args)
37806+{
37807+ struct au_xino_do_new_async_args *a = args;
37808+ struct au_branch *br;
37809+ struct super_block *sb;
37810+ struct au_sbinfo *sbi;
37811+ struct inode *root;
37812+ struct file *file;
37813+ struct au_xi_writing *del, *p;
37814+ struct hlist_bl_head *hbl;
37815+ struct hlist_bl_node *pos;
37816+ int err;
37817+
37818+ br = a->br;
37819+ sb = a->sb;
37820+ sbi = au_sbi(sb);
37821+ si_noflush_read_lock(sb);
37822+ root = d_inode(sb->s_root);
37823+ ii_read_lock_child(root);
37824+ err = au_xino_do_new_async(sb, br, &a->calc);
37825+ if (unlikely(err)) {
37826+ AuIOErr("err %d\n", err);
37827+ goto out;
37828+ }
37829+
37830+ file = au_xino_file(br->br_xino, a->calc.idx);
37831+ AuDebugOn(!file);
37832+ err = au_xino_do_write(file, &a->calc, a->ino);
37833+ if (unlikely(err)) {
37834+ AuIOErr("err %d\n", err);
37835+ goto out;
37836+ }
37837+
37838+ del = NULL;
37839+ hbl = &br->br_xino->xi_writing;
37840+ hlist_bl_lock(hbl);
37841+ au_hbl_for_each(pos, hbl) {
37842+ p = container_of(pos, struct au_xi_writing, node);
37843+ if (p->ino == a->ino) {
37844+ del = p;
37845+ hlist_bl_del(&p->node);
37846+ break;
37847+ }
37848+ }
37849+ hlist_bl_unlock(hbl);
37850+ au_kfree_rcu(del);
37851+
37852+out:
37853+ au_lcnt_dec(&br->br_count);
37854+ ii_read_unlock(root);
37855+ si_read_unlock(sb);
37856+ au_nwt_done(&sbi->si_nowait);
37857+ au_kfree_rcu(a);
37858+}
37859+
37860+/*
37861+ * create a new xino file asynchronously
37862+ */
37863+static int au_xino_new_async(struct super_block *sb, struct au_branch *br,
37864+ struct au_xi_calc *calc, ino_t ino)
37865+{
37866+ int err;
37867+ struct au_xino_do_new_async_args *arg;
37868+
37869+ err = -ENOMEM;
37870+ arg = kmalloc(sizeof(*arg), GFP_NOFS);
37871+ if (unlikely(!arg))
37872+ goto out;
37873+
37874+ arg->sb = sb;
37875+ arg->br = br;
37876+ arg->calc = *calc;
37877+ arg->ino = ino;
37878+ au_lcnt_inc(&br->br_count);
37879+ err = au_wkq_nowait(au_xino_call_do_new_async, arg, sb, AuWkq_NEST);
37880+ if (unlikely(err)) {
37881+ pr_err("wkq %d\n", err);
37882+ au_lcnt_dec(&br->br_count);
37883+ au_kfree_rcu(arg);
37884+ }
37885+
37886+out:
37887+ return err;
37888+}
37889+
37890+/*
37891+ * read @ino from xinofile for the specified branch{@sb, @bindex}
37892+ * at the position of @h_ino.
37893+ */
37894+int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
37895+ ino_t *ino)
37896+{
37897+ int err;
37898+ ssize_t sz;
37899+ struct au_xi_calc calc;
37900+ struct au_sbinfo *sbinfo;
37901+ struct file *file;
37902+ struct au_xino *xi;
37903+ struct hlist_bl_head *hbl;
37904+ struct hlist_bl_node *pos;
37905+ struct au_xi_writing *p;
37906+
37907+ *ino = 0;
37908+ if (!au_opt_test(au_mntflags(sb), XINO))
37909+ return 0; /* no xino */
37910+
37911+ err = 0;
37912+ au_xi_calc(sb, h_ino, &calc);
37913+ xi = au_sbr(sb, bindex)->br_xino;
37914+ file = au_xino_file(xi, calc.idx);
37915+ if (!file) {
37916+ hbl = &xi->xi_writing;
37917+ hlist_bl_lock(hbl);
37918+ au_hbl_for_each(pos, hbl) {
37919+ p = container_of(pos, struct au_xi_writing, node);
37920+ if (p->h_ino == h_ino) {
37921+ AuDbg("hi%llu, i%llu, found\n",
37922+ (u64)p->h_ino, (u64)p->ino);
37923+ *ino = p->ino;
37924+ break;
37925+ }
37926+ }
37927+ hlist_bl_unlock(hbl);
37928+ return 0;
37929+ } else if (vfsub_f_size_read(file) < calc.pos + sizeof(*ino))
37930+ return 0; /* no xino */
37931+
37932+ sbinfo = au_sbi(sb);
37933+ sz = xino_fread(file, ino, sizeof(*ino), &calc.pos);
37934+ if (sz == sizeof(*ino))
37935+ return 0; /* success */
37936+
37937+ err = sz;
37938+ if (unlikely(sz >= 0)) {
37939+ err = -EIO;
37940+ AuIOErr("xino read error (%zd)\n", sz);
37941+ }
37942+ return err;
37943+}
37944+
37945+static int au_xino_do_write(struct file *file, struct au_xi_calc *calc,
37946+ ino_t ino)
37947+{
37948+ ssize_t sz;
37949+
37950+ sz = xino_fwrite(file, &ino, sizeof(ino), &calc->pos);
37951+ if (sz == sizeof(ino))
37952+ return 0; /* success */
37953+
37954+ AuIOErr("write failed (%zd)\n", sz);
37955+ return -EIO;
37956+}
37957+
37958+/*
37959+ * write @ino to the xinofile for the specified branch{@sb, @bindex}
37960+ * at the position of @h_ino.
37961+ * even if @ino is zero, it is written to the xinofile and means no entry.
37962+ * if the size of the xino file on a specific filesystem exceeds the watermark,
37963+ * try truncating it.
37964+ */
37965+int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
37966+ ino_t ino)
37967+{
37968+ int err;
37969+ unsigned int mnt_flags;
37970+ struct au_xi_calc calc;
37971+ struct file *file;
37972+ struct au_branch *br;
37973+ struct au_xino *xi;
37974+ struct au_xi_writing *p;
37975+
37976+ SiMustAnyLock(sb);
37977+
37978+ mnt_flags = au_mntflags(sb);
37979+ if (!au_opt_test(mnt_flags, XINO))
37980+ return 0;
37981+
37982+ au_xi_calc(sb, h_ino, &calc);
37983+ br = au_sbr(sb, bindex);
37984+ xi = br->br_xino;
37985+ file = au_xino_file(xi, calc.idx);
37986+ if (!file) {
37987+ /* store the inum pair into the list */
37988+ p = kmalloc(sizeof(*p), GFP_NOFS | __GFP_NOFAIL);
37989+ p->h_ino = h_ino;
37990+ p->ino = ino;
37991+ au_hbl_add(&p->node, &xi->xi_writing);
37992+
37993+ /* create and write a new xino file asynchronously */
37994+ err = au_xino_new_async(sb, br, &calc, ino);
37995+ if (!err)
37996+ return 0; /* success */
37997+ goto out;
37998+ }
37999+
38000+ err = au_xino_do_write(file, &calc, ino);
38001+ if (!err) {
38002+ br = au_sbr(sb, bindex);
38003+ if (au_opt_test(mnt_flags, TRUNC_XINO)
38004+ && au_test_fs_trunc_xino(au_br_sb(br)))
38005+ xino_try_trunc(sb, br);
38006+ return 0; /* success */
38007+ }
38008+
38009+out:
38010+ AuIOErr("write failed (%d)\n", err);
38011+ return -EIO;
38012+}
38013+
38014+static ssize_t xino_fread_wkq(struct file *file, void *buf, size_t size,
38015+ loff_t *pos);
38016+
38017+/* todo: unnecessary to support mmap_sem since kernel-space? */
38018+ssize_t xino_fread(struct file *file, void *kbuf, size_t size, loff_t *pos)
38019+{
38020+ ssize_t err;
38021+ int i;
38022+ const int prevent_endless = 10;
38023+
38024+ i = 0;
38025+ do {
38026+ err = vfsub_read_k(file, kbuf, size, pos);
38027+ if (err == -EINTR
38028+ && !au_wkq_test()
38029+ && fatal_signal_pending(current)) {
38030+ err = xino_fread_wkq(file, kbuf, size, pos);
38031+ BUG_ON(err == -EINTR);
38032+ }
38033+ } while (i++ < prevent_endless
38034+ && (err == -EAGAIN || err == -EINTR));
38035+
38036+#if 0 /* reserved for future use */
38037+ if (err > 0)
38038+ fsnotify_access(file->f_path.dentry);
38039+#endif
38040+
38041+ return err;
38042+}
38043+
38044+struct xino_fread_args {
38045+ ssize_t *errp;
38046+ struct file *file;
38047+ void *buf;
38048+ size_t size;
38049+ loff_t *pos;
38050+};
38051+
38052+static void call_xino_fread(void *args)
38053+{
38054+ struct xino_fread_args *a = args;
38055+ *a->errp = xino_fread(a->file, a->buf, a->size, a->pos);
38056+}
38057+
38058+static ssize_t xino_fread_wkq(struct file *file, void *buf, size_t size,
38059+ loff_t *pos)
38060+{
38061+ ssize_t err;
38062+ int wkq_err;
38063+ struct xino_fread_args args = {
38064+ .errp = &err,
38065+ .file = file,
38066+ .buf = buf,
38067+ .size = size,
38068+ .pos = pos
38069+ };
38070+
38071+ wkq_err = au_wkq_wait(call_xino_fread, &args);
38072+ if (unlikely(wkq_err))
38073+ err = wkq_err;
38074+
38075+ return err;
38076+}
38077+
38078+static ssize_t xino_fwrite_wkq(struct file *file, void *buf, size_t size,
38079+ loff_t *pos);
38080+
38081+static ssize_t do_xino_fwrite(struct file *file, void *kbuf, size_t size,
38082+ loff_t *pos)
38083+{
38084+ ssize_t err;
38085+ int i;
38086+ const int prevent_endless = 10;
38087+
38088+ i = 0;
38089+ do {
38090+ err = vfsub_write_k(file, kbuf, size, pos);
38091+ if (err == -EINTR
38092+ && !au_wkq_test()
38093+ && fatal_signal_pending(current)) {
38094+ err = xino_fwrite_wkq(file, kbuf, size, pos);
38095+ BUG_ON(err == -EINTR);
38096+ }
38097+ } while (i++ < prevent_endless
38098+ && (err == -EAGAIN || err == -EINTR));
38099+
38100+#if 0 /* reserved for future use */
38101+ if (err > 0)
38102+ fsnotify_modify(file->f_path.dentry);
38103+#endif
38104+
38105+ return err;
38106+}
38107+
38108+struct do_xino_fwrite_args {
38109+ ssize_t *errp;
38110+ struct file *file;
38111+ void *buf;
38112+ size_t size;
38113+ loff_t *pos;
38114+};
38115+
38116+static void call_do_xino_fwrite(void *args)
38117+{
38118+ struct do_xino_fwrite_args *a = args;
38119+ *a->errp = do_xino_fwrite(a->file, a->buf, a->size, a->pos);
38120+}
38121+
38122+static ssize_t xino_fwrite_wkq(struct file *file, void *buf, size_t size,
38123+ loff_t *pos)
38124+{
38125+ ssize_t err;
38126+ int wkq_err;
38127+ struct do_xino_fwrite_args args = {
38128+ .errp = &err,
38129+ .file = file,
38130+ .buf = buf,
38131+ .size = size,
38132+ .pos = pos
38133+ };
38134+
38135+ /*
38136+ * it breaks RLIMIT_FSIZE and normal user's limit,
38137+ * users should care about quota and real 'filesystem full.'
38138+ */
38139+ wkq_err = au_wkq_wait(call_do_xino_fwrite, &args);
38140+ if (unlikely(wkq_err))
38141+ err = wkq_err;
38142+
38143+ return err;
38144+}
38145+
38146+ssize_t xino_fwrite(struct file *file, void *buf, size_t size, loff_t *pos)
38147+{
38148+ ssize_t err;
38149+
38150+ if (rlimit(RLIMIT_FSIZE) == RLIM_INFINITY) {
38151+ lockdep_off();
38152+ err = do_xino_fwrite(file, buf, size, pos);
38153+ lockdep_on();
38154+ } else {
38155+ lockdep_off();
38156+ err = xino_fwrite_wkq(file, buf, size, pos);
38157+ lockdep_on();
38158+ }
38159+
38160+ return err;
38161+}
38162+
38163+/* ---------------------------------------------------------------------- */
38164+
38165+/*
38166+ * inode number bitmap
38167+ */
38168+static const int page_bits = (int)PAGE_SIZE * BITS_PER_BYTE;
38169+static ino_t xib_calc_ino(unsigned long pindex, int bit)
38170+{
38171+ ino_t ino;
38172+
38173+ AuDebugOn(bit < 0 || page_bits <= bit);
38174+ ino = AUFS_FIRST_INO + pindex * page_bits + bit;
38175+ return ino;
38176+}
38177+
38178+static void xib_calc_bit(ino_t ino, unsigned long *pindex, int *bit)
38179+{
38180+ AuDebugOn(ino < AUFS_FIRST_INO);
38181+ ino -= AUFS_FIRST_INO;
38182+ *pindex = ino / page_bits;
38183+ *bit = ino % page_bits;
38184+}
38185+
38186+static int xib_pindex(struct super_block *sb, unsigned long pindex)
38187+{
38188+ int err;
38189+ loff_t pos;
38190+ ssize_t sz;
38191+ struct au_sbinfo *sbinfo;
38192+ struct file *xib;
38193+ unsigned long *p;
38194+
38195+ sbinfo = au_sbi(sb);
38196+ MtxMustLock(&sbinfo->si_xib_mtx);
38197+ AuDebugOn(pindex > ULONG_MAX / PAGE_SIZE
38198+ || !au_opt_test(sbinfo->si_mntflags, XINO));
38199+
38200+ if (pindex == sbinfo->si_xib_last_pindex)
38201+ return 0;
38202+
38203+ xib = sbinfo->si_xib;
38204+ p = sbinfo->si_xib_buf;
38205+ pos = sbinfo->si_xib_last_pindex;
38206+ pos *= PAGE_SIZE;
38207+ sz = xino_fwrite(xib, p, PAGE_SIZE, &pos);
38208+ if (unlikely(sz != PAGE_SIZE))
38209+ goto out;
38210+
38211+ pos = pindex;
38212+ pos *= PAGE_SIZE;
38213+ if (vfsub_f_size_read(xib) >= pos + PAGE_SIZE)
38214+ sz = xino_fread(xib, p, PAGE_SIZE, &pos);
38215+ else {
38216+ memset(p, 0, PAGE_SIZE);
38217+ sz = xino_fwrite(xib, p, PAGE_SIZE, &pos);
38218+ }
38219+ if (sz == PAGE_SIZE) {
38220+ sbinfo->si_xib_last_pindex = pindex;
38221+ return 0; /* success */
38222+ }
38223+
38224+out:
38225+ AuIOErr1("write failed (%zd)\n", sz);
38226+ err = sz;
38227+ if (sz >= 0)
38228+ err = -EIO;
38229+ return err;
38230+}
38231+
38232+static void au_xib_clear_bit(struct inode *inode)
38233+{
38234+ int err, bit;
38235+ unsigned long pindex;
38236+ struct super_block *sb;
38237+ struct au_sbinfo *sbinfo;
38238+
38239+ AuDebugOn(inode->i_nlink);
38240+
38241+ sb = inode->i_sb;
38242+ xib_calc_bit(inode->i_ino, &pindex, &bit);
38243+ AuDebugOn(page_bits <= bit);
38244+ sbinfo = au_sbi(sb);
38245+ mutex_lock(&sbinfo->si_xib_mtx);
38246+ err = xib_pindex(sb, pindex);
38247+ if (!err) {
38248+ clear_bit(bit, sbinfo->si_xib_buf);
38249+ sbinfo->si_xib_next_bit = bit;
38250+ }
38251+ mutex_unlock(&sbinfo->si_xib_mtx);
38252+}
38253+
38254+/* ---------------------------------------------------------------------- */
38255+
38256+/*
38257+ * truncate a xino bitmap file
38258+ */
38259+
38260+/* todo: slow */
38261+static int do_xib_restore(struct super_block *sb, struct file *file, void *page)
38262+{
38263+ int err, bit;
38264+ ssize_t sz;
38265+ unsigned long pindex;
38266+ loff_t pos, pend;
38267+ struct au_sbinfo *sbinfo;
38268+ ino_t *ino;
38269+ unsigned long *p;
38270+
38271+ err = 0;
38272+ sbinfo = au_sbi(sb);
38273+ MtxMustLock(&sbinfo->si_xib_mtx);
38274+ p = sbinfo->si_xib_buf;
38275+ pend = vfsub_f_size_read(file);
38276+ pos = 0;
38277+ while (pos < pend) {
38278+ sz = xino_fread(file, page, PAGE_SIZE, &pos);
38279+ err = sz;
38280+ if (unlikely(sz <= 0))
38281+ goto out;
38282+
38283+ err = 0;
38284+ for (ino = page; sz > 0; ino++, sz -= sizeof(ino)) {
38285+ if (unlikely(*ino < AUFS_FIRST_INO))
38286+ continue;
38287+
38288+ xib_calc_bit(*ino, &pindex, &bit);
38289+ AuDebugOn(page_bits <= bit);
38290+ err = xib_pindex(sb, pindex);
38291+ if (!err)
38292+ set_bit(bit, p);
38293+ else
38294+ goto out;
38295+ }
38296+ }
38297+
38298+out:
38299+ return err;
38300+}
38301+
38302+static int xib_restore(struct super_block *sb)
38303+{
38304+ int err, i;
38305+ unsigned int nfile;
38306+ aufs_bindex_t bindex, bbot;
38307+ void *page;
38308+ struct au_branch *br;
38309+ struct au_xino *xi;
38310+ struct file *file;
38311+
38312+ err = -ENOMEM;
38313+ page = (void *)__get_free_page(GFP_NOFS);
38314+ if (unlikely(!page))
38315+ goto out;
38316+
38317+ err = 0;
38318+ bbot = au_sbbot(sb);
38319+ for (bindex = 0; !err && bindex <= bbot; bindex++)
38320+ if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0) {
38321+ br = au_sbr(sb, bindex);
38322+ xi = br->br_xino;
38323+ nfile = xi->xi_nfile;
38324+ for (i = 0; i < nfile; i++) {
38325+ file = au_xino_file(xi, i);
38326+ if (file)
38327+ err = do_xib_restore(sb, file, page);
38328+ }
38329+ } else
38330+ AuDbg("skip shared b%d\n", bindex);
38331+ free_page((unsigned long)page);
38332+
38333+out:
38334+ return err;
38335+}
38336+
38337+int au_xib_trunc(struct super_block *sb)
38338+{
38339+ int err;
38340+ ssize_t sz;
38341+ loff_t pos;
38342+ struct au_sbinfo *sbinfo;
38343+ unsigned long *p;
38344+ struct file *file;
38345+
38346+ SiMustWriteLock(sb);
38347+
38348+ err = 0;
38349+ sbinfo = au_sbi(sb);
38350+ if (!au_opt_test(sbinfo->si_mntflags, XINO))
38351+ goto out;
38352+
38353+ file = sbinfo->si_xib;
38354+ if (vfsub_f_size_read(file) <= PAGE_SIZE)
38355+ goto out;
38356+
38357+ file = au_xino_create2(sb, &sbinfo->si_xib->f_path, NULL);
38358+ err = PTR_ERR(file);
38359+ if (IS_ERR(file))
38360+ goto out;
38361+ fput(sbinfo->si_xib);
38362+ sbinfo->si_xib = file;
38363+
38364+ p = sbinfo->si_xib_buf;
38365+ memset(p, 0, PAGE_SIZE);
38366+ pos = 0;
38367+ sz = xino_fwrite(sbinfo->si_xib, p, PAGE_SIZE, &pos);
38368+ if (unlikely(sz != PAGE_SIZE)) {
38369+ err = sz;
38370+ AuIOErr("err %d\n", err);
38371+ if (sz >= 0)
38372+ err = -EIO;
38373+ goto out;
38374+ }
38375+
38376+ mutex_lock(&sbinfo->si_xib_mtx);
38377+ /* mnt_want_write() is unnecessary here */
38378+ err = xib_restore(sb);
38379+ mutex_unlock(&sbinfo->si_xib_mtx);
38380+
38381+out:
38382+ return err;
38383+}
38384+
38385+/* ---------------------------------------------------------------------- */
38386+
38387+struct au_xino *au_xino_alloc(unsigned int nfile)
38388+{
38389+ struct au_xino *xi;
38390+
38391+ xi = kzalloc(sizeof(*xi), GFP_NOFS);
38392+ if (unlikely(!xi))
38393+ goto out;
38394+ xi->xi_nfile = nfile;
38395+ xi->xi_file = kcalloc(nfile, sizeof(*xi->xi_file), GFP_NOFS);
38396+ if (unlikely(!xi->xi_file))
38397+ goto out_free;
38398+
38399+ xi->xi_nondir.total = 8; /* initial size */
38400+ xi->xi_nondir.array = kcalloc(xi->xi_nondir.total, sizeof(ino_t),
38401+ GFP_NOFS);
38402+ if (unlikely(!xi->xi_nondir.array))
38403+ goto out_file;
38404+
38405+ spin_lock_init(&xi->xi_nondir.spin);
38406+ init_waitqueue_head(&xi->xi_nondir.wqh);
38407+ mutex_init(&xi->xi_mtx);
38408+ INIT_HLIST_BL_HEAD(&xi->xi_writing);
38409+ atomic_set(&xi->xi_truncating, 0);
38410+ kref_init(&xi->xi_kref);
38411+ goto out; /* success */
38412+
38413+out_file:
38414+ au_kfree_try_rcu(xi->xi_file);
38415+out_free:
38416+ au_kfree_rcu(xi);
38417+ xi = NULL;
38418+out:
38419+ return xi;
38420+}
38421+
38422+static int au_xino_init(struct au_branch *br, int idx, struct file *file)
38423+{
38424+ int err;
38425+ struct au_xino *xi;
38426+
38427+ err = 0;
38428+ xi = au_xino_alloc(idx + 1);
38429+ if (unlikely(!xi)) {
38430+ err = -ENOMEM;
38431+ goto out;
38432+ }
38433+
38434+ if (file)
38435+ get_file(file);
38436+ xi->xi_file[idx] = file;
38437+ AuDebugOn(br->br_xino);
38438+ br->br_xino = xi;
38439+
38440+out:
38441+ return err;
38442+}
38443+
38444+static void au_xino_release(struct kref *kref)
38445+{
38446+ struct au_xino *xi;
38447+ int i;
38448+ unsigned long ul;
38449+ struct hlist_bl_head *hbl;
38450+ struct hlist_bl_node *pos, *n;
38451+ struct au_xi_writing *p;
38452+
38453+ xi = container_of(kref, struct au_xino, xi_kref);
38454+ for (i = 0; i < xi->xi_nfile; i++)
38455+ if (xi->xi_file[i])
38456+ fput(xi->xi_file[i]);
38457+ for (i = xi->xi_nondir.total - 1; i >= 0; i--)
38458+ AuDebugOn(xi->xi_nondir.array[i]);
38459+ mutex_destroy(&xi->xi_mtx);
38460+ hbl = &xi->xi_writing;
38461+ ul = au_hbl_count(hbl);
38462+ if (unlikely(ul)) {
38463+ pr_warn("xi_writing %lu\n", ul);
38464+ hlist_bl_lock(hbl);
38465+ hlist_bl_for_each_entry_safe(p, pos, n, hbl, node) {
38466+ hlist_bl_del(&p->node);
38467+ /* kmemleak reported au_kfree_rcu() doesn't free it */
38468+ kfree(p);
38469+ }
38470+ hlist_bl_unlock(hbl);
38471+ }
38472+ au_kfree_try_rcu(xi->xi_file);
38473+ au_kfree_try_rcu(xi->xi_nondir.array);
38474+ au_kfree_rcu(xi);
38475+}
38476+
38477+int au_xino_put(struct au_branch *br)
38478+{
38479+ int ret;
38480+ struct au_xino *xi;
38481+
38482+ ret = 0;
38483+ xi = br->br_xino;
38484+ if (xi) {
38485+ br->br_xino = NULL;
38486+ ret = kref_put(&xi->xi_kref, au_xino_release);
38487+ }
38488+
38489+ return ret;
38490+}
38491+
38492+/* ---------------------------------------------------------------------- */
38493+
38494+/*
38495+ * xino mount option handlers
38496+ */
38497+
38498+/* xino bitmap */
38499+static void xino_clear_xib(struct super_block *sb)
38500+{
38501+ struct au_sbinfo *sbinfo;
38502+
38503+ SiMustWriteLock(sb);
38504+
38505+ sbinfo = au_sbi(sb);
38506+ if (sbinfo->si_xib)
38507+ fput(sbinfo->si_xib);
38508+ sbinfo->si_xib = NULL;
38509+ if (sbinfo->si_xib_buf)
38510+ free_page((unsigned long)sbinfo->si_xib_buf);
38511+ sbinfo->si_xib_buf = NULL;
38512+}
38513+
38514+static int au_xino_set_xib(struct super_block *sb, struct path *path)
38515+{
38516+ int err;
38517+ loff_t pos;
38518+ struct au_sbinfo *sbinfo;
38519+ struct file *file;
38520+ struct super_block *xi_sb;
38521+
38522+ SiMustWriteLock(sb);
38523+
38524+ sbinfo = au_sbi(sb);
38525+ file = au_xino_create2(sb, path, sbinfo->si_xib);
38526+ err = PTR_ERR(file);
38527+ if (IS_ERR(file))
38528+ goto out;
38529+ if (sbinfo->si_xib)
38530+ fput(sbinfo->si_xib);
38531+ sbinfo->si_xib = file;
38532+ xi_sb = file_inode(file)->i_sb;
38533+ sbinfo->si_ximaxent = xi_sb->s_maxbytes;
38534+ if (unlikely(sbinfo->si_ximaxent < PAGE_SIZE)) {
38535+ err = -EIO;
38536+ pr_err("s_maxbytes(%llu) on %s is too small\n",
38537+ (u64)sbinfo->si_ximaxent, au_sbtype(xi_sb));
38538+ goto out_unset;
38539+ }
38540+ sbinfo->si_ximaxent /= sizeof(ino_t);
38541+
38542+ err = -ENOMEM;
38543+ if (!sbinfo->si_xib_buf)
38544+ sbinfo->si_xib_buf = (void *)get_zeroed_page(GFP_NOFS);
38545+ if (unlikely(!sbinfo->si_xib_buf))
38546+ goto out_unset;
38547+
38548+ sbinfo->si_xib_last_pindex = 0;
38549+ sbinfo->si_xib_next_bit = 0;
38550+ if (vfsub_f_size_read(file) < PAGE_SIZE) {
38551+ pos = 0;
38552+ err = xino_fwrite(file, sbinfo->si_xib_buf, PAGE_SIZE, &pos);
38553+ if (unlikely(err != PAGE_SIZE))
38554+ goto out_free;
38555+ }
38556+ err = 0;
38557+ goto out; /* success */
38558+
38559+out_free:
38560+ if (sbinfo->si_xib_buf)
38561+ free_page((unsigned long)sbinfo->si_xib_buf);
38562+ sbinfo->si_xib_buf = NULL;
38563+ if (err >= 0)
38564+ err = -EIO;
38565+out_unset:
38566+ fput(sbinfo->si_xib);
38567+ sbinfo->si_xib = NULL;
38568+out:
38569+ AuTraceErr(err);
38570+ return err;
38571+}
38572+
38573+/* xino for each branch */
38574+static void xino_clear_br(struct super_block *sb)
38575+{
38576+ aufs_bindex_t bindex, bbot;
38577+ struct au_branch *br;
38578+
38579+ bbot = au_sbbot(sb);
38580+ for (bindex = 0; bindex <= bbot; bindex++) {
38581+ br = au_sbr(sb, bindex);
38582+ AuDebugOn(!br);
38583+ au_xino_put(br);
38584+ }
38585+}
38586+
38587+static void au_xino_set_br_shared(struct super_block *sb, struct au_branch *br,
38588+ aufs_bindex_t bshared)
38589+{
38590+ struct au_branch *brshared;
38591+
38592+ brshared = au_sbr(sb, bshared);
38593+ AuDebugOn(!brshared->br_xino);
38594+ AuDebugOn(!brshared->br_xino->xi_file);
38595+ if (br->br_xino != brshared->br_xino) {
38596+ au_xino_get(brshared);
38597+ au_xino_put(br);
38598+ br->br_xino = brshared->br_xino;
38599+ }
38600+}
38601+
38602+struct au_xino_do_set_br {
38603+ struct au_branch *br;
38604+ ino_t h_ino;
38605+ aufs_bindex_t bshared;
38606+};
38607+
38608+static int au_xino_do_set_br(struct super_block *sb, struct path *path,
38609+ struct au_xino_do_set_br *args)
38610+{
38611+ int err;
38612+ struct au_xi_calc calc;
38613+ struct file *file;
38614+ struct au_branch *br;
38615+ struct au_xi_new xinew = {
38616+ .base = path
38617+ };
38618+
38619+ br = args->br;
38620+ xinew.xi = br->br_xino;
38621+ au_xi_calc(sb, args->h_ino, &calc);
38622+ xinew.copy_src = au_xino_file(xinew.xi, calc.idx);
38623+ if (args->bshared >= 0)
38624+ /* shared xino */
38625+ au_xino_set_br_shared(sb, br, args->bshared);
38626+ else if (!xinew.xi) {
38627+ /* new xino */
38628+ err = au_xino_init(br, calc.idx, xinew.copy_src);
38629+ if (unlikely(err))
38630+ goto out;
38631+ }
38632+
38633+ /* force re-creating */
38634+ xinew.xi = br->br_xino;
38635+ xinew.idx = calc.idx;
38636+ mutex_lock(&xinew.xi->xi_mtx);
38637+ file = au_xi_new(sb, &xinew);
38638+ mutex_unlock(&xinew.xi->xi_mtx);
38639+ err = PTR_ERR(file);
38640+ if (IS_ERR(file))
38641+ goto out;
38642+ AuDebugOn(!file);
38643+
38644+ err = au_xino_do_write(file, &calc, AUFS_ROOT_INO);
38645+ if (unlikely(err))
38646+ au_xino_put(br);
38647+
38648+out:
38649+ AuTraceErr(err);
38650+ return err;
38651+}
38652+
38653+static int au_xino_set_br(struct super_block *sb, struct path *path)
38654+{
38655+ int err;
38656+ aufs_bindex_t bindex, bbot;
38657+ struct au_xino_do_set_br args;
38658+ struct inode *inode;
38659+
38660+ SiMustWriteLock(sb);
38661+
38662+ bbot = au_sbbot(sb);
38663+ inode = d_inode(sb->s_root);
38664+ for (bindex = 0; bindex <= bbot; bindex++) {
38665+ args.h_ino = au_h_iptr(inode, bindex)->i_ino;
38666+ args.br = au_sbr(sb, bindex);
38667+ args.bshared = is_sb_shared(sb, bindex, bindex - 1);
38668+ err = au_xino_do_set_br(sb, path, &args);
38669+ if (unlikely(err))
38670+ break;
38671+ }
38672+
38673+ AuTraceErr(err);
38674+ return err;
38675+}
38676+
38677+void au_xino_clr(struct super_block *sb)
38678+{
38679+ struct au_sbinfo *sbinfo;
38680+
38681+ au_xigen_clr(sb);
38682+ xino_clear_xib(sb);
38683+ xino_clear_br(sb);
38684+ dbgaufs_brs_del(sb, 0);
38685+ sbinfo = au_sbi(sb);
38686+ /* lvalue, do not call au_mntflags() */
38687+ au_opt_clr(sbinfo->si_mntflags, XINO);
38688+}
38689+
38690+int au_xino_set(struct super_block *sb, struct au_opt_xino *xiopt, int remount)
38691+{
38692+ int err, skip;
38693+ struct dentry *dentry, *parent, *cur_dentry, *cur_parent;
38694+ struct qstr *dname, *cur_name;
38695+ struct file *cur_xino;
38696+ struct au_sbinfo *sbinfo;
38697+ struct path *path, *cur_path;
38698+
38699+ SiMustWriteLock(sb);
38700+
38701+ err = 0;
38702+ sbinfo = au_sbi(sb);
38703+ path = &xiopt->file->f_path;
38704+ dentry = path->dentry;
38705+ parent = dget_parent(dentry);
38706+ if (remount) {
38707+ skip = 0;
38708+ cur_xino = sbinfo->si_xib;
38709+ if (cur_xino) {
38710+ cur_path = &cur_xino->f_path;
38711+ cur_dentry = cur_path->dentry;
38712+ cur_parent = dget_parent(cur_dentry);
38713+ cur_name = &cur_dentry->d_name;
38714+ dname = &dentry->d_name;
38715+ skip = (cur_parent == parent
38716+ && au_qstreq(dname, cur_name));
38717+ dput(cur_parent);
38718+ }
38719+ if (skip)
38720+ goto out;
38721+ }
38722+
38723+ au_opt_set(sbinfo->si_mntflags, XINO);
38724+ err = au_xino_set_xib(sb, path);
38725+ /* si_x{read,write} are set */
38726+ if (!err)
38727+ err = au_xigen_set(sb, path);
38728+ if (!err)
38729+ err = au_xino_set_br(sb, path);
38730+ if (!err) {
38731+ dbgaufs_brs_add(sb, 0, /*topdown*/1);
38732+ goto out; /* success */
38733+ }
38734+
38735+ /* reset all */
38736+ AuIOErr("failed setting xino(%d).\n", err);
38737+ au_xino_clr(sb);
38738+
38739+out:
38740+ dput(parent);
38741+ return err;
38742+}
38743+
38744+/*
38745+ * create a xinofile at the default place/path.
38746+ */
38747+struct file *au_xino_def(struct super_block *sb)
38748+{
38749+ struct file *file;
38750+ char *page, *p;
38751+ struct au_branch *br;
38752+ struct super_block *h_sb;
38753+ struct path path;
38754+ aufs_bindex_t bbot, bindex, bwr;
38755+
38756+ br = NULL;
38757+ bbot = au_sbbot(sb);
38758+ bwr = -1;
38759+ for (bindex = 0; bindex <= bbot; bindex++) {
38760+ br = au_sbr(sb, bindex);
38761+ if (au_br_writable(br->br_perm)
38762+ && !au_test_fs_bad_xino(au_br_sb(br))) {
38763+ bwr = bindex;
38764+ break;
38765+ }
38766+ }
38767+
38768+ if (bwr >= 0) {
38769+ file = ERR_PTR(-ENOMEM);
38770+ page = (void *)__get_free_page(GFP_NOFS);
38771+ if (unlikely(!page))
38772+ goto out;
38773+ path.mnt = au_br_mnt(br);
38774+ path.dentry = au_h_dptr(sb->s_root, bwr);
38775+ p = d_path(&path, page, PATH_MAX - sizeof(AUFS_XINO_FNAME));
38776+ file = (void *)p;
38777+ if (!IS_ERR(p)) {
38778+ strcat(p, "/" AUFS_XINO_FNAME);
38779+ AuDbg("%s\n", p);
38780+ file = au_xino_create(sb, p, /*silent*/0, /*wbrtop*/1);
38781+ }
38782+ free_page((unsigned long)page);
38783+ } else {
38784+ file = au_xino_create(sb, AUFS_XINO_DEFPATH, /*silent*/0,
38785+ /*wbrtop*/0);
38786+ if (IS_ERR(file))
38787+ goto out;
38788+ h_sb = file->f_path.dentry->d_sb;
38789+ if (unlikely(au_test_fs_bad_xino(h_sb))) {
38790+ pr_err("xino doesn't support %s(%s)\n",
38791+ AUFS_XINO_DEFPATH, au_sbtype(h_sb));
38792+ fput(file);
38793+ file = ERR_PTR(-EINVAL);
38794+ }
38795+ }
38796+
38797+out:
38798+ return file;
38799+}
38800+
38801+/* ---------------------------------------------------------------------- */
38802+
38803+/*
38804+ * initialize the xinofile for the specified branch @br
38805+ * at the place/path where @base indicates.
38806+ * test whether another branch is on the same filesystem or not,
38807+ * if found then share the xinofile with another branch.
38808+ */
38809+int au_xino_init_br(struct super_block *sb, struct au_branch *br, ino_t h_ino,
38810+ struct path *base)
38811+{
38812+ int err;
38813+ struct au_xino_do_set_br args = {
38814+ .h_ino = h_ino,
38815+ .br = br
38816+ };
38817+
38818+ args.bshared = sbr_find_shared(sb, /*btop*/0, au_sbbot(sb),
38819+ au_br_sb(br));
38820+ err = au_xino_do_set_br(sb, base, &args);
38821+ if (unlikely(err))
38822+ au_xino_put(br);
38823+
38824+ return err;
38825+}
38826+
38827+/* ---------------------------------------------------------------------- */
38828+
38829+/*
38830+ * get an unused inode number from bitmap
38831+ */
38832+ino_t au_xino_new_ino(struct super_block *sb)
38833+{
38834+ ino_t ino;
38835+ unsigned long *p, pindex, ul, pend;
38836+ struct au_sbinfo *sbinfo;
38837+ struct file *file;
38838+ int free_bit, err;
38839+
38840+ if (!au_opt_test(au_mntflags(sb), XINO))
38841+ return iunique(sb, AUFS_FIRST_INO);
38842+
38843+ sbinfo = au_sbi(sb);
38844+ mutex_lock(&sbinfo->si_xib_mtx);
38845+ p = sbinfo->si_xib_buf;
38846+ free_bit = sbinfo->si_xib_next_bit;
38847+ if (free_bit < page_bits && !test_bit(free_bit, p))
38848+ goto out; /* success */
38849+ free_bit = find_first_zero_bit(p, page_bits);
38850+ if (free_bit < page_bits)
38851+ goto out; /* success */
38852+
38853+ pindex = sbinfo->si_xib_last_pindex;
38854+ for (ul = pindex - 1; ul < ULONG_MAX; ul--) {
38855+ err = xib_pindex(sb, ul);
38856+ if (unlikely(err))
38857+ goto out_err;
38858+ free_bit = find_first_zero_bit(p, page_bits);
38859+ if (free_bit < page_bits)
38860+ goto out; /* success */
38861+ }
38862+
38863+ file = sbinfo->si_xib;
38864+ pend = vfsub_f_size_read(file) / PAGE_SIZE;
38865+ for (ul = pindex + 1; ul <= pend; ul++) {
38866+ err = xib_pindex(sb, ul);
38867+ if (unlikely(err))
38868+ goto out_err;
38869+ free_bit = find_first_zero_bit(p, page_bits);
38870+ if (free_bit < page_bits)
38871+ goto out; /* success */
38872+ }
38873+ BUG();
38874+
38875+out:
38876+ set_bit(free_bit, p);
38877+ sbinfo->si_xib_next_bit = free_bit + 1;
38878+ pindex = sbinfo->si_xib_last_pindex;
38879+ mutex_unlock(&sbinfo->si_xib_mtx);
38880+ ino = xib_calc_ino(pindex, free_bit);
38881+ AuDbg("i%lu\n", (unsigned long)ino);
38882+ return ino;
38883+out_err:
38884+ mutex_unlock(&sbinfo->si_xib_mtx);
38885+ AuDbg("i0\n");
38886+ return 0;
38887+}
38888+
38889+/* for s_op->delete_inode() */
38890+void au_xino_delete_inode(struct inode *inode, const int unlinked)
38891+{
38892+ int err;
38893+ unsigned int mnt_flags;
38894+ aufs_bindex_t bindex, bbot, bi;
38895+ unsigned char try_trunc;
38896+ struct au_iinfo *iinfo;
38897+ struct super_block *sb;
38898+ struct au_hinode *hi;
38899+ struct inode *h_inode;
38900+ struct au_branch *br;
38901+ struct au_xi_calc calc;
38902+ struct file *file;
38903+
38904+ AuDebugOn(au_is_bad_inode(inode));
38905+
38906+ sb = inode->i_sb;
38907+ mnt_flags = au_mntflags(sb);
38908+ if (!au_opt_test(mnt_flags, XINO)
38909+ || inode->i_ino == AUFS_ROOT_INO)
38910+ return;
38911+
38912+ if (unlinked) {
38913+ au_xigen_inc(inode);
38914+ au_xib_clear_bit(inode);
38915+ }
38916+
38917+ iinfo = au_ii(inode);
38918+ bindex = iinfo->ii_btop;
38919+ if (bindex < 0)
38920+ return;
38921+
38922+ try_trunc = !!au_opt_test(mnt_flags, TRUNC_XINO);
38923+ hi = au_hinode(iinfo, bindex);
38924+ bbot = iinfo->ii_bbot;
38925+ for (; bindex <= bbot; bindex++, hi++) {
38926+ h_inode = hi->hi_inode;
38927+ if (!h_inode
38928+ || (!unlinked && h_inode->i_nlink))
38929+ continue;
38930+
38931+ /* inode may not be revalidated */
38932+ bi = au_br_index(sb, hi->hi_id);
38933+ if (bi < 0)
38934+ continue;
38935+
38936+ br = au_sbr(sb, bi);
38937+ au_xi_calc(sb, h_inode->i_ino, &calc);
38938+ file = au_xino_file(br->br_xino, calc.idx);
38939+ if (IS_ERR_OR_NULL(file))
38940+ continue;
38941+
38942+ err = au_xino_do_write(file, &calc, /*ino*/0);
38943+ if (!err && try_trunc
38944+ && au_test_fs_trunc_xino(au_br_sb(br)))
38945+ xino_try_trunc(sb, br);
38946+ }
38947+}
38948+
38949+/* ---------------------------------------------------------------------- */
38950+
38951+static int au_xinondir_find(struct au_xino *xi, ino_t h_ino)
38952+{
38953+ int found, total, i;
38954+
38955+ found = -1;
38956+ total = xi->xi_nondir.total;
38957+ for (i = 0; i < total; i++) {
38958+ if (xi->xi_nondir.array[i] != h_ino)
38959+ continue;
38960+ found = i;
38961+ break;
38962+ }
38963+
38964+ return found;
38965+}
38966+
38967+static int au_xinondir_expand(struct au_xino *xi)
38968+{
38969+ int err, sz;
38970+ ino_t *p;
38971+
38972+ BUILD_BUG_ON(KMALLOC_MAX_SIZE > INT_MAX);
38973+
38974+ err = -ENOMEM;
38975+ sz = xi->xi_nondir.total * sizeof(ino_t);
38976+ if (unlikely(sz > KMALLOC_MAX_SIZE / 2))
38977+ goto out;
38978+ p = au_kzrealloc(xi->xi_nondir.array, sz, sz << 1, GFP_ATOMIC,
38979+ /*may_shrink*/0);
38980+ if (p) {
38981+ xi->xi_nondir.array = p;
38982+ xi->xi_nondir.total <<= 1;
38983+ AuDbg("xi_nondir.total %d\n", xi->xi_nondir.total);
38984+ err = 0;
38985+ }
38986+
38987+out:
38988+ return err;
38989+}
38990+
38991+void au_xinondir_leave(struct super_block *sb, aufs_bindex_t bindex,
38992+ ino_t h_ino, int idx)
38993+{
38994+ struct au_xino *xi;
38995+
38996+ AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));
38997+ xi = au_sbr(sb, bindex)->br_xino;
38998+ AuDebugOn(idx < 0 || xi->xi_nondir.total <= idx);
38999+
39000+ spin_lock(&xi->xi_nondir.spin);
39001+ AuDebugOn(xi->xi_nondir.array[idx] != h_ino);
39002+ xi->xi_nondir.array[idx] = 0;
39003+ spin_unlock(&xi->xi_nondir.spin);
39004+ wake_up_all(&xi->xi_nondir.wqh);
39005+}
39006+
39007+int au_xinondir_enter(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
39008+ int *idx)
39009+{
39010+ int err, found, empty;
39011+ struct au_xino *xi;
39012+
39013+ err = 0;
39014+ *idx = -1;
39015+ if (!au_opt_test(au_mntflags(sb), XINO))
39016+ goto out; /* no xino */
39017+
39018+ xi = au_sbr(sb, bindex)->br_xino;
39019+
39020+again:
39021+ spin_lock(&xi->xi_nondir.spin);
39022+ found = au_xinondir_find(xi, h_ino);
39023+ if (found == -1) {
39024+ empty = au_xinondir_find(xi, /*h_ino*/0);
39025+ if (empty == -1) {
39026+ empty = xi->xi_nondir.total;
39027+ err = au_xinondir_expand(xi);
39028+ if (unlikely(err))
39029+ goto out_unlock;
39030+ }
39031+ xi->xi_nondir.array[empty] = h_ino;
39032+ *idx = empty;
39033+ } else {
39034+ spin_unlock(&xi->xi_nondir.spin);
39035+ wait_event(xi->xi_nondir.wqh,
39036+ xi->xi_nondir.array[found] != h_ino);
39037+ goto again;
39038+ }
39039+
39040+out_unlock:
39041+ spin_unlock(&xi->xi_nondir.spin);
39042+out:
39043+ return err;
39044+}
39045+
39046+/* ---------------------------------------------------------------------- */
39047+
39048+int au_xino_path(struct seq_file *seq, struct file *file)
39049+{
39050+ int err;
39051+
39052+ err = au_seq_path(seq, &file->f_path);
39053+ if (unlikely(err))
39054+ goto out;
39055+
39056+#define Deleted "\\040(deleted)"
39057+ seq->count -= sizeof(Deleted) - 1;
39058+ AuDebugOn(memcmp(seq->buf + seq->count, Deleted,
39059+ sizeof(Deleted) - 1));
39060+#undef Deleted
39061+
39062+out:
39063+ return err;
39064+}
39065diff -urN /usr/share/empty/include/uapi/linux/aufs_type.h linux/include/uapi/linux/aufs_type.h
39066--- /usr/share/empty/include/uapi/linux/aufs_type.h 1970-01-01 01:00:00.000000000 +0100
39067+++ linux/include/uapi/linux/aufs_type.h 2023-02-20 21:05:51.959693785 +0100
39068@@ -0,0 +1,452 @@
39069+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
39070+/*
39071+ * Copyright (C) 2005-2022 Junjiro R. Okajima
39072+ *
39073+ * This program is free software; you can redistribute it and/or modify
39074+ * it under the terms of the GNU General Public License as published by
39075+ * the Free Software Foundation; either version 2 of the License, or
39076+ * (at your option) any later version.
39077+ *
39078+ * This program is distributed in the hope that it will be useful,
39079+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
39080+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
39081+ * GNU General Public License for more details.
39082+ *
39083+ * You should have received a copy of the GNU General Public License
39084+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
39085+ */
39086+
39087+#ifndef __AUFS_TYPE_H__
39088+#define __AUFS_TYPE_H__
39089+
39090+#define AUFS_NAME "aufs"
39091+
39092+#ifdef __KERNEL__
39093+/*
39094+ * define it before including all other headers.
39095+ * sched.h may use pr_* macros before defining "current", so define the
39096+ * no-current version first, and re-define later.
39097+ */
39098+#define pr_fmt(fmt) AUFS_NAME " %s:%d: " fmt, __func__, __LINE__
39099+#include <linux/sched.h>
39100+#undef pr_fmt
39101+#define pr_fmt(fmt) \
39102+ AUFS_NAME " %s:%d:%.*s[%d]: " fmt, __func__, __LINE__, \
39103+ (int)sizeof(current->comm), current->comm, current->pid
39104+#include <linux/limits.h>
39105+#else
39106+#include <stdint.h>
39107+#include <sys/types.h>
39108+#include <limits.h>
39109+#endif /* __KERNEL__ */
39110+
39111+#define AUFS_VERSION "6.x-rcN-20230109"
39112+
39113+/* todo? move this to linux-2.6.19/include/magic.h */
39114+#define AUFS_SUPER_MAGIC ('a' << 24 | 'u' << 16 | 'f' << 8 | 's')
39115+
39116+/* ---------------------------------------------------------------------- */
39117+
39118+#ifdef __KERNEL__
39119+#ifdef CONFIG_AUFS_BRANCH_MAX_127
39120+typedef int8_t aufs_bindex_t;
39121+#define AUFS_BRANCH_MAX 127
39122+#else
39123+typedef int16_t aufs_bindex_t;
39124+#ifdef CONFIG_AUFS_BRANCH_MAX_511
39125+#define AUFS_BRANCH_MAX 511
39126+#elif defined(CONFIG_AUFS_BRANCH_MAX_1023)
39127+#define AUFS_BRANCH_MAX 1023
39128+#elif defined(CONFIG_AUFS_BRANCH_MAX_32767)
39129+#define AUFS_BRANCH_MAX 32767
39130+#endif
39131+#endif
39132+
39133+#ifndef AUFS_BRANCH_MAX
39134+#error unknown CONFIG_AUFS_BRANCH_MAX value
39135+#endif
39136+#endif /* __KERNEL__ */
39137+
39138+/* ---------------------------------------------------------------------- */
39139+
39140+#define AUFS_FSTYPE AUFS_NAME
39141+
39142+#define AUFS_ROOT_INO 2
39143+#define AUFS_FIRST_INO 11
39144+
39145+#define AUFS_WH_PFX ".wh."
39146+#define AUFS_WH_PFX_LEN ((int)sizeof(AUFS_WH_PFX) - 1)
39147+#define AUFS_WH_TMP_LEN 4
39148+/* a limit for rmdir/rename a dir and copyup */
39149+#define AUFS_MAX_NAMELEN (NAME_MAX \
39150+ - AUFS_WH_PFX_LEN * 2 /* doubly whiteouted */\
39151+ - 1 /* dot */\
39152+ - AUFS_WH_TMP_LEN) /* hex */
39153+#define AUFS_XINO_FNAME "." AUFS_NAME ".xino"
39154+#define AUFS_XINO_DEFPATH "/tmp/" AUFS_XINO_FNAME
39155+#define AUFS_XINO_DEF_SEC 30 /* seconds */
39156+#define AUFS_XINO_DEF_TRUNC 45 /* percentage */
39157+#define AUFS_DIRWH_DEF 3
39158+#define AUFS_RDCACHE_DEF 10 /* seconds */
39159+#define AUFS_RDCACHE_MAX 3600 /* seconds */
39160+#define AUFS_RDBLK_DEF 512 /* bytes */
39161+#define AUFS_RDHASH_DEF 32
39162+#define AUFS_WKQ_NAME AUFS_NAME "d"
39163+#define AUFS_MFS_DEF_SEC 30 /* seconds */
39164+#define AUFS_MFS_MAX_SEC 3600 /* seconds */
39165+#define AUFS_FHSM_CACHE_DEF_SEC 30 /* seconds */
39166+#define AUFS_PLINK_WARN 50 /* number of plinks in a single bucket */
39167+
39168+/* pseudo-link maintenance under /proc */
39169+#define AUFS_PLINK_MAINT_NAME "plink_maint"
39170+#define AUFS_PLINK_MAINT_DIR "fs/" AUFS_NAME
39171+#define AUFS_PLINK_MAINT_PATH AUFS_PLINK_MAINT_DIR "/" AUFS_PLINK_MAINT_NAME
39172+
39173+/* dirren, renamed dir */
39174+#define AUFS_DR_INFO_PFX AUFS_WH_PFX ".dr."
39175+#define AUFS_DR_BRHINO_NAME AUFS_WH_PFX "hino"
39176+/* whiteouted doubly */
39177+#define AUFS_WH_DR_INFO_PFX AUFS_WH_PFX AUFS_DR_INFO_PFX
39178+#define AUFS_WH_DR_BRHINO AUFS_WH_PFX AUFS_DR_BRHINO_NAME
39179+
39180+#define AUFS_DIROPQ_NAME AUFS_WH_PFX ".opq" /* whiteouted doubly */
39181+#define AUFS_WH_DIROPQ AUFS_WH_PFX AUFS_DIROPQ_NAME
39182+
39183+#define AUFS_BASE_NAME AUFS_WH_PFX AUFS_NAME
39184+#define AUFS_PLINKDIR_NAME AUFS_WH_PFX "plnk"
39185+#define AUFS_ORPHDIR_NAME AUFS_WH_PFX "orph"
39186+
39187+/* doubly whiteouted */
39188+#define AUFS_WH_BASE AUFS_WH_PFX AUFS_BASE_NAME
39189+#define AUFS_WH_PLINKDIR AUFS_WH_PFX AUFS_PLINKDIR_NAME
39190+#define AUFS_WH_ORPHDIR AUFS_WH_PFX AUFS_ORPHDIR_NAME
39191+
39192+/* branch permissions and attributes */
39193+#define AUFS_BRPERM_RW "rw"
39194+#define AUFS_BRPERM_RO "ro"
39195+#define AUFS_BRPERM_RR "rr"
39196+#define AUFS_BRATTR_COO_REG "coo_reg"
39197+#define AUFS_BRATTR_COO_ALL "coo_all"
39198+#define AUFS_BRATTR_FHSM "fhsm"
39199+#define AUFS_BRATTR_UNPIN "unpin"
39200+#define AUFS_BRATTR_ICEX "icex"
39201+#define AUFS_BRATTR_ICEX_SEC "icexsec"
39202+#define AUFS_BRATTR_ICEX_SYS "icexsys"
39203+#define AUFS_BRATTR_ICEX_TR "icextr"
39204+#define AUFS_BRATTR_ICEX_USR "icexusr"
39205+#define AUFS_BRATTR_ICEX_OTH "icexoth"
39206+#define AUFS_BRRATTR_WH "wh"
39207+#define AUFS_BRWATTR_NLWH "nolwh"
39208+#define AUFS_BRWATTR_MOO "moo"
39209+
39210+#define AuBrPerm_RW 1 /* writable, hardlinkable wh */
39211+#define AuBrPerm_RO (1 << 1) /* readonly */
39212+#define AuBrPerm_RR (1 << 2) /* natively readonly */
39213+#define AuBrPerm_Mask (AuBrPerm_RW | AuBrPerm_RO | AuBrPerm_RR)
39214+
39215+#define AuBrAttr_COO_REG (1 << 3) /* copy-up on open */
39216+#define AuBrAttr_COO_ALL (1 << 4)
39217+#define AuBrAttr_COO_Mask (AuBrAttr_COO_REG | AuBrAttr_COO_ALL)
39218+
39219+#define AuBrAttr_FHSM (1 << 5) /* file-based hsm */
39220+#define AuBrAttr_UNPIN (1 << 6) /* rename-able top dir of
39221+ branch. meaningless since
39222+ linux-3.18-rc1 */
39223+
39224+/* ignore error in copying XATTR */
39225+#define AuBrAttr_ICEX_SEC (1 << 7)
39226+#define AuBrAttr_ICEX_SYS (1 << 8)
39227+#define AuBrAttr_ICEX_TR (1 << 9)
39228+#define AuBrAttr_ICEX_USR (1 << 10)
39229+#define AuBrAttr_ICEX_OTH (1 << 11)
39230+#define AuBrAttr_ICEX (AuBrAttr_ICEX_SEC \
39231+ | AuBrAttr_ICEX_SYS \
39232+ | AuBrAttr_ICEX_TR \
39233+ | AuBrAttr_ICEX_USR \
39234+ | AuBrAttr_ICEX_OTH)
39235+
39236+#define AuBrRAttr_WH (1 << 12) /* whiteout-able */
39237+#define AuBrRAttr_Mask AuBrRAttr_WH
39238+
39239+#define AuBrWAttr_NoLinkWH (1 << 13) /* un-hardlinkable whiteouts */
39240+#define AuBrWAttr_MOO (1 << 14) /* move-up on open */
39241+#define AuBrWAttr_Mask (AuBrWAttr_NoLinkWH | AuBrWAttr_MOO)
39242+
39243+#define AuBrAttr_CMOO_Mask (AuBrAttr_COO_Mask | AuBrWAttr_MOO)
39244+
39245+/* #warning test userspace */
39246+#ifdef __KERNEL__
39247+#ifndef CONFIG_AUFS_FHSM
39248+#undef AuBrAttr_FHSM
39249+#define AuBrAttr_FHSM 0
39250+#endif
39251+#ifndef CONFIG_AUFS_XATTR
39252+#undef AuBrAttr_ICEX
39253+#define AuBrAttr_ICEX 0
39254+#undef AuBrAttr_ICEX_SEC
39255+#define AuBrAttr_ICEX_SEC 0
39256+#undef AuBrAttr_ICEX_SYS
39257+#define AuBrAttr_ICEX_SYS 0
39258+#undef AuBrAttr_ICEX_TR
39259+#define AuBrAttr_ICEX_TR 0
39260+#undef AuBrAttr_ICEX_USR
39261+#define AuBrAttr_ICEX_USR 0
39262+#undef AuBrAttr_ICEX_OTH
39263+#define AuBrAttr_ICEX_OTH 0
39264+#endif
39265+#endif
39266+
39267+/* the longest combination */
39268+/* AUFS_BRATTR_ICEX and AUFS_BRATTR_ICEX_TR don't affect here */
39269+#define AuBrPermStrSz sizeof(AUFS_BRPERM_RW \
39270+ "+" AUFS_BRATTR_COO_REG \
39271+ "+" AUFS_BRATTR_FHSM \
39272+ "+" AUFS_BRATTR_UNPIN \
39273+ "+" AUFS_BRATTR_ICEX_SEC \
39274+ "+" AUFS_BRATTR_ICEX_SYS \
39275+ "+" AUFS_BRATTR_ICEX_USR \
39276+ "+" AUFS_BRATTR_ICEX_OTH \
39277+ "+" AUFS_BRWATTR_NLWH)
39278+
39279+typedef struct {
39280+ char a[AuBrPermStrSz];
39281+} au_br_perm_str_t;
39282+
39283+static inline int au_br_writable(int brperm)
39284+{
39285+ return brperm & AuBrPerm_RW;
39286+}
39287+
39288+static inline int au_br_whable(int brperm)
39289+{
39290+ return brperm & (AuBrPerm_RW | AuBrRAttr_WH);
39291+}
39292+
39293+static inline int au_br_wh_linkable(int brperm)
39294+{
39295+ return !(brperm & AuBrWAttr_NoLinkWH);
39296+}
39297+
39298+static inline int au_br_cmoo(int brperm)
39299+{
39300+ return brperm & AuBrAttr_CMOO_Mask;
39301+}
39302+
39303+static inline int au_br_fhsm(int brperm)
39304+{
39305+ return brperm & AuBrAttr_FHSM;
39306+}
39307+
39308+/* ---------------------------------------------------------------------- */
39309+
39310+/* ioctl */
39311+enum {
39312+ /* readdir in userspace */
39313+ AuCtl_RDU,
39314+ AuCtl_RDU_INO,
39315+
39316+ AuCtl_WBR_FD, /* pathconf wrapper */
39317+ AuCtl_IBUSY, /* busy inode */
39318+ AuCtl_MVDOWN, /* move-down */
39319+ AuCtl_BR, /* info about branches */
39320+ AuCtl_FHSM_FD /* connection for fhsm */
39321+};
39322+
39323+/* borrowed from linux/include/linux/kernel.h */
39324+#ifndef ALIGN
39325+#ifdef _GNU_SOURCE
39326+#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
39327+#else
39328+#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))
39329+#endif
39330+#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
39331+#endif
39332+
39333+/* borrowed from linux/include/linux/compiler-gcc3.h */
39334+#ifndef __aligned
39335+#define __aligned(x) __attribute__((aligned(x)))
39336+#endif
39337+
39338+#ifdef __KERNEL__
39339+#ifndef __packed
39340+#define __packed __attribute__((packed))
39341+#endif
39342+#endif
39343+
39344+struct au_rdu_cookie {
39345+ uint64_t h_pos;
39346+ int16_t bindex;
39347+ uint8_t flags;
39348+ uint8_t pad;
39349+ uint32_t generation;
39350+} __aligned(8);
39351+
39352+struct au_rdu_ent {
39353+ uint64_t ino;
39354+ int16_t bindex;
39355+ uint8_t type;
39356+ uint8_t nlen;
39357+ uint8_t wh;
39358+ char name[];
39359+} __aligned(8);
39360+
39361+static inline int au_rdu_len(int nlen)
39362+{
39363+ /* include the terminating NUL byte */
39364+ return ALIGN(sizeof(struct au_rdu_ent) + nlen + 1,
39365+ sizeof(uint64_t));
39366+}
39367+
39368+union au_rdu_ent_ul {
39369+ struct au_rdu_ent __user *e;
39370+ uint64_t ul;
39371+};
39372+
39373+enum {
39374+ AufsCtlRduV_SZ,
39375+ AufsCtlRduV_End
39376+};
39377+
39378+struct aufs_rdu {
39379+ /* input */
39380+ union {
39381+ uint64_t sz; /* AuCtl_RDU */
39382+ uint64_t nent; /* AuCtl_RDU_INO */
39383+ };
39384+ union au_rdu_ent_ul ent;
39385+ uint16_t verify[AufsCtlRduV_End];
39386+
39387+ /* input/output */
39388+ uint32_t blk;
39389+
39390+ /* output */
39391+ union au_rdu_ent_ul tail;
39392+ /* number of entries which were added in a single call */
39393+ uint64_t rent;
39394+ uint8_t full;
39395+ uint8_t shwh;
39396+
39397+ struct au_rdu_cookie cookie;
39398+} __aligned(8);
39399+
39400+/* ---------------------------------------------------------------------- */
39401+
39402+/* dirren. the branch is identified by the name of the file which contains this */
39403+struct au_drinfo {
39404+ uint64_t ino;
39405+ union {
39406+ uint8_t oldnamelen;
39407+ uint64_t _padding;
39408+ };
39409+ uint8_t oldname[];
39410+} __aligned(8);
39411+
39412+struct au_drinfo_fdata {
39413+ uint32_t magic;
39414+ struct au_drinfo drinfo;
39415+} __aligned(8);
39416+
39417+#define AUFS_DRINFO_MAGIC_V1 ('a' << 24 | 'd' << 16 | 'r' << 8 | 0x01)
39418+/* future */
39419+#define AUFS_DRINFO_MAGIC_V2 ('a' << 24 | 'd' << 16 | 'r' << 8 | 0x02)
39420+
39421+/* ---------------------------------------------------------------------- */
39422+
39423+struct aufs_wbr_fd {
39424+ uint32_t oflags;
39425+ int16_t brid;
39426+} __aligned(8);
39427+
39428+/* ---------------------------------------------------------------------- */
39429+
39430+struct aufs_ibusy {
39431+ uint64_t ino, h_ino;
39432+ int16_t bindex;
39433+} __aligned(8);
39434+
39435+/* ---------------------------------------------------------------------- */
39436+
39437+/* error code for move-down */
39438+/* the actual message strings are implemented in aufs-util.git */
39439+enum {
39440+ EAU_MVDOWN_OPAQUE = 1,
39441+ EAU_MVDOWN_WHITEOUT,
39442+ EAU_MVDOWN_UPPER,
39443+ EAU_MVDOWN_BOTTOM,
39444+ EAU_MVDOWN_NOUPPER,
39445+ EAU_MVDOWN_NOLOWERBR,
39446+ EAU_Last
39447+};
39448+
39449+/* flags for move-down */
39450+#define AUFS_MVDOWN_DMSG 1
39451+#define AUFS_MVDOWN_OWLOWER (1 << 1) /* overwrite lower */
39452+#define AUFS_MVDOWN_KUPPER (1 << 2) /* keep upper */
39453+#define AUFS_MVDOWN_ROLOWER (1 << 3) /* do even if lower is RO */
39454+#define AUFS_MVDOWN_ROLOWER_R (1 << 4) /* did on lower RO */
39455+#define AUFS_MVDOWN_ROUPPER (1 << 5) /* do even if upper is RO */
39456+#define AUFS_MVDOWN_ROUPPER_R (1 << 6) /* did on upper RO */
39457+#define AUFS_MVDOWN_BRID_UPPER (1 << 7) /* upper brid */
39458+#define AUFS_MVDOWN_BRID_LOWER (1 << 8) /* lower brid */
39459+#define AUFS_MVDOWN_FHSM_LOWER (1 << 9) /* find fhsm attr for lower */
39460+#define AUFS_MVDOWN_STFS (1 << 10) /* req. stfs */
39461+#define AUFS_MVDOWN_STFS_FAILED (1 << 11) /* output: stfs is unusable */
39462+#define AUFS_MVDOWN_BOTTOM (1 << 12) /* output: no more lowers */
39463+
39464+/* index for move-down */
39465+enum {
39466+ AUFS_MVDOWN_UPPER,
39467+ AUFS_MVDOWN_LOWER,
39468+ AUFS_MVDOWN_NARRAY
39469+};
39470+
39471+/*
39472+ * additional info of move-down
39473+ * number of free blocks and inodes.
39474+ * subset of struct kstatfs, but smaller and always 64bit.
39475+ */
39476+struct aufs_stfs {
39477+ uint64_t f_blocks;
39478+ uint64_t f_bavail;
39479+ uint64_t f_files;
39480+ uint64_t f_ffree;
39481+};
39482+
39483+struct aufs_stbr {
39484+ int16_t brid; /* optional input */
39485+ int16_t bindex; /* output */
39486+ struct aufs_stfs stfs; /* output when AUFS_MVDOWN_STFS set */
39487+} __aligned(8);
39488+
39489+struct aufs_mvdown {
39490+ uint32_t flags; /* input/output */
39491+ struct aufs_stbr stbr[AUFS_MVDOWN_NARRAY]; /* input/output */
39492+ int8_t au_errno; /* output */
39493+} __aligned(8);
39494+
39495+/* ---------------------------------------------------------------------- */
39496+
39497+union aufs_brinfo {
39498+ /* PATH_MAX may differ between kernel-space and user-space */
39499+ char _spacer[4096];
39500+ struct {
39501+ int16_t id;
39502+ int perm;
39503+ char path[];
39504+ };
39505+} __aligned(8);
39506+
39507+/* ---------------------------------------------------------------------- */
39508+
39509+#define AuCtlType 'A'
39510+#define AUFS_CTL_RDU _IOWR(AuCtlType, AuCtl_RDU, struct aufs_rdu)
39511+#define AUFS_CTL_RDU_INO _IOWR(AuCtlType, AuCtl_RDU_INO, struct aufs_rdu)
39512+#define AUFS_CTL_WBR_FD _IOW(AuCtlType, AuCtl_WBR_FD, \
39513+ struct aufs_wbr_fd)
39514+#define AUFS_CTL_IBUSY _IOWR(AuCtlType, AuCtl_IBUSY, struct aufs_ibusy)
39515+#define AUFS_CTL_MVDOWN _IOWR(AuCtlType, AuCtl_MVDOWN, \
39516+ struct aufs_mvdown)
39517+#define AUFS_CTL_BRINFO _IOW(AuCtlType, AuCtl_BR, union aufs_brinfo)
39518+#define AUFS_CTL_FHSM_FD _IOW(AuCtlType, AuCtl_FHSM_FD, int)
39519+
39520+#endif /* __AUFS_TYPE_H__ */
39521SPDX-License-Identifier: GPL-2.0
39522aufs6.x-rcN loopback patch
39523
39524diff --git a/drivers/block/loop.c b/drivers/block/loop.c
39525index 7a9928c6db9d..0bac486deea2 100644
39526--- a/drivers/block/loop.c
39527+++ b/drivers/block/loop.c
39528@@ -54,7 +54,7 @@ struct loop_device {
39529 int lo_flags;
39530 char lo_file_name[LO_NAME_SIZE];
39531
39532- struct file * lo_backing_file;
39533+ struct file *lo_backing_file, *lo_backing_virt_file;
39534 struct block_device *lo_device;
39535
39536 gfp_t old_gfp_mask;
39537@@ -510,6 +510,15 @@ static inline void loop_update_dio(struct loop_device *lo)
39538 lo->use_dio);
39539 }
39540
39541+static struct file *loop_real_file(struct file *file)
39542+{
39543+ struct file *f = NULL;
39544+
39545+ if (file->f_path.dentry->d_sb->s_op->real_loop)
39546+ f = file->f_path.dentry->d_sb->s_op->real_loop(file);
39547+ return f;
39548+}
39549+
39550 static void loop_reread_partitions(struct loop_device *lo)
39551 {
39552 int rc;
39553@@ -567,6 +576,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
39554 {
39555 struct file *file = fget(arg);
39556 struct file *old_file;
39557+ struct file *f, *virt_file = NULL, *old_virt_file;
39558 int error;
39559 bool partscan;
39560 bool is_loop;
39561@@ -590,11 +600,19 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
39562 if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
39563 goto out_err;
39564
39565+ f = loop_real_file(file);
39566+ if (f) {
39567+ virt_file = file;
39568+ file = f;
39569+ get_file(file);
39570+ }
39571+
39572 error = loop_validate_file(file, bdev);
39573 if (error)
39574 goto out_err;
39575
39576 old_file = lo->lo_backing_file;
39577+ old_virt_file = lo->lo_backing_virt_file;
39578
39579 error = -EINVAL;
39580
39581@@ -607,6 +625,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
39582 blk_mq_freeze_queue(lo->lo_queue);
39583 mapping_set_gfp_mask(old_file->f_mapping, lo->old_gfp_mask);
39584 lo->lo_backing_file = file;
39585+ lo->lo_backing_virt_file = virt_file;
39586 lo->old_gfp_mask = mapping_gfp_mask(file->f_mapping);
39587 mapping_set_gfp_mask(file->f_mapping,
39588 lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
39589@@ -629,6 +648,8 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
39590 * dependency.
39591 */
39592 fput(old_file);
39593+ if (old_virt_file)
39594+ fput(old_virt_file);
39595 if (partscan)
39596 loop_reread_partitions(lo);
39597
39598@@ -642,6 +663,8 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
39599 loop_global_unlock(lo, is_loop);
39600 out_putf:
39601 fput(file);
39602+ if (virt_file)
39603+ fput(virt_file);
39604 goto done;
39605 }
39606
39607@@ -1013,6 +1036,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
39608 const struct loop_config *config)
39609 {
39610 struct file *file = fget(config->fd);
39611+ struct file *f, *virt_file = NULL;
39612 struct inode *inode;
39613 struct address_space *mapping;
39614 int error;
39615@@ -1031,6 +1055,13 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
39616 /* suppress uevents while reconfiguring the device */
39617 dev_set_uevent_suppress(disk_to_dev(lo->lo_disk), 1);
39618
39619+ f = loop_real_file(file);
39620+ if (f) {
39621+ virt_file = file;
39622+ file = f;
39623+ get_file(file);
39624+ }
39625+
39626 /*
39627 * If we don't hold exclusive handle for the device, upgrade to it
39628 * here to avoid changing device under exclusive owner.
39629@@ -1091,6 +1122,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
39630 lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
39631 lo->lo_device = bdev;
39632 lo->lo_backing_file = file;
39633+ lo->lo_backing_virt_file = virt_file;
39634 lo->old_gfp_mask = mapping_gfp_mask(mapping);
39635 mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
39636
39637@@ -1146,6 +1178,8 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
39638 bd_abort_claiming(bdev, loop_configure);
39639 out_putf:
39640 fput(file);
39641+ if (virt_file)
39642+ fput(virt_file);
39643 /* This is safe: open() is still holding a reference. */
39644 module_put(THIS_MODULE);
39645 goto done;
39646@@ -1154,6 +1188,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
39647 static void __loop_clr_fd(struct loop_device *lo, bool release)
39648 {
39649 struct file *filp;
39650+ struct file *virt_filp = lo->lo_backing_virt_file;
39651 gfp_t gfp = lo->old_gfp_mask;
39652
39653 if (test_bit(QUEUE_FLAG_WC, &lo->lo_queue->queue_flags))
39654@@ -1170,6 +1205,7 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
39655 spin_lock_irq(&lo->lo_lock);
39656 filp = lo->lo_backing_file;
39657 lo->lo_backing_file = NULL;
39658+ lo->lo_backing_virt_file = NULL;
39659 spin_unlock_irq(&lo->lo_lock);
39660
39661 lo->lo_device = NULL;
39662@@ -1232,6 +1268,8 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
39663 * fput can take open_mutex which is usually taken before lo_mutex.
39664 */
39665 fput(filp);
39666+ if (virt_filp)
39667+ fput(virt_filp);
39668 }
39669
39670 static int loop_clr_fd(struct loop_device *lo)
39671diff --git a/fs/aufs/f_op.c b/fs/aufs/f_op.c
39672index 1ed300047a41..ea63191d6473 100644
39673--- a/fs/aufs/f_op.c
39674+++ b/fs/aufs/f_op.c
39675@@ -309,7 +309,7 @@ static ssize_t aufs_read_iter(struct kiocb *kio, struct iov_iter *iov_iter)
39676 if (IS_ERR(h_file))
39677 goto out;
39678
39679- if (au_test_loopback_kthread()) {
39680+ if (0 && au_test_loopback_kthread()) {
39681 au_warn_loopback(h_file->f_path.dentry->d_sb);
39682 if (file->f_mapping != h_file->f_mapping) {
39683 file->f_mapping = h_file->f_mapping;
39684diff --git a/fs/aufs/loop.c b/fs/aufs/loop.c
39685index 58043e31e5f3..e2bfae6f9d59 100644
39686--- a/fs/aufs/loop.c
39687+++ b/fs/aufs/loop.c
39688@@ -133,3 +133,19 @@ void au_loopback_fin(void)
39689 symbol_put(loop_backing_file);
39690 au_kfree_try_rcu(au_warn_loopback_array);
39691 }
39692+
39693+/* ---------------------------------------------------------------------- */
39694+
39695+/* support the loopback block device inside aufs */
39696+
39697+struct file *aufs_real_loop(struct file *file)
39698+{
39699+ struct file *f;
39700+
39701+ BUG_ON(!au_test_aufs(file->f_path.dentry->d_sb));
39702+ fi_read_lock(file);
39703+ f = au_hf_top(file);
39704+ fi_read_unlock(file);
39705+ AuDebugOn(!f);
39706+ return f;
39707+}
39708diff --git a/fs/aufs/loop.h b/fs/aufs/loop.h
39709index 03d4908a6c03..34d356e181d5 100644
39710--- a/fs/aufs/loop.h
39711+++ b/fs/aufs/loop.h
39712@@ -26,6 +26,8 @@ void au_warn_loopback(struct super_block *h_sb);
39713
39714 int au_loopback_init(void);
39715 void au_loopback_fin(void);
39716+
39717+struct file *aufs_real_loop(struct file *file);
39718 #else
39719 AuStub(struct file *, loop_backing_file, return NULL, struct super_block *sb)
39720
39721@@ -36,6 +38,8 @@ AuStubVoid(au_warn_loopback, struct super_block *h_sb)
39722
39723 AuStubInt0(au_loopback_init, void)
39724 AuStubVoid(au_loopback_fin, void)
39725+
39726+AuStub(struct file *, aufs_real_loop, return NULL, struct file *file)
39727 #endif /* BLK_DEV_LOOP */
39728
39729 #endif /* __KERNEL__ */
39730diff --git a/fs/aufs/super.c b/fs/aufs/super.c
39731index 81922d4faf54..c8a62c267d72 100644
39732--- a/fs/aufs/super.c
39733+++ b/fs/aufs/super.c
39734@@ -758,7 +758,10 @@ const struct super_operations aufs_sop = {
39735 .show_options = aufs_show_options,
39736 .statfs = aufs_statfs,
39737 .put_super = aufs_put_super,
39738- .sync_fs = aufs_sync_fs
39739+ .sync_fs = aufs_sync_fs,
39740+#ifdef CONFIG_AUFS_BDEV_LOOP
39741+ .real_loop = aufs_real_loop
39742+#endif
39743 };
39744
39745 /* ---------------------------------------------------------------------- */
39746diff --git a/include/linux/fs.h b/include/linux/fs.h
39747index 2d30def9a580..84a83c480302 100644
39748--- a/include/linux/fs.h
39749+++ b/include/linux/fs.h
39750@@ -2252,6 +2252,10 @@ struct super_operations {
39751 struct shrink_control *);
39752 long (*free_cached_objects)(struct super_block *,
39753 struct shrink_control *);
39754+#if IS_ENABLED(CONFIG_BLK_DEV_LOOP) || IS_ENABLED(CONFIG_BLK_DEV_LOOP_MODULE)
39755+ /* and aufs */
39756+ struct file *(*real_loop)(struct file *);
39757+#endif
39758 };
39759
39760 /*
This page took 0.608962 seconds and 4 git commands to generate.