1 diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c
2 index f424dd771..bcd520de3 100644
5 @@ -3102,13 +3102,22 @@ dump_znode_sa_xattr(sa_handle_t *hdl)
6 (void) printf("\tSA xattrs: %d bytes, %d entries\n\n",
7 sa_xattr_size, sa_xattr_entries);
8 while ((elem = nvlist_next_nvpair(sa_xattr, elem)) != NULL) {
9 + boolean_t can_print = !dump_opt['P'];
13 (void) printf("\t\t%s = ", nvpair_name(elem));
14 nvpair_value_byte_array(elem, &value, &cnt);
16 + for (idx = 0; idx < cnt; ++idx) {
17 + if (!isprint(value[idx])) {
18 + can_print = B_FALSE;
23 for (idx = 0; idx < cnt; ++idx) {
24 - if (isprint(value[idx]))
26 (void) putchar(value[idx]);
28 (void) printf("\\%3.3o", value[idx]);
29 diff --git a/cmd/zed/agents/zfs_retire.c b/cmd/zed/agents/zfs_retire.c
30 index b4794e311..29eaee750 100644
31 --- a/cmd/zed/agents/zfs_retire.c
32 +++ b/cmd/zed/agents/zfs_retire.c
33 @@ -444,14 +444,16 @@ zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl,
36 /* Remove the vdev since device is unplugged */
37 + int remove_status = 0;
38 if (l2arc || (strcmp(class, "resource.fs.zfs.removed") == 0)) {
39 - int status = zpool_vdev_remove_wanted(zhp, devname);
40 + remove_status = zpool_vdev_remove_wanted(zhp, devname);
41 fmd_hdl_debug(hdl, "zpool_vdev_remove_wanted '%s'"
42 - ", ret:%d", devname, status);
43 + ", err:%d", devname, libzfs_errno(zhdl));
46 /* Replace the vdev with a spare if its not a l2arc */
47 - if (!l2arc && (!fmd_prop_get_int32(hdl, "spare_on_remove") ||
48 + if (!l2arc && !remove_status &&
49 + (!fmd_prop_get_int32(hdl, "spare_on_remove") ||
50 replace_with_spare(hdl, zhp, vdev) == B_FALSE)) {
51 /* Could not handle with spare */
52 fmd_hdl_debug(hdl, "no spare for '%s'", devname);
53 diff --git a/cmd/zpool/zpool_main.c b/cmd/zpool/zpool_main.c
54 index 2311d4f04..a06af9aec 100644
55 --- a/cmd/zpool/zpool_main.c
56 +++ b/cmd/zpool/zpool_main.c
57 @@ -392,7 +392,7 @@ get_usage(zpool_help_t idx)
59 return (gettext("\treopen [-n] <pool>\n"));
61 - return (gettext("\tinitialize [-c | -s] [-w] <pool> "
62 + return (gettext("\tinitialize [-c | -s | -u] [-w] <pool> "
65 return (gettext("\tscrub [-s | -p] [-w] <pool> ...\n"));
66 @@ -548,12 +548,13 @@ usage(boolean_t requested)
70 - * zpool initialize [-c | -s] [-w] <pool> [<vdev> ...]
71 + * zpool initialize [-c | -s | -u] [-w] <pool> [<vdev> ...]
72 * Initialize all unused blocks in the specified vdevs, or all vdevs in the pool
75 * -c Cancel. Ends active initializing.
76 * -s Suspend. Initializing can then be restarted with no flags.
77 + * -u Uninitialize. Clears initialization state.
78 * -w Wait. Blocks until initializing has completed.
81 @@ -569,12 +570,14 @@ zpool_do_initialize(int argc, char **argv)
82 struct option long_options[] = {
83 {"cancel", no_argument, NULL, 'c'},
84 {"suspend", no_argument, NULL, 's'},
85 + {"uninit", no_argument, NULL, 'u'},
86 {"wait", no_argument, NULL, 'w'},
90 pool_initialize_func_t cmd_type = POOL_INITIALIZE_START;
91 - while ((c = getopt_long(argc, argv, "csw", long_options, NULL)) != -1) {
92 + while ((c = getopt_long(argc, argv, "csuw", long_options,
96 if (cmd_type != POOL_INITIALIZE_START &&
97 @@ -594,6 +597,15 @@ zpool_do_initialize(int argc, char **argv)
99 cmd_type = POOL_INITIALIZE_SUSPEND;
102 + if (cmd_type != POOL_INITIALIZE_START &&
103 + cmd_type != POOL_INITIALIZE_UNINIT) {
104 + (void) fprintf(stderr, gettext("-u cannot be "
105 + "combined with other options\n"));
108 + cmd_type = POOL_INITIALIZE_UNINIT;
113 @@ -620,8 +632,8 @@ zpool_do_initialize(int argc, char **argv)
116 if (wait && (cmd_type != POOL_INITIALIZE_START)) {
117 - (void) fprintf(stderr, gettext("-w cannot be used with -c or "
119 + (void) fprintf(stderr, gettext("-w cannot be used with -c, -s"
124 @@ -6921,6 +6933,17 @@ zpool_do_online(int argc, char **argv)
127 for (i = 1; i < argc; i++) {
128 + vdev_state_t oldstate;
129 + boolean_t avail_spare, l2cache;
130 + nvlist_t *tgt = zpool_find_vdev(zhp, argv[i], &avail_spare,
137 + oldstate = ((vdev_stat_t *)fnvlist_lookup_uint64_array(tgt,
138 + ZPOOL_CONFIG_VDEV_STATS, &vsc))->vs_state;
139 if (zpool_vdev_online(zhp, argv[i], flags, &newstate) == 0) {
140 if (newstate != VDEV_STATE_HEALTHY) {
141 (void) printf(gettext("warning: device '%s' "
142 @@ -6934,6 +6957,17 @@ zpool_do_online(int argc, char **argv)
143 (void) printf(gettext("use 'zpool "
144 "replace' to replace devices "
145 "that are no longer present\n"));
146 + if ((flags & ZFS_ONLINE_EXPAND)) {
147 + (void) printf(gettext("%s: failed "
148 + "to expand usable space on "
149 + "unhealthy device '%s'\n"),
150 + (oldstate >= VDEV_STATE_DEGRADED ?
151 + "error" : "warning"), argv[i]);
152 + if (oldstate >= VDEV_STATE_DEGRADED) {
160 @@ -7549,19 +7583,20 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
162 zfs_nicebytes(ps->pss_processed, processed_buf, sizeof (processed_buf));
164 - assert(ps->pss_func == POOL_SCAN_SCRUB ||
165 - ps->pss_func == POOL_SCAN_RESILVER);
166 + int is_resilver = ps->pss_func == POOL_SCAN_RESILVER;
167 + int is_scrub = ps->pss_func == POOL_SCAN_SCRUB;
168 + assert(is_resilver || is_scrub);
170 /* Scan is finished or canceled. */
171 if (ps->pss_state == DSS_FINISHED) {
172 secs_to_dhms(end - start, time_buf);
174 - if (ps->pss_func == POOL_SCAN_SCRUB) {
176 (void) printf(gettext("scrub repaired %s "
177 "in %s with %llu errors on %s"), processed_buf,
178 time_buf, (u_longlong_t)ps->pss_errors,
180 - } else if (ps->pss_func == POOL_SCAN_RESILVER) {
181 + } else if (is_resilver) {
182 (void) printf(gettext("resilvered %s "
183 "in %s with %llu errors on %s"), processed_buf,
184 time_buf, (u_longlong_t)ps->pss_errors,
185 @@ -7569,10 +7604,10 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
188 } else if (ps->pss_state == DSS_CANCELED) {
189 - if (ps->pss_func == POOL_SCAN_SCRUB) {
191 (void) printf(gettext("scrub canceled on %s"),
193 - } else if (ps->pss_func == POOL_SCAN_RESILVER) {
194 + } else if (is_resilver) {
195 (void) printf(gettext("resilver canceled on %s"),
198 @@ -7582,7 +7617,7 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
199 assert(ps->pss_state == DSS_SCANNING);
201 /* Scan is in progress. Resilvers can't be paused. */
202 - if (ps->pss_func == POOL_SCAN_SCRUB) {
205 (void) printf(gettext("scrub in progress since %s"),
207 @@ -7592,7 +7627,7 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
208 (void) printf(gettext("\tscrub started on %s"),
211 - } else if (ps->pss_func == POOL_SCAN_RESILVER) {
212 + } else if (is_resilver) {
213 (void) printf(gettext("resilver in progress since %s"),
216 @@ -7634,17 +7669,27 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
217 scanned_buf, issued_buf, total_buf);
220 - if (ps->pss_func == POOL_SCAN_RESILVER) {
222 (void) printf(gettext("\t%s resilvered, %.2f%% done"),
223 processed_buf, 100 * fraction_done);
224 - } else if (ps->pss_func == POOL_SCAN_SCRUB) {
225 + } else if (is_scrub) {
226 (void) printf(gettext("\t%s repaired, %.2f%% done"),
227 processed_buf, 100 * fraction_done);
232 + * Only provide an estimate iff:
233 + * 1) the time remaining is valid, and
234 + * 2) the issue rate exceeds 10 MB/s, and
236 + * a) a resilver which has started repairs, or
237 + * b) a scrub which has entered the issue phase.
239 if (total_secs_left != UINT64_MAX &&
240 - issue_rate >= 10 * 1024 * 1024) {
241 + issue_rate >= 10 * 1024 * 1024 &&
242 + ((is_resilver && ps->pss_processed > 0) ||
243 + (is_scrub && issued > 0))) {
244 (void) printf(gettext(", %s to go\n"), time_buf);
246 (void) printf(gettext(", no estimated "
247 diff --git a/config/always-compiler-options.m4 b/config/always-compiler-options.m4
248 index 5046ce0dd..0f66db584 100644
249 --- a/config/always-compiler-options.m4
250 +++ b/config/always-compiler-options.m4
251 @@ -221,3 +221,34 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_CC_NO_IPA_SRA], [
252 CFLAGS="$saved_flags"
253 AC_SUBST([NO_IPA_SRA])
257 +dnl # Check if kernel cc supports -fno-ipa-sra option.
259 +AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_IPA_SRA], [
260 + AC_MSG_CHECKING([whether $KERNEL_CC supports -fno-ipa-sra])
263 + saved_flags="$CFLAGS"
265 + CFLAGS="$CFLAGS -Werror -fno-ipa-sra"
267 + AS_IF([ test -n "$KERNEL_CC" ], [
270 + AS_IF([ test -n "$KERNEL_LLVM" ], [
274 + AC_COMPILE_IFELSE([AC_LANG_PROGRAM([], [])], [
275 + KERNEL_NO_IPA_SRA=-fno-ipa-sra
276 + AC_MSG_RESULT([yes])
279 + AC_MSG_RESULT([no])
283 + CFLAGS="$saved_flags"
284 + AC_SUBST([KERNEL_NO_IPA_SRA])
286 diff --git a/config/kernel-acl.m4 b/config/kernel-acl.m4
287 index 6e92da97d..be08c3c60 100644
288 --- a/config/kernel-acl.m4
289 +++ b/config/kernel-acl.m4
290 @@ -236,7 +236,22 @@ dnl #
291 dnl # 6.2 API change,
292 dnl # set_acl() second paramter changed to a struct dentry *
294 +dnl # 6.3 API change,
295 +dnl # set_acl() first parameter changed to struct mnt_idmap *
297 AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OPERATIONS_SET_ACL], [
298 + ZFS_LINUX_TEST_SRC([inode_operations_set_acl_mnt_idmap_dentry], [
299 + #include <linux/fs.h>
301 + int set_acl_fn(struct mnt_idmap *idmap,
302 + struct dentry *dent, struct posix_acl *acl,
303 + int type) { return 0; }
305 + static const struct inode_operations
306 + iops __attribute__ ((unused)) = {
307 + .set_acl = set_acl_fn,
310 ZFS_LINUX_TEST_SRC([inode_operations_set_acl_userns_dentry], [
311 #include <linux/fs.h>
313 @@ -281,17 +296,24 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_OPERATIONS_SET_ACL], [
314 AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists])
315 AC_DEFINE(HAVE_SET_ACL_USERNS, 1, [iops->set_acl() takes 4 args])
317 - ZFS_LINUX_TEST_RESULT([inode_operations_set_acl_userns_dentry], [
318 + ZFS_LINUX_TEST_RESULT([inode_operations_set_acl_mnt_idmap_dentry], [
320 AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists])
321 - AC_DEFINE(HAVE_SET_ACL_USERNS_DENTRY_ARG2, 1,
322 - [iops->set_acl() takes 4 args, arg2 is struct dentry *])
323 + AC_DEFINE(HAVE_SET_ACL_IDMAP_DENTRY, 1,
324 + [iops->set_acl() takes 4 args, arg1 is struct mnt_idmap *])
326 - ZFS_LINUX_TEST_RESULT([inode_operations_set_acl], [
327 + ZFS_LINUX_TEST_RESULT([inode_operations_set_acl_userns_dentry], [
329 - AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists, takes 3 args])
330 + AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists])
331 + AC_DEFINE(HAVE_SET_ACL_USERNS_DENTRY_ARG2, 1,
332 + [iops->set_acl() takes 4 args, arg2 is struct dentry *])
334 - ZFS_LINUX_REQUIRE_API([i_op->set_acl()], [3.14])
335 + ZFS_LINUX_TEST_RESULT([inode_operations_set_acl], [
337 + AC_DEFINE(HAVE_SET_ACL, 1, [iops->set_acl() exists, takes 3 args])
339 + ZFS_LINUX_REQUIRE_API([i_op->set_acl()], [3.14])
344 diff --git a/config/kernel-cpu_has_feature.m4 b/config/kernel-cpu_has_feature.m4
346 index 000000000..608faf0f8
348 +++ b/config/kernel-cpu_has_feature.m4
351 +dnl # cpu_has_feature() may reference GPL-only cpu_feature_keys on powerpc
355 +dnl # Checking if cpu_has_feature is exported GPL-only
357 +AC_DEFUN([ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE], [
358 + ZFS_LINUX_TEST_SRC([cpu_has_feature], [
359 + #include <linux/version.h>
360 + #if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 7, 0)
361 + #include <asm/cpu_has_feature.h>
363 + #include <asm/cputable.h>
366 + return cpu_has_feature(CPU_FTR_ALTIVEC) ? 0 : 1;
367 + ], [], [ZFS_META_LICENSE])
369 +AC_DEFUN([ZFS_AC_KERNEL_CPU_HAS_FEATURE], [
370 + AC_MSG_CHECKING([whether cpu_has_feature() is GPL-only])
371 + ZFS_LINUX_TEST_RESULT([cpu_has_feature_license], [
375 + AC_DEFINE(HAVE_CPU_HAS_FEATURE_GPL_ONLY, 1,
376 + [cpu_has_feature() is GPL-only])
379 diff --git a/config/kernel-filemap.m4 b/config/kernel-filemap.m4
381 index 000000000..745928168
383 +++ b/config/kernel-filemap.m4
386 +dnl # filemap_range_has_page was not available till 4.13
388 +AC_DEFUN([ZFS_AC_KERNEL_SRC_FILEMAP], [
389 + ZFS_LINUX_TEST_SRC([filemap_range_has_page], [
390 + #include <linux/fs.h>
392 + struct address_space *mapping = NULL;
395 + bool ret __attribute__ ((unused));
397 + ret = filemap_range_has_page(mapping, lstart, lend);
401 +AC_DEFUN([ZFS_AC_KERNEL_FILEMAP], [
402 + AC_MSG_CHECKING([whether filemap_range_has_page() is available])
403 + ZFS_LINUX_TEST_RESULT([filemap_range_has_page], [
405 + AC_DEFINE(HAVE_FILEMAP_RANGE_HAS_PAGE, 1,
406 + [filemap_range_has_page() is available])
411 diff --git a/config/kernel-flush_dcache_page.m4 b/config/kernel-flush_dcache_page.m4
413 index 000000000..2340c386e
415 +++ b/config/kernel-flush_dcache_page.m4
418 +dnl # Starting from Linux 5.13, flush_dcache_page() becomes an inline
419 +dnl # function and may indirectly reference GPL-only cpu_feature_keys on
424 +dnl # Checking if flush_dcache_page is exported GPL-only
426 +AC_DEFUN([ZFS_AC_KERNEL_SRC_FLUSH_DCACHE_PAGE], [
427 + ZFS_LINUX_TEST_SRC([flush_dcache_page], [
428 + #include <asm/cacheflush.h>
430 + flush_dcache_page(0);
431 + ], [], [ZFS_META_LICENSE])
433 +AC_DEFUN([ZFS_AC_KERNEL_FLUSH_DCACHE_PAGE], [
434 + AC_MSG_CHECKING([whether flush_dcache_page() is GPL-only])
435 + ZFS_LINUX_TEST_RESULT([flush_dcache_page_license], [
439 + AC_DEFINE(HAVE_FLUSH_DCACHE_PAGE_GPL_ONLY, 1,
440 + [flush_dcache_page() is GPL-only])
443 diff --git a/config/kernel-generic_fillattr.m4 b/config/kernel-generic_fillattr.m4
444 index 0acd5d531..02dee4d4c 100644
445 --- a/config/kernel-generic_fillattr.m4
446 +++ b/config/kernel-generic_fillattr.m4
447 @@ -4,7 +4,10 @@ dnl #
448 dnl # generic_fillattr in linux/fs.h now requires a struct user_namespace*
449 dnl # as the first arg, to support idmapped mounts.
451 -AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR_USERNS], [
453 +dnl # generic_fillattr() now takes struct mnt_idmap* as the first argument
455 +AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR], [
456 ZFS_LINUX_TEST_SRC([generic_fillattr_userns], [
457 #include <linux/fs.h>
459 @@ -13,16 +16,32 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR_USERNS], [
460 struct kstat *k = NULL;
461 generic_fillattr(userns, in, k);
464 + ZFS_LINUX_TEST_SRC([generic_fillattr_mnt_idmap], [
465 + #include <linux/fs.h>
467 + struct mnt_idmap *idmap = NULL;
468 + struct inode *in = NULL;
469 + struct kstat *k = NULL;
470 + generic_fillattr(idmap, in, k);
474 -AC_DEFUN([ZFS_AC_KERNEL_GENERIC_FILLATTR_USERNS], [
475 - AC_MSG_CHECKING([whether generic_fillattr requires struct user_namespace*])
476 - ZFS_LINUX_TEST_RESULT([generic_fillattr_userns], [
477 +AC_DEFUN([ZFS_AC_KERNEL_GENERIC_FILLATTR], [
478 + AC_MSG_CHECKING([whether generic_fillattr requires struct mnt_idmap*])
479 + ZFS_LINUX_TEST_RESULT([generic_fillattr_mnt_idmap], [
481 - AC_DEFINE(HAVE_GENERIC_FILLATTR_USERNS, 1,
482 - [generic_fillattr requires struct user_namespace*])
483 + AC_DEFINE(HAVE_GENERIC_FILLATTR_IDMAP, 1,
484 + [generic_fillattr requires struct mnt_idmap*])
486 - AC_MSG_RESULT([no])
487 + AC_MSG_CHECKING([whether generic_fillattr requires struct user_namespace*])
488 + ZFS_LINUX_TEST_RESULT([generic_fillattr_userns], [
489 + AC_MSG_RESULT([yes])
490 + AC_DEFINE(HAVE_GENERIC_FILLATTR_USERNS, 1,
491 + [generic_fillattr requires struct user_namespace*])
493 + AC_MSG_RESULT([no])
498 diff --git a/config/kernel-inode-create.m4 b/config/kernel-inode-create.m4
499 index a6ea11fb6..9e9e43180 100644
500 --- a/config/kernel-inode-create.m4
501 +++ b/config/kernel-inode-create.m4
503 AC_DEFUN([ZFS_AC_KERNEL_SRC_CREATE], [
505 + dnl # 6.3 API change
506 + dnl # The first arg is changed to struct mnt_idmap *
508 + ZFS_LINUX_TEST_SRC([create_mnt_idmap], [
509 + #include <linux/fs.h>
510 + #include <linux/sched.h>
512 + int inode_create(struct mnt_idmap *idmap,
513 + struct inode *inode ,struct dentry *dentry,
514 + umode_t umode, bool flag) { return 0; }
516 + static const struct inode_operations
517 + iops __attribute__ ((unused)) = {
518 + .create = inode_create,
523 dnl # 5.12 API change that added the struct user_namespace* arg
524 dnl # to the front of this function type's arg list.
525 @@ -35,19 +53,28 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_CREATE], [
528 AC_DEFUN([ZFS_AC_KERNEL_CREATE], [
529 - AC_MSG_CHECKING([whether iops->create() takes struct user_namespace*])
530 - ZFS_LINUX_TEST_RESULT([create_userns], [
531 + AC_MSG_CHECKING([whether iops->create() takes struct mnt_idmap*])
532 + ZFS_LINUX_TEST_RESULT([create_mnt_idmap], [
534 - AC_DEFINE(HAVE_IOPS_CREATE_USERNS, 1,
535 - [iops->create() takes struct user_namespace*])
536 + AC_DEFINE(HAVE_IOPS_CREATE_IDMAP, 1,
537 + [iops->create() takes struct mnt_idmap*])
541 - AC_MSG_CHECKING([whether iops->create() passes flags])
542 - ZFS_LINUX_TEST_RESULT([create_flags], [
543 + AC_MSG_CHECKING([whether iops->create() takes struct user_namespace*])
544 + ZFS_LINUX_TEST_RESULT([create_userns], [
546 + AC_DEFINE(HAVE_IOPS_CREATE_USERNS, 1,
547 + [iops->create() takes struct user_namespace*])
549 - ZFS_LINUX_TEST_ERROR([iops->create()])
552 + AC_MSG_CHECKING([whether iops->create() passes flags])
553 + ZFS_LINUX_TEST_RESULT([create_flags], [
556 + ZFS_LINUX_TEST_ERROR([iops->create()])
561 diff --git a/config/kernel-inode-getattr.m4 b/config/kernel-inode-getattr.m4
562 index f62e82f52..c8bfb0786 100644
563 --- a/config/kernel-inode-getattr.m4
564 +++ b/config/kernel-inode-getattr.m4
566 AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [
568 + dnl # Linux 6.3 API
569 + dnl # The first arg of getattr I/O operations handler type
570 + dnl # is changed to struct mnt_idmap*
572 + ZFS_LINUX_TEST_SRC([inode_operations_getattr_mnt_idmap], [
573 + #include <linux/fs.h>
576 + struct mnt_idmap *idmap,
577 + const struct path *p, struct kstat *k,
578 + u32 request_mask, unsigned int query_flags)
581 + static const struct inode_operations
582 + iops __attribute__ ((unused)) = {
583 + .getattr = test_getattr,
589 dnl # The getattr I/O operations handler type was extended to require
590 @@ -55,37 +75,48 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [
592 AC_DEFUN([ZFS_AC_KERNEL_INODE_GETATTR], [
594 - dnl # Kernel 5.12 test
595 + dnl # Kernel 6.3 test
597 - AC_MSG_CHECKING([whether iops->getattr() takes user_namespace])
598 - ZFS_LINUX_TEST_RESULT([inode_operations_getattr_userns], [
599 + AC_MSG_CHECKING([whether iops->getattr() takes mnt_idmap])
600 + ZFS_LINUX_TEST_RESULT([inode_operations_getattr_mnt_idmap], [
602 - AC_DEFINE(HAVE_USERNS_IOPS_GETATTR, 1,
603 - [iops->getattr() takes struct user_namespace*])
604 + AC_DEFINE(HAVE_IDMAP_IOPS_GETATTR, 1,
605 + [iops->getattr() takes struct mnt_idmap*])
610 - dnl # Kernel 4.11 test
611 + dnl # Kernel 5.12 test
613 - AC_MSG_CHECKING([whether iops->getattr() takes a path])
614 - ZFS_LINUX_TEST_RESULT([inode_operations_getattr_path], [
615 + AC_MSG_CHECKING([whether iops->getattr() takes user_namespace])
616 + ZFS_LINUX_TEST_RESULT([inode_operations_getattr_userns], [
618 - AC_DEFINE(HAVE_PATH_IOPS_GETATTR, 1,
619 - [iops->getattr() takes a path])
620 + AC_DEFINE(HAVE_USERNS_IOPS_GETATTR, 1,
621 + [iops->getattr() takes struct user_namespace*])
626 - dnl # Kernel < 4.11 test
627 + dnl # Kernel 4.11 test
629 - AC_MSG_CHECKING([whether iops->getattr() takes a vfsmount])
630 - ZFS_LINUX_TEST_RESULT([inode_operations_getattr_vfsmount], [
631 + AC_MSG_CHECKING([whether iops->getattr() takes a path])
632 + ZFS_LINUX_TEST_RESULT([inode_operations_getattr_path], [
634 - AC_DEFINE(HAVE_VFSMOUNT_IOPS_GETATTR, 1,
635 - [iops->getattr() takes a vfsmount])
636 + AC_DEFINE(HAVE_PATH_IOPS_GETATTR, 1,
637 + [iops->getattr() takes a path])
642 + dnl # Kernel < 4.11 test
644 + AC_MSG_CHECKING([whether iops->getattr() takes a vfsmount])
645 + ZFS_LINUX_TEST_RESULT([inode_operations_getattr_vfsmount], [
647 + AC_DEFINE(HAVE_VFSMOUNT_IOPS_GETATTR, 1,
648 + [iops->getattr() takes a vfsmount])
655 diff --git a/config/kernel-inode-setattr.m4 b/config/kernel-inode-setattr.m4
657 index 000000000..45755b4eb
659 +++ b/config/kernel-inode-setattr.m4
661 +AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_SETATTR], [
663 + dnl # Linux 6.3 API
664 + dnl # The first arg of setattr I/O operations handler type
665 + dnl # is changed to struct mnt_idmap*
667 + ZFS_LINUX_TEST_SRC([inode_operations_setattr_mnt_idmap], [
668 + #include <linux/fs.h>
671 + struct mnt_idmap *idmap,
672 + struct dentry *de, struct iattr *ia)
675 + static const struct inode_operations
676 + iops __attribute__ ((unused)) = {
677 + .setattr = test_setattr,
682 + dnl # Linux 5.12 API
683 + dnl # The setattr I/O operations handler type was extended to require
684 + dnl # a struct user_namespace* as its first arg, to support idmapped
687 + ZFS_LINUX_TEST_SRC([inode_operations_setattr_userns], [
688 + #include <linux/fs.h>
691 + struct user_namespace *userns,
692 + struct dentry *de, struct iattr *ia)
695 + static const struct inode_operations
696 + iops __attribute__ ((unused)) = {
697 + .setattr = test_setattr,
701 + ZFS_LINUX_TEST_SRC([inode_operations_setattr], [
702 + #include <linux/fs.h>
705 + struct dentry *de, struct iattr *ia)
708 + static const struct inode_operations
709 + iops __attribute__ ((unused)) = {
710 + .setattr = test_setattr,
715 +AC_DEFUN([ZFS_AC_KERNEL_INODE_SETATTR], [
717 + dnl # Kernel 6.3 test
719 + AC_MSG_CHECKING([whether iops->setattr() takes mnt_idmap])
720 + ZFS_LINUX_TEST_RESULT([inode_operations_setattr_mnt_idmap], [
722 + AC_DEFINE(HAVE_IDMAP_IOPS_SETATTR, 1,
723 + [iops->setattr() takes struct mnt_idmap*])
727 + dnl # Kernel 5.12 test
729 + AC_MSG_CHECKING([whether iops->setattr() takes user_namespace])
730 + ZFS_LINUX_TEST_RESULT([inode_operations_setattr_userns], [
732 + AC_DEFINE(HAVE_USERNS_IOPS_SETATTR, 1,
733 + [iops->setattr() takes struct user_namespace*])
737 + AC_MSG_CHECKING([whether iops->setattr() exists])
738 + ZFS_LINUX_TEST_RESULT([inode_operations_setattr], [
740 + AC_DEFINE(HAVE_IOPS_SETATTR, 1,
741 + [iops->setattr() exists])
748 diff --git a/config/kernel-is_owner_or_cap.m4 b/config/kernel-is_owner_or_cap.m4
749 index a90cf3da6..4e9c002b7 100644
750 --- a/config/kernel-is_owner_or_cap.m4
751 +++ b/config/kernel-is_owner_or_cap.m4
752 @@ -16,12 +16,20 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OWNER_OR_CAPABLE], [
753 (void) inode_owner_or_capable(ip);
756 - ZFS_LINUX_TEST_SRC([inode_owner_or_capable_idmapped], [
757 + ZFS_LINUX_TEST_SRC([inode_owner_or_capable_userns], [
758 #include <linux/fs.h>
760 struct inode *ip = NULL;
761 (void) inode_owner_or_capable(&init_user_ns, ip);
764 + ZFS_LINUX_TEST_SRC([inode_owner_or_capable_mnt_idmap], [
765 + #include <linux/fs.h>
766 + #include <linux/mnt_idmapping.h>
768 + struct inode *ip = NULL;
769 + (void) inode_owner_or_capable(&nop_mnt_idmap, ip);
773 AC_DEFUN([ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE], [
774 @@ -35,12 +43,21 @@ AC_DEFUN([ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE], [
777 [whether inode_owner_or_capable() takes user_ns])
778 - ZFS_LINUX_TEST_RESULT([inode_owner_or_capable_idmapped], [
779 + ZFS_LINUX_TEST_RESULT([inode_owner_or_capable_userns], [
781 - AC_DEFINE(HAVE_INODE_OWNER_OR_CAPABLE_IDMAPPED, 1,
782 + AC_DEFINE(HAVE_INODE_OWNER_OR_CAPABLE_USERNS, 1,
783 [inode_owner_or_capable() takes user_ns])
785 - ZFS_LINUX_TEST_ERROR([capability])
788 + [whether inode_owner_or_capable() takes mnt_idmap])
789 + ZFS_LINUX_TEST_RESULT([inode_owner_or_capable_mnt_idmap], [
791 + AC_DEFINE(HAVE_INODE_OWNER_OR_CAPABLE_IDMAP, 1,
792 + [inode_owner_or_capable() takes mnt_idmap])
794 + ZFS_LINUX_TEST_ERROR([capability])
799 diff --git a/config/kernel-mkdir.m4 b/config/kernel-mkdir.m4
800 index 6667ed04f..7407a791b 100644
801 --- a/config/kernel-mkdir.m4
802 +++ b/config/kernel-mkdir.m4
803 @@ -2,6 +2,22 @@ dnl #
804 dnl # Supported mkdir() interfaces checked newest to oldest.
806 AC_DEFUN([ZFS_AC_KERNEL_SRC_MKDIR], [
808 + dnl # 6.3 API change
809 + dnl # mkdir() takes struct mnt_idmap * as the first arg
811 + ZFS_LINUX_TEST_SRC([mkdir_mnt_idmap], [
812 + #include <linux/fs.h>
814 + int mkdir(struct mnt_idmap *idmap,
815 + struct inode *inode, struct dentry *dentry,
816 + umode_t umode) { return 0; }
817 + static const struct inode_operations
818 + iops __attribute__ ((unused)) = {
824 dnl # 5.12 API change
825 dnl # The struct user_namespace arg was added as the first argument to
826 @@ -43,25 +59,36 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MKDIR], [
828 AC_DEFUN([ZFS_AC_KERNEL_MKDIR], [
830 - dnl # 5.12 API change
831 - dnl # The struct user_namespace arg was added as the first argument to
832 - dnl # mkdir() of the iops structure.
833 + dnl # 6.3 API change
834 + dnl # mkdir() takes struct mnt_idmap * as the first arg
836 - AC_MSG_CHECKING([whether iops->mkdir() takes struct user_namespace*])
837 - ZFS_LINUX_TEST_RESULT([mkdir_user_namespace], [
838 + AC_MSG_CHECKING([whether iops->mkdir() takes struct mnt_idmap*])
839 + ZFS_LINUX_TEST_RESULT([mkdir_mnt_idmap], [
841 - AC_DEFINE(HAVE_IOPS_MKDIR_USERNS, 1,
842 - [iops->mkdir() takes struct user_namespace*])
843 + AC_DEFINE(HAVE_IOPS_MKDIR_IDMAP, 1,
844 + [iops->mkdir() takes struct mnt_idmap*])
848 - AC_MSG_CHECKING([whether iops->mkdir() takes umode_t])
849 - ZFS_LINUX_TEST_RESULT([inode_operations_mkdir], [
851 + dnl # 5.12 API change
852 + dnl # The struct user_namespace arg was added as the first argument to
853 + dnl # mkdir() of the iops structure.
855 + AC_MSG_CHECKING([whether iops->mkdir() takes struct user_namespace*])
856 + ZFS_LINUX_TEST_RESULT([mkdir_user_namespace], [
858 - AC_DEFINE(HAVE_MKDIR_UMODE_T, 1,
859 - [iops->mkdir() takes umode_t])
860 + AC_DEFINE(HAVE_IOPS_MKDIR_USERNS, 1,
861 + [iops->mkdir() takes struct user_namespace*])
863 - ZFS_LINUX_TEST_ERROR([mkdir()])
866 + AC_MSG_CHECKING([whether iops->mkdir() takes umode_t])
867 + ZFS_LINUX_TEST_RESULT([inode_operations_mkdir], [
869 + AC_DEFINE(HAVE_MKDIR_UMODE_T, 1,
870 + [iops->mkdir() takes umode_t])
872 + ZFS_LINUX_TEST_ERROR([mkdir()])
877 diff --git a/config/kernel-mknod.m4 b/config/kernel-mknod.m4
878 index ffe451060..1494ec1ae 100644
879 --- a/config/kernel-mknod.m4
880 +++ b/config/kernel-mknod.m4
882 AC_DEFUN([ZFS_AC_KERNEL_SRC_MKNOD], [
884 + dnl # 6.3 API change
885 + dnl # The first arg is now struct mnt_idmap*
887 + ZFS_LINUX_TEST_SRC([mknod_mnt_idmap], [
888 + #include <linux/fs.h>
889 + #include <linux/sched.h>
891 + int tmp_mknod(struct mnt_idmap *idmap,
892 + struct inode *inode ,struct dentry *dentry,
893 + umode_t u, dev_t d) { return 0; }
895 + static const struct inode_operations
896 + iops __attribute__ ((unused)) = {
897 + .mknod = tmp_mknod,
902 dnl # 5.12 API change that added the struct user_namespace* arg
903 dnl # to the front of this function type's arg list.
904 @@ -19,12 +37,20 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_MKNOD], [
907 AC_DEFUN([ZFS_AC_KERNEL_MKNOD], [
908 - AC_MSG_CHECKING([whether iops->mknod() takes struct user_namespace*])
909 - ZFS_LINUX_TEST_RESULT([mknod_userns], [
910 + AC_MSG_CHECKING([whether iops->mknod() takes struct mnt_idmap*])
911 + ZFS_LINUX_TEST_RESULT([mknod_mnt_idmap], [
913 - AC_DEFINE(HAVE_IOPS_MKNOD_USERNS, 1,
914 - [iops->mknod() takes struct user_namespace*])
915 + AC_DEFINE(HAVE_IOPS_MKNOD_IDMAP, 1,
916 + [iops->mknod() takes struct mnt_idmap*])
919 + AC_MSG_CHECKING([whether iops->mknod() takes struct user_namespace*])
920 + ZFS_LINUX_TEST_RESULT([mknod_userns], [
922 + AC_DEFINE(HAVE_IOPS_MKNOD_USERNS, 1,
923 + [iops->mknod() takes struct user_namespace*])
929 diff --git a/config/kernel-reclaim_state.m4 b/config/kernel-reclaim_state.m4
931 index 000000000..9936b3c10
933 +++ b/config/kernel-reclaim_state.m4
935 +AC_DEFUN([ZFS_AC_KERNEL_SRC_RECLAIMED], [
937 + dnl # 6.4 API change
938 + dnl # The reclaimed_slab of struct reclaim_state
939 + dnl # is renamed to reclaimed
941 + ZFS_LINUX_TEST_SRC([reclaim_state_reclaimed], [
942 + #include <linux/swap.h>
943 + static const struct reclaim_state
944 + rs __attribute__ ((unused)) = {
950 +AC_DEFUN([ZFS_AC_KERNEL_RECLAIMED], [
951 + AC_MSG_CHECKING([whether struct reclaim_state has reclaimed field])
952 + ZFS_LINUX_TEST_RESULT([reclaim_state_reclaimed], [
954 + AC_DEFINE(HAVE_RECLAIM_STATE_RECLAIMED, 1,
955 + [struct reclaim_state has reclaimed])
961 diff --git a/config/kernel-rename.m4 b/config/kernel-rename.m4
962 index 302db43f5..b33cd0bfb 100644
963 --- a/config/kernel-rename.m4
964 +++ b/config/kernel-rename.m4
965 @@ -33,24 +33,48 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [
971 + dnl # 6.3 API change - the first arg is now struct mnt_idmap*
973 + ZFS_LINUX_TEST_SRC([inode_operations_rename_mnt_idmap], [
974 + #include <linux/fs.h>
975 + int rename_fn(struct mnt_idmap *idmap, struct inode *sip,
976 + struct dentry *sdp, struct inode *tip, struct dentry *tdp,
977 + unsigned int flags) { return 0; }
979 + static const struct inode_operations
980 + iops __attribute__ ((unused)) = {
981 + .rename = rename_fn,
986 AC_DEFUN([ZFS_AC_KERNEL_RENAME], [
987 - AC_MSG_CHECKING([whether iops->rename() takes struct user_namespace*])
988 - ZFS_LINUX_TEST_RESULT([inode_operations_rename_userns], [
989 + AC_MSG_CHECKING([whether iops->rename() takes struct mnt_idmap*])
990 + ZFS_LINUX_TEST_RESULT([inode_operations_rename_mnt_idmap], [
992 - AC_DEFINE(HAVE_IOPS_RENAME_USERNS, 1,
993 - [iops->rename() takes struct user_namespace*])
994 + AC_DEFINE(HAVE_IOPS_RENAME_IDMAP, 1,
995 + [iops->rename() takes struct mnt_idmap*])
999 - AC_MSG_CHECKING([whether iop->rename() wants flags])
1000 - ZFS_LINUX_TEST_RESULT([inode_operations_rename_flags], [
1001 + AC_MSG_CHECKING([whether iops->rename() takes struct user_namespace*])
1002 + ZFS_LINUX_TEST_RESULT([inode_operations_rename_userns], [
1004 - AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1,
1005 - [iops->rename() wants flags])
1006 + AC_DEFINE(HAVE_IOPS_RENAME_USERNS, 1,
1007 + [iops->rename() takes struct user_namespace*])
1011 + AC_MSG_CHECKING([whether iops->rename() wants flags])
1012 + ZFS_LINUX_TEST_RESULT([inode_operations_rename_flags], [
1013 + AC_MSG_RESULT(yes)
1014 + AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1,
1015 + [iops->rename() wants flags])
1022 diff --git a/config/kernel-setattr-prepare.m4 b/config/kernel-setattr-prepare.m4
1023 index 24245aa53..e02d6263e 100644
1024 --- a/config/kernel-setattr-prepare.m4
1025 +++ b/config/kernel-setattr-prepare.m4
1026 @@ -27,26 +27,48 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SETATTR_PREPARE], [
1027 int error __attribute__ ((unused)) =
1028 setattr_prepare(userns, dentry, attr);
1032 + dnl # 6.3 API change
1033 + dnl # The first arg of setattr_prepare() is changed to struct mnt_idmap*
1035 + ZFS_LINUX_TEST_SRC([setattr_prepare_mnt_idmap], [
1036 + #include <linux/fs.h>
1038 + struct dentry *dentry = NULL;
1039 + struct iattr *attr = NULL;
1040 + struct mnt_idmap *idmap = NULL;
1041 + int error __attribute__ ((unused)) =
1042 + setattr_prepare(idmap, dentry, attr);
1046 AC_DEFUN([ZFS_AC_KERNEL_SETATTR_PREPARE], [
1047 - AC_MSG_CHECKING([whether setattr_prepare() is available and accepts struct user_namespace*])
1048 - ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare_userns],
1049 + AC_MSG_CHECKING([whether setattr_prepare() is available and accepts struct mnt_idmap*])
1050 + ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare_mnt_idmap],
1051 [setattr_prepare], [fs/attr.c], [
1053 - AC_DEFINE(HAVE_SETATTR_PREPARE_USERNS, 1,
1054 - [setattr_prepare() accepts user_namespace])
1055 + AC_DEFINE(HAVE_SETATTR_PREPARE_IDMAP, 1,
1056 + [setattr_prepare() accepts mnt_idmap])
1060 - AC_MSG_CHECKING([whether setattr_prepare() is available, doesn't accept user_namespace])
1061 - ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare],
1062 - [setattr_prepare], [fs/attr.c], [
1063 + AC_MSG_CHECKING([whether setattr_prepare() is available and accepts struct user_namespace*])
1064 + ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare_userns],
1065 + [setattr_prepare], [fs/attr.c], [
1067 - AC_DEFINE(HAVE_SETATTR_PREPARE_NO_USERNS, 1,
1068 - [setattr_prepare() is available, doesn't accept user_namespace])
1069 + AC_DEFINE(HAVE_SETATTR_PREPARE_USERNS, 1,
1070 + [setattr_prepare() accepts user_namespace])
1074 + AC_MSG_CHECKING([whether setattr_prepare() is available, doesn't accept user_namespace])
1075 + ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare],
1076 + [setattr_prepare], [fs/attr.c], [
1077 + AC_MSG_RESULT(yes)
1078 + AC_DEFINE(HAVE_SETATTR_PREPARE_NO_USERNS, 1,
1079 + [setattr_prepare() is available, doesn't accept user_namespace])
1086 diff --git a/config/kernel-symlink.m4 b/config/kernel-symlink.m4
1087 index d90366d04..a0333ed66 100644
1088 --- a/config/kernel-symlink.m4
1089 +++ b/config/kernel-symlink.m4
1091 AC_DEFUN([ZFS_AC_KERNEL_SRC_SYMLINK], [
1093 + dnl # 6.3 API change that changed the first arg
1094 + dnl # to struct mnt_idmap*
1096 + ZFS_LINUX_TEST_SRC([symlink_mnt_idmap], [
1097 + #include <linux/fs.h>
1098 + #include <linux/sched.h>
1099 + int tmp_symlink(struct mnt_idmap *idmap,
1100 + struct inode *inode ,struct dentry *dentry,
1101 + const char *path) { return 0; }
1103 + static const struct inode_operations
1104 + iops __attribute__ ((unused)) = {
1105 + .symlink = tmp_symlink,
1109 dnl # 5.12 API change that added the struct user_namespace* arg
1110 dnl # to the front of this function type's arg list.
1111 @@ -19,12 +35,19 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_SYMLINK], [
1114 AC_DEFUN([ZFS_AC_KERNEL_SYMLINK], [
1115 - AC_MSG_CHECKING([whether iops->symlink() takes struct user_namespace*])
1116 - ZFS_LINUX_TEST_RESULT([symlink_userns], [
1117 + AC_MSG_CHECKING([whether iops->symlink() takes struct mnt_idmap*])
1118 + ZFS_LINUX_TEST_RESULT([symlink_mnt_idmap], [
1120 - AC_DEFINE(HAVE_IOPS_SYMLINK_USERNS, 1,
1121 - [iops->symlink() takes struct user_namespace*])
1122 + AC_DEFINE(HAVE_IOPS_SYMLINK_IDMAP, 1,
1123 + [iops->symlink() takes struct mnt_idmap*])
1126 + AC_MSG_CHECKING([whether iops->symlink() takes struct user_namespace*])
1127 + ZFS_LINUX_TEST_RESULT([symlink_userns], [
1128 + AC_MSG_RESULT(yes)
1129 + AC_DEFINE(HAVE_IOPS_SYMLINK_USERNS, 1,
1130 + [iops->symlink() takes struct user_namespace*])
1136 diff --git a/config/kernel-tmpfile.m4 b/config/kernel-tmpfile.m4
1137 index 0e1deb361..cc18b8f65 100644
1138 --- a/config/kernel-tmpfile.m4
1139 +++ b/config/kernel-tmpfile.m4
1140 @@ -4,6 +4,19 @@ dnl # Add support for i_op->tmpfile
1142 AC_DEFUN([ZFS_AC_KERNEL_SRC_TMPFILE], [
1144 + dnl # 6.3 API change
1145 + dnl # The first arg is now struct mnt_idmap *
1147 + ZFS_LINUX_TEST_SRC([inode_operations_tmpfile_mnt_idmap], [
1148 + #include <linux/fs.h>
1149 + int tmpfile(struct mnt_idmap *idmap,
1150 + struct inode *inode, struct file *file,
1151 + umode_t mode) { return 0; }
1152 + static struct inode_operations
1153 + iops __attribute__ ((unused)) = {
1154 + .tmpfile = tmpfile,
1157 dnl # 6.1 API change
1158 dnl # use struct file instead of struct dentry
1160 @@ -44,23 +57,29 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_TMPFILE], [
1162 AC_DEFUN([ZFS_AC_KERNEL_TMPFILE], [
1163 AC_MSG_CHECKING([whether i_op->tmpfile() exists])
1164 - ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile], [
1165 + ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_mnt_idmap], [
1167 AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists])
1168 - AC_DEFINE(HAVE_TMPFILE_USERNS, 1, [i_op->tmpfile() has userns])
1170 - ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_dentry_userns], [
1171 + AC_DEFINE(HAVE_TMPFILE_IDMAP, 1, [i_op->tmpfile() has mnt_idmap])
1173 + ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile], [
1175 AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists])
1176 AC_DEFINE(HAVE_TMPFILE_USERNS, 1, [i_op->tmpfile() has userns])
1177 - AC_DEFINE(HAVE_TMPFILE_DENTRY, 1, [i_op->tmpfile() uses old dentry signature])
1179 - ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_dentry], [
1180 + ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_dentry_userns], [
1182 AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists])
1183 + AC_DEFINE(HAVE_TMPFILE_USERNS, 1, [i_op->tmpfile() has userns])
1184 AC_DEFINE(HAVE_TMPFILE_DENTRY, 1, [i_op->tmpfile() uses old dentry signature])
1186 - ZFS_LINUX_REQUIRE_API([i_op->tmpfile()], [3.11])
1187 + ZFS_LINUX_TEST_RESULT([inode_operations_tmpfile_dentry], [
1188 + AC_MSG_RESULT(yes)
1189 + AC_DEFINE(HAVE_TMPFILE, 1, [i_op->tmpfile() exists])
1190 + AC_DEFINE(HAVE_TMPFILE_DENTRY, 1, [i_op->tmpfile() uses old dentry signature])
1192 + ZFS_LINUX_REQUIRE_API([i_op->tmpfile()], [3.11])
1197 diff --git a/config/kernel-writepage_t.m4 b/config/kernel-writepage_t.m4
1198 new file mode 100644
1199 index 000000000..3a0cffd98
1201 +++ b/config/kernel-writepage_t.m4
1203 +AC_DEFUN([ZFS_AC_KERNEL_SRC_WRITEPAGE_T], [
1205 + dnl # 6.3 API change
1206 + dnl # The writepage_t function type now has its first argument as
1207 + dnl # struct folio* instead of struct page*
1209 + ZFS_LINUX_TEST_SRC([writepage_t_folio], [
1210 + #include <linux/writeback.h>
1211 + int putpage(struct folio *folio,
1212 + struct writeback_control *wbc, void *data)
1214 + writepage_t func = putpage;
1218 +AC_DEFUN([ZFS_AC_KERNEL_WRITEPAGE_T], [
1219 + AC_MSG_CHECKING([whether int (*writepage_t)() takes struct folio*])
1220 + ZFS_LINUX_TEST_RESULT([writepage_t_folio], [
1221 + AC_MSG_RESULT(yes)
1222 + AC_DEFINE(HAVE_WRITEPAGE_T_FOLIO, 1,
1223 + [int (*writepage_t)() takes struct folio*])
1229 diff --git a/config/kernel-xattr-handler.m4 b/config/kernel-xattr-handler.m4
1230 index b6cbfa155..6b8a08dbc 100644
1231 --- a/config/kernel-xattr-handler.m4
1232 +++ b/config/kernel-xattr-handler.m4
1233 @@ -179,6 +179,21 @@ dnl #
1234 dnl # Supported xattr handler set() interfaces checked newest to oldest.
1236 AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET], [
1237 + ZFS_LINUX_TEST_SRC([xattr_handler_set_mnt_idmap], [
1238 + #include <linux/xattr.h>
1240 + int set(const struct xattr_handler *handler,
1241 + struct mnt_idmap *idmap,
1242 + struct dentry *dentry, struct inode *inode,
1243 + const char *name, const void *buffer,
1244 + size_t size, int flags)
1246 + static const struct xattr_handler
1247 + xops __attribute__ ((unused)) = {
1252 ZFS_LINUX_TEST_SRC([xattr_handler_set_userns], [
1253 #include <linux/xattr.h>
1255 @@ -240,53 +255,63 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_SET], [
1256 dnl # The xattr_handler->set() callback was changed to 8 arguments, and
1257 dnl # struct user_namespace* was inserted as arg #2
1259 - AC_MSG_CHECKING([whether xattr_handler->set() wants dentry, inode, and user_namespace])
1260 - ZFS_LINUX_TEST_RESULT([xattr_handler_set_userns], [
1261 + dnl # 6.3 API change,
1262 + dnl # The xattr_handler->set() callback 2nd arg is now struct mnt_idmap *
1264 + AC_MSG_CHECKING([whether xattr_handler->set() wants dentry, inode, and mnt_idmap])
1265 + ZFS_LINUX_TEST_RESULT([xattr_handler_set_mnt_idmap], [
1267 - AC_DEFINE(HAVE_XATTR_SET_USERNS, 1,
1268 - [xattr_handler->set() takes user_namespace])
1271 - dnl # 4.7 API change,
1272 - dnl # The xattr_handler->set() callback was changed to take both
1273 - dnl # dentry and inode.
1276 - AC_MSG_CHECKING([whether xattr_handler->set() wants dentry and inode])
1277 - ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry_inode], [
1278 + AC_DEFINE(HAVE_XATTR_SET_IDMAP, 1,
1279 + [xattr_handler->set() takes mnt_idmap])
1281 + AC_MSG_CHECKING([whether xattr_handler->set() wants dentry, inode, and user_namespace])
1282 + ZFS_LINUX_TEST_RESULT([xattr_handler_set_userns], [
1284 - AC_DEFINE(HAVE_XATTR_SET_DENTRY_INODE, 1,
1285 - [xattr_handler->set() wants both dentry and inode])
1286 + AC_DEFINE(HAVE_XATTR_SET_USERNS, 1,
1287 + [xattr_handler->set() takes user_namespace])
1290 - dnl # 4.4 API change,
1291 - dnl # The xattr_handler->set() callback was changed to take a
1292 - dnl # xattr_handler, and handler_flags argument was removed and
1293 - dnl # should be accessed by handler->flags.
1294 + dnl # 4.7 API change,
1295 + dnl # The xattr_handler->set() callback was changed to take both
1296 + dnl # dentry and inode.
1300 - [whether xattr_handler->set() wants xattr_handler])
1301 - ZFS_LINUX_TEST_RESULT([xattr_handler_set_xattr_handler], [
1302 + AC_MSG_CHECKING([whether xattr_handler->set() wants dentry and inode])
1303 + ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry_inode], [
1305 - AC_DEFINE(HAVE_XATTR_SET_HANDLER, 1,
1306 - [xattr_handler->set() wants xattr_handler])
1307 + AC_DEFINE(HAVE_XATTR_SET_DENTRY_INODE, 1,
1308 + [xattr_handler->set() wants both dentry and inode])
1311 - dnl # 2.6.33 API change,
1312 - dnl # The xattr_handler->set() callback was changed
1313 - dnl # to take a dentry instead of an inode, and a
1314 - dnl # handler_flags argument was added.
1315 + dnl # 4.4 API change,
1316 + dnl # The xattr_handler->set() callback was changed to take a
1317 + dnl # xattr_handler, and handler_flags argument was removed and
1318 + dnl # should be accessed by handler->flags.
1322 - [whether xattr_handler->set() wants dentry])
1323 - ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry], [
1324 + [whether xattr_handler->set() wants xattr_handler])
1325 + ZFS_LINUX_TEST_RESULT([xattr_handler_set_xattr_handler], [
1327 - AC_DEFINE(HAVE_XATTR_SET_DENTRY, 1,
1328 - [xattr_handler->set() wants dentry])
1329 + AC_DEFINE(HAVE_XATTR_SET_HANDLER, 1,
1330 + [xattr_handler->set() wants xattr_handler])
1332 - ZFS_LINUX_TEST_ERROR([xattr set()])
1334 + dnl # 2.6.33 API change,
1335 + dnl # The xattr_handler->set() callback was changed
1336 + dnl # to take a dentry instead of an inode, and a
1337 + dnl # handler_flags argument was added.
1341 + [whether xattr_handler->set() wants dentry])
1342 + ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry], [
1343 + AC_MSG_RESULT(yes)
1344 + AC_DEFINE(HAVE_XATTR_SET_DENTRY, 1,
1345 + [xattr_handler->set() wants dentry])
1347 + ZFS_LINUX_TEST_ERROR([xattr set()])
1352 diff --git a/config/kernel.m4 b/config/kernel.m4
1353 index 7806da7a8..173c78a2a 100644
1354 --- a/config/kernel.m4
1355 +++ b/config/kernel.m4
1356 @@ -69,6 +69,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
1357 ZFS_AC_KERNEL_SRC_INODE_OWNER_OR_CAPABLE
1358 ZFS_AC_KERNEL_SRC_XATTR
1359 ZFS_AC_KERNEL_SRC_ACL
1360 + ZFS_AC_KERNEL_SRC_INODE_SETATTR
1361 ZFS_AC_KERNEL_SRC_INODE_GETATTR
1362 ZFS_AC_KERNEL_SRC_INODE_SET_FLAGS
1363 ZFS_AC_KERNEL_SRC_INODE_SET_IVERSION
1364 @@ -130,7 +131,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
1365 ZFS_AC_KERNEL_SRC_KSTRTOUL
1366 ZFS_AC_KERNEL_SRC_PERCPU
1367 ZFS_AC_KERNEL_SRC_CPU_HOTPLUG
1368 - ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR_USERNS
1369 + ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR
1370 ZFS_AC_KERNEL_SRC_MKNOD
1371 ZFS_AC_KERNEL_SRC_SYMLINK
1372 ZFS_AC_KERNEL_SRC_BIO_MAX_SEGS
1373 @@ -144,6 +145,15 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
1374 ZFS_AC_KERNEL_SRC_KTHREAD
1375 ZFS_AC_KERNEL_SRC_ZERO_PAGE
1376 ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC
1377 + ZFS_AC_KERNEL_SRC_FILEMAP
1378 + ZFS_AC_KERNEL_SRC_WRITEPAGE_T
1379 + ZFS_AC_KERNEL_SRC_RECLAIMED
1380 + case "$host_cpu" in
1382 + ZFS_AC_KERNEL_SRC_CPU_HAS_FEATURE
1383 + ZFS_AC_KERNEL_SRC_FLUSH_DCACHE_PAGE
1387 AC_MSG_CHECKING([for available kernel interfaces])
1388 ZFS_LINUX_TEST_COMPILE_ALL([kabi])
1389 @@ -186,6 +196,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
1390 ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE
1393 + ZFS_AC_KERNEL_INODE_SETATTR
1394 ZFS_AC_KERNEL_INODE_GETATTR
1395 ZFS_AC_KERNEL_INODE_SET_FLAGS
1396 ZFS_AC_KERNEL_INODE_SET_IVERSION
1397 @@ -247,7 +258,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
1398 ZFS_AC_KERNEL_KSTRTOUL
1399 ZFS_AC_KERNEL_PERCPU
1400 ZFS_AC_KERNEL_CPU_HOTPLUG
1401 - ZFS_AC_KERNEL_GENERIC_FILLATTR_USERNS
1402 + ZFS_AC_KERNEL_GENERIC_FILLATTR
1404 ZFS_AC_KERNEL_SYMLINK
1405 ZFS_AC_KERNEL_BIO_MAX_SEGS
1406 @@ -261,6 +272,15 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
1407 ZFS_AC_KERNEL_KTHREAD
1408 ZFS_AC_KERNEL_ZERO_PAGE
1409 ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC
1410 + ZFS_AC_KERNEL_FILEMAP
1411 + ZFS_AC_KERNEL_WRITEPAGE_T
1412 + ZFS_AC_KERNEL_RECLAIMED
1413 + case "$host_cpu" in
1415 + ZFS_AC_KERNEL_CPU_HAS_FEATURE
1416 + ZFS_AC_KERNEL_FLUSH_DCACHE_PAGE
1422 diff --git a/config/zfs-build.m4 b/config/zfs-build.m4
1423 index 2ab6765c3..9390812cd 100644
1424 --- a/config/zfs-build.m4
1425 +++ b/config/zfs-build.m4
1426 @@ -81,7 +81,7 @@ AC_DEFUN([ZFS_AC_DEBUG], [
1427 AC_DEFUN([ZFS_AC_DEBUGINFO_ENABLE], [
1428 DEBUG_CFLAGS="$DEBUG_CFLAGS -g -fno-inline $NO_IPA_SRA"
1430 - KERNEL_DEBUG_CFLAGS="$KERNEL_DEBUG_CFLAGS -fno-inline $NO_IPA_SRA"
1431 + KERNEL_DEBUG_CFLAGS="$KERNEL_DEBUG_CFLAGS -fno-inline $KERNEL_NO_IPA_SRA"
1432 KERNEL_MAKE="$KERNEL_MAKE CONFIG_DEBUG_INFO=y"
1434 DEBUGINFO_ZFS="_with_debuginfo"
1435 @@ -217,6 +217,7 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS], [
1436 ZFS_AC_CONFIG_ALWAYS_CC_NO_FORMAT_ZERO_LENGTH
1437 ZFS_AC_CONFIG_ALWAYS_CC_NO_OMIT_FRAME_POINTER
1438 ZFS_AC_CONFIG_ALWAYS_CC_NO_IPA_SRA
1439 + ZFS_AC_CONFIG_ALWAYS_KERNEL_CC_NO_IPA_SRA
1440 ZFS_AC_CONFIG_ALWAYS_CC_ASAN
1441 ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD
1442 ZFS_AC_CONFIG_ALWAYS_SYSTEM
1443 diff --git a/configure.ac b/configure.ac
1444 index 2671434af..cb339ccd4 100644
1447 @@ -222,6 +222,7 @@ AC_CONFIG_FILES([
1448 tests/zfs-tests/cmd/mmap_exec/Makefile
1449 tests/zfs-tests/cmd/mmap_libaio/Makefile
1450 tests/zfs-tests/cmd/mmap_seek/Makefile
1451 + tests/zfs-tests/cmd/mmap_sync/Makefile
1452 tests/zfs-tests/cmd/mmapwrite/Makefile
1453 tests/zfs-tests/cmd/nvlist_to_lua/Makefile
1454 tests/zfs-tests/cmd/randfree_file/Makefile
1455 diff --git a/contrib/initramfs/scripts/zfs b/contrib/initramfs/scripts/zfs
1456 index 4ce739fda..3c51b53ee 100644
1457 --- a/contrib/initramfs/scripts/zfs
1458 +++ b/contrib/initramfs/scripts/zfs
1459 @@ -326,7 +326,7 @@ mount_fs()
1461 # Need the _original_ datasets mountpoint!
1462 mountpoint=$(get_fs_value "$fs" mountpoint)
1463 - ZFS_CMD="mount.zfs -o zfsutil"
1464 + ZFS_CMD="mount -o zfsutil -t zfs"
1465 if [ "$mountpoint" = "legacy" ] || [ "$mountpoint" = "none" ]; then
1466 # Can't use the mountpoint property. Might be one of our
1467 # clones. Check the 'org.zol:mountpoint' property set in
1468 @@ -343,7 +343,7 @@ mount_fs()
1470 # Don't use mount.zfs -o zfsutils for legacy mountpoint
1471 if [ "$mountpoint" = "legacy" ]; then
1472 - ZFS_CMD="mount.zfs"
1473 + ZFS_CMD="mount -t zfs"
1475 # Last hail-mary: Hope 'rootmnt' is set!
1477 @@ -914,7 +914,7 @@ mountroot()
1478 echo " not specified on the kernel command line."
1480 echo "Manually mount the root filesystem on $rootmnt and then exit."
1481 - echo "Hint: Try: mount.zfs -o zfsutil ${ZFS_RPOOL-rpool}/ROOT/system $rootmnt"
1482 + echo "Hint: Try: mount -o zfsutil -t zfs ${ZFS_RPOOL-rpool}/ROOT/system $rootmnt"
1486 diff --git a/contrib/pam_zfs_key/pam_zfs_key.c b/contrib/pam_zfs_key/pam_zfs_key.c
1487 index 0db119382..313703770 100644
1488 --- a/contrib/pam_zfs_key/pam_zfs_key.c
1489 +++ b/contrib/pam_zfs_key/pam_zfs_key.c
1490 @@ -548,16 +548,11 @@ zfs_key_config_modify_session_counter(pam_handle_t *pamh,
1494 - size_t runtime_path_len = strlen(runtime_path);
1495 - size_t counter_path_len = runtime_path_len + 1 + 10;
1496 - char *counter_path = malloc(counter_path_len + 1);
1497 - if (!counter_path) {
1499 + char *counter_path;
1500 + if (asprintf(&counter_path, "%s/%u", runtime_path, config->uid) == -1)
1503 - counter_path[0] = 0;
1504 - strcat(counter_path, runtime_path);
1505 - snprintf(counter_path + runtime_path_len, counter_path_len, "/%d",
1508 const int fd = open(counter_path,
1509 O_RDWR | O_CLOEXEC | O_CREAT | O_NOFOLLOW,
1511 diff --git a/include/os/freebsd/zfs/sys/zfs_znode_impl.h b/include/os/freebsd/zfs/sys/zfs_znode_impl.h
1512 index 3d93525b4..120884116 100644
1513 --- a/include/os/freebsd/zfs/sys/zfs_znode_impl.h
1514 +++ b/include/os/freebsd/zfs/sys/zfs_znode_impl.h
1515 @@ -118,7 +118,8 @@ extern minor_t zfsdev_minor_alloc(void);
1516 #define Z_ISLNK(type) ((type) == VLNK)
1517 #define Z_ISDIR(type) ((type) == VDIR)
1519 -#define zn_has_cached_data(zp) vn_has_cached_data(ZTOV(zp))
1520 +#define zn_has_cached_data(zp, start, end) \
1521 + vn_has_cached_data(ZTOV(zp))
1522 #define zn_flush_cached_data(zp, sync) vn_flush_cached_data(ZTOV(zp), sync)
1523 #define zn_rlimit_fsize(zp, uio) \
1524 vn_rlimit_fsize(ZTOV(zp), GET_UIO_STRUCT(uio), zfs_uio_td(uio))
1525 diff --git a/include/os/linux/kernel/linux/dcache_compat.h b/include/os/linux/kernel/linux/dcache_compat.h
1526 index c90135fd3..f87f1653a 100644
1527 --- a/include/os/linux/kernel/linux/dcache_compat.h
1528 +++ b/include/os/linux/kernel/linux/dcache_compat.h
1530 #define d_alias d_u.d_alias
1534 + * Starting from Linux 5.13, flush_dcache_page() becomes an inline function
1535 + * and under some configurations, may indirectly reference GPL-only
1536 + * cpu_feature_keys on powerpc. Override this function when it is detected
1539 +#if defined __powerpc__ && defined HAVE_FLUSH_DCACHE_PAGE_GPL_ONLY
1540 +#include <linux/simd_powerpc.h>
1541 +#define flush_dcache_page(page) do { \
1542 + if (!cpu_has_feature(CPU_FTR_COHERENT_ICACHE) && \
1543 + test_bit(PG_dcache_clean, &(page)->flags)) \
1544 + clear_bit(PG_dcache_clean, &(page)->flags); \
1549 * 2.6.30 API change,
1550 * The const keyword was added to the 'struct dentry_operations' in
1551 diff --git a/include/os/linux/kernel/linux/simd_powerpc.h b/include/os/linux/kernel/linux/simd_powerpc.h
1552 index 108cef22f..422b85af3 100644
1553 --- a/include/os/linux/kernel/linux/simd_powerpc.h
1554 +++ b/include/os/linux/kernel/linux/simd_powerpc.h
1556 #define kfpu_init() 0
1557 #define kfpu_fini() ((void) 0)
1560 + * Linux 4.7 makes cpu_has_feature use jump labels on powerpc if
1561 + * CONFIG_JUMP_LABEL_FEATURE_CHECKS is enabled; in that case, however, it
1562 + * references the GPL-only symbol cpu_feature_keys. Therefore we override
1563 + * this interface when it is detected as being GPL-only.
1565 +#if defined(CONFIG_JUMP_LABEL_FEATURE_CHECKS) && \
1566 + defined(HAVE_CPU_HAS_FEATURE_GPL_ONLY)
1567 +#define cpu_has_feature(feature) early_cpu_has_feature(feature)
1571 * Check if AltiVec instruction set is available
1573 diff --git a/include/os/linux/kernel/linux/vfs_compat.h b/include/os/linux/kernel/linux/vfs_compat.h
1574 index 91e908598..e82bbf755 100644
1575 --- a/include/os/linux/kernel/linux/vfs_compat.h
1576 +++ b/include/os/linux/kernel/linux/vfs_compat.h
1577 @@ -344,7 +344,8 @@ static inline void zfs_gid_write(struct inode *ip, gid_t gid)
1580 #if !(defined(HAVE_SETATTR_PREPARE_NO_USERNS) || \
1581 - defined(HAVE_SETATTR_PREPARE_USERNS))
1582 + defined(HAVE_SETATTR_PREPARE_USERNS) || \
1583 + defined(HAVE_SETATTR_PREPARE_IDMAP))
1585 setattr_prepare(struct dentry *dentry, struct iattr *ia)
1587 @@ -399,6 +400,15 @@ func(struct user_namespace *user_ns, const struct path *path, \
1588 return (func##_impl(user_ns, path, stat, request_mask, \
1591 +#elif defined(HAVE_IDMAP_IOPS_GETATTR)
1592 +#define ZPL_GETATTR_WRAPPER(func) \
1594 +func(struct mnt_idmap *user_ns, const struct path *path, \
1595 + struct kstat *stat, u32 request_mask, unsigned int query_flags) \
1597 + return (func##_impl(user_ns, path, stat, request_mask, \
1603 @@ -450,8 +460,15 @@ zpl_is_32bit_api(void)
1605 * To support id-mapped mounts, generic_fillattr() was modified to
1606 * accept a new struct user_namespace* as its first arg.
1609 + * generic_fillattr() first arg is changed to struct mnt_idmap *
1612 -#ifdef HAVE_GENERIC_FILLATTR_USERNS
1613 +#ifdef HAVE_GENERIC_FILLATTR_IDMAP
1614 +#define zpl_generic_fillattr(idmap, ip, sp) \
1615 + generic_fillattr(idmap, ip, sp)
1616 +#elif defined(HAVE_GENERIC_FILLATTR_USERNS)
1617 #define zpl_generic_fillattr(user_ns, ip, sp) \
1618 generic_fillattr(user_ns, ip, sp)
1620 diff --git a/include/os/linux/kernel/linux/xattr_compat.h b/include/os/linux/kernel/linux/xattr_compat.h
1621 index 30403fe87..3ffd00169 100644
1622 --- a/include/os/linux/kernel/linux/xattr_compat.h
1623 +++ b/include/os/linux/kernel/linux/xattr_compat.h
1624 @@ -133,20 +133,35 @@ fn(const struct xattr_handler *handler, struct dentry *dentry, \
1625 #error "Unsupported kernel"
1630 + * The xattr_handler->set() callback was changed to take the
1631 + * struct mnt_idmap* as the second arg, to support idmapped
1634 +#if defined(HAVE_XATTR_SET_IDMAP)
1635 +#define ZPL_XATTR_SET_WRAPPER(fn) \
1637 +fn(const struct xattr_handler *handler, struct mnt_idmap *user_ns, \
1638 + struct dentry *dentry, struct inode *inode, const char *name, \
1639 + const void *buffer, size_t size, int flags) \
1641 + return (__ ## fn(user_ns, inode, name, buffer, size, flags)); \
1645 * The xattr_handler->set() callback was changed to take the
1646 * struct user_namespace* as the first arg, to support idmapped
1649 -#if defined(HAVE_XATTR_SET_USERNS)
1650 +#elif defined(HAVE_XATTR_SET_USERNS)
1651 #define ZPL_XATTR_SET_WRAPPER(fn) \
1653 fn(const struct xattr_handler *handler, struct user_namespace *user_ns, \
1654 struct dentry *dentry, struct inode *inode, const char *name, \
1655 const void *buffer, size_t size, int flags) \
1657 - return (__ ## fn(inode, name, buffer, size, flags)); \
1658 + return (__ ## fn(user_ns, inode, name, buffer, size, flags)); \
1662 @@ -160,7 +175,7 @@ fn(const struct xattr_handler *handler, struct dentry *dentry, \
1663 struct inode *inode, const char *name, const void *buffer, \
1664 size_t size, int flags) \
1666 - return (__ ## fn(inode, name, buffer, size, flags)); \
1667 + return (__ ## fn(kcred->user_ns, inode, name, buffer, size, flags));\
1671 @@ -174,7 +189,8 @@ static int \
1672 fn(const struct xattr_handler *handler, struct dentry *dentry, \
1673 const char *name, const void *buffer, size_t size, int flags) \
1675 - return (__ ## fn(dentry->d_inode, name, buffer, size, flags)); \
1676 + return (__ ## fn(kcred->user_ns, dentry->d_inode, name, \
1677 + buffer, size, flags)); \
1680 * 2.6.33 API change,
1681 @@ -187,7 +203,8 @@ static int \
1682 fn(struct dentry *dentry, const char *name, const void *buffer, \
1683 size_t size, int flags, int unused_handler_flags) \
1685 - return (__ ## fn(dentry->d_inode, name, buffer, size, flags)); \
1686 + return (__ ## fn(kcred->user_ns, dentry->d_inode, name, buffer, \
1690 #error "Unsupported kernel"
1691 diff --git a/include/os/linux/spl/sys/cred.h b/include/os/linux/spl/sys/cred.h
1692 index b7d3f38d7..501bd4566 100644
1693 --- a/include/os/linux/spl/sys/cred.h
1694 +++ b/include/os/linux/spl/sys/cred.h
1695 @@ -45,6 +45,8 @@ typedef struct cred cred_t;
1696 #define SGID_TO_KGID(x) (KGIDT_INIT(x))
1697 #define KGIDP_TO_SGIDP(x) (&(x)->val)
1699 +extern zidmap_t *zfs_get_init_idmap(void);
1701 extern void crhold(cred_t *cr);
1702 extern void crfree(cred_t *cr);
1703 extern uid_t crgetuid(const cred_t *cr);
1704 diff --git a/include/os/linux/spl/sys/types.h b/include/os/linux/spl/sys/types.h
1705 index b44c94518..4d638efbb 100644
1706 --- a/include/os/linux/spl/sys/types.h
1707 +++ b/include/os/linux/spl/sys/types.h
1708 @@ -54,4 +54,18 @@ typedef ulong_t pgcnt_t;
1709 typedef int major_t;
1710 typedef int minor_t;
1712 +struct user_namespace;
1713 +#ifdef HAVE_IOPS_CREATE_IDMAP
1714 +#include <linux/refcount.h>
1716 + struct user_namespace *owner;
1719 +typedef struct mnt_idmap zidmap_t;
1721 +typedef struct user_namespace zidmap_t;
1724 +extern zidmap_t *zfs_init_idmap;
1726 #endif /* _SPL_TYPES_H */
1727 diff --git a/include/os/linux/zfs/sys/trace_acl.h b/include/os/linux/zfs/sys/trace_acl.h
1728 index 21bcefa4e..656552749 100644
1729 --- a/include/os/linux/zfs/sys/trace_acl.h
1730 +++ b/include/os/linux/zfs/sys/trace_acl.h
1731 @@ -58,9 +58,10 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
1732 __field(uint64_t, z_size)
1733 __field(uint64_t, z_pflags)
1734 __field(uint32_t, z_sync_cnt)
1735 + __field(uint32_t, z_sync_writes_cnt)
1736 + __field(uint32_t, z_async_writes_cnt)
1737 __field(mode_t, z_mode)
1738 __field(boolean_t, z_is_sa)
1739 - __field(boolean_t, z_is_mapped)
1740 __field(boolean_t, z_is_ctldir)
1742 __field(uint32_t, i_uid)
1743 @@ -90,9 +91,10 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
1744 __entry->z_size = zn->z_size;
1745 __entry->z_pflags = zn->z_pflags;
1746 __entry->z_sync_cnt = zn->z_sync_cnt;
1747 + __entry->z_sync_writes_cnt = zn->z_sync_writes_cnt;
1748 + __entry->z_async_writes_cnt = zn->z_async_writes_cnt;
1749 __entry->z_mode = zn->z_mode;
1750 __entry->z_is_sa = zn->z_is_sa;
1751 - __entry->z_is_mapped = zn->z_is_mapped;
1752 __entry->z_is_ctldir = zn->z_is_ctldir;
1754 __entry->i_uid = KUID_TO_SUID(ZTOI(zn)->i_uid);
1755 @@ -114,18 +116,18 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
1756 TP_printk("zn { id %llu unlinked %u atime_dirty %u "
1757 "zn_prefetch %u blksz %u seq %u "
1758 "mapcnt %llu size %llu pflags %llu "
1759 - "sync_cnt %u mode 0x%x is_sa %d "
1760 - "is_mapped %d is_ctldir %d inode { "
1761 - "uid %u gid %u ino %lu nlink %u size %lli "
1762 + "sync_cnt %u sync_writes_cnt %u async_writes_cnt %u "
1763 + "mode 0x%x is_sa %d is_ctldir %d "
1764 + "inode { uid %u gid %u ino %lu nlink %u size %lli "
1765 "blkbits %u bytes %u mode 0x%x generation %x } } "
1766 "ace { type %u flags %u access_mask %u } mask_matched %u",
1767 __entry->z_id, __entry->z_unlinked, __entry->z_atime_dirty,
1768 __entry->z_zn_prefetch, __entry->z_blksz,
1769 __entry->z_seq, __entry->z_mapcnt, __entry->z_size,
1770 - __entry->z_pflags, __entry->z_sync_cnt, __entry->z_mode,
1771 - __entry->z_is_sa, __entry->z_is_mapped,
1772 - __entry->z_is_ctldir, __entry->i_uid,
1773 - __entry->i_gid, __entry->i_ino, __entry->i_nlink,
1774 + __entry->z_pflags, __entry->z_sync_cnt,
1775 + __entry->z_sync_writes_cnt, __entry->z_async_writes_cnt,
1776 + __entry->z_mode, __entry->z_is_sa, __entry->z_is_ctldir,
1777 + __entry->i_uid, __entry->i_gid, __entry->i_ino, __entry->i_nlink,
1778 __entry->i_size, __entry->i_blkbits,
1779 __entry->i_bytes, __entry->i_mode, __entry->i_generation,
1780 __entry->z_type, __entry->z_flags, __entry->z_access_mask,
1781 diff --git a/include/os/linux/zfs/sys/zfs_vnops_os.h b/include/os/linux/zfs/sys/zfs_vnops_os.h
1782 index 47f91e4a6..331f2e2bc 100644
1783 --- a/include/os/linux/zfs/sys/zfs_vnops_os.h
1784 +++ b/include/os/linux/zfs/sys/zfs_vnops_os.h
1785 @@ -54,8 +54,7 @@ extern int zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap,
1786 extern int zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd,
1787 cred_t *cr, int flags);
1788 extern int zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr);
1789 -extern int zfs_getattr_fast(struct user_namespace *, struct inode *ip,
1790 - struct kstat *sp);
1791 +extern int zfs_getattr_fast(zidmap_t *, struct inode *ip, struct kstat *sp);
1792 extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr);
1793 extern int zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp,
1794 char *tnm, cred_t *cr, int flags);
1795 @@ -68,9 +67,9 @@ extern void zfs_inactive(struct inode *ip);
1796 extern int zfs_space(znode_t *zp, int cmd, flock64_t *bfp, int flag,
1797 offset_t offset, cred_t *cr);
1798 extern int zfs_fid(struct inode *ip, fid_t *fidp);
1799 -extern int zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages);
1800 +extern int zfs_getpage(struct inode *ip, struct page *pp);
1801 extern int zfs_putpage(struct inode *ip, struct page *pp,
1802 - struct writeback_control *wbc);
1803 + struct writeback_control *wbc, boolean_t for_sync);
1804 extern int zfs_dirty_inode(struct inode *ip, int flags);
1805 extern int zfs_map(struct inode *ip, offset_t off, caddr_t *addrp,
1806 size_t len, unsigned long vm_flags);
1807 diff --git a/include/os/linux/zfs/sys/zfs_znode_impl.h b/include/os/linux/zfs/sys/zfs_znode_impl.h
1808 index de46fc8f2..9b9ac7a4f 100644
1809 --- a/include/os/linux/zfs/sys/zfs_znode_impl.h
1810 +++ b/include/os/linux/zfs/sys/zfs_znode_impl.h
1815 +#if defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
1816 #define ZNODE_OS_FIELDS \
1817 inode_timespec_t z_btime; /* creation/birth time (cached) */ \
1818 struct inode z_inode;
1820 +#define ZNODE_OS_FIELDS \
1821 + inode_timespec_t z_btime; /* creation/birth time (cached) */ \
1822 + struct inode z_inode; \
1823 + boolean_t z_is_mapped; /* we are mmap'ed */
1827 * Convert between znode pointers and inode pointers
1828 @@ -70,7 +77,14 @@ extern "C" {
1829 #define Z_ISDEV(type) (S_ISCHR(type) || S_ISBLK(type) || S_ISFIFO(type))
1830 #define Z_ISDIR(type) S_ISDIR(type)
1832 -#define zn_has_cached_data(zp) ((zp)->z_is_mapped)
1833 +#if defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
1834 +#define zn_has_cached_data(zp, start, end) \
1835 + filemap_range_has_page(ZTOI(zp)->i_mapping, start, end)
1837 +#define zn_has_cached_data(zp, start, end) \
1838 + ((zp)->z_is_mapped)
1841 #define zn_flush_cached_data(zp, sync) write_inode_now(ZTOI(zp), sync)
1842 #define zn_rlimit_fsize(zp, uio) (0)
1844 diff --git a/include/os/linux/zfs/sys/zpl.h b/include/os/linux/zfs/sys/zpl.h
1845 index ac9815d4e..4e08470e7 100644
1846 --- a/include/os/linux/zfs/sys/zpl.h
1847 +++ b/include/os/linux/zfs/sys/zpl.h
1848 @@ -64,7 +64,10 @@ extern int zpl_xattr_security_init(struct inode *ip, struct inode *dip,
1849 const struct qstr *qstr);
1850 #if defined(CONFIG_FS_POSIX_ACL)
1851 #if defined(HAVE_SET_ACL)
1852 -#if defined(HAVE_SET_ACL_USERNS)
1853 +#if defined(HAVE_SET_ACL_IDMAP_DENTRY)
1854 +extern int zpl_set_acl(struct mnt_idmap *idmap, struct dentry *dentry,
1855 + struct posix_acl *acl, int type);
1856 +#elif defined(HAVE_SET_ACL_USERNS)
1857 extern int zpl_set_acl(struct user_namespace *userns, struct inode *ip,
1858 struct posix_acl *acl, int type);
1859 #elif defined(HAVE_SET_ACL_USERNS_DENTRY_ARG2)
1860 @@ -186,13 +189,15 @@ zpl_dir_emit_dots(struct file *file, zpl_dir_context_t *ctx)
1862 #if defined(HAVE_INODE_OWNER_OR_CAPABLE)
1863 #define zpl_inode_owner_or_capable(ns, ip) inode_owner_or_capable(ip)
1864 -#elif defined(HAVE_INODE_OWNER_OR_CAPABLE_IDMAPPED)
1865 +#elif defined(HAVE_INODE_OWNER_OR_CAPABLE_USERNS)
1866 #define zpl_inode_owner_or_capable(ns, ip) inode_owner_or_capable(ns, ip)
1867 +#elif defined(HAVE_INODE_OWNER_OR_CAPABLE_IDMAP)
1868 +#define zpl_inode_owner_or_capable(idmap, ip) inode_owner_or_capable(idmap, ip)
1870 #error "Unsupported kernel"
1873 -#ifdef HAVE_SETATTR_PREPARE_USERNS
1874 +#if defined(HAVE_SETATTR_PREPARE_USERNS) || defined(HAVE_SETATTR_PREPARE_IDMAP)
1875 #define zpl_setattr_prepare(ns, dentry, ia) setattr_prepare(ns, dentry, ia)
1878 diff --git a/include/sys/dmu.h b/include/sys/dmu.h
1879 index 7bdd42e8b..12bd88720 100644
1880 --- a/include/sys/dmu.h
1881 +++ b/include/sys/dmu.h
1882 @@ -778,6 +778,9 @@ dmu_tx_t *dmu_tx_create(objset_t *os);
1883 void dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
1884 void dmu_tx_hold_write_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
1886 +void dmu_tx_hold_append(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
1887 +void dmu_tx_hold_append_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
1889 void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
1891 void dmu_tx_hold_free_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
1892 diff --git a/include/sys/dmu_objset.h b/include/sys/dmu_objset.h
1893 index 7ade2dc91..fffcbcfca 100644
1894 --- a/include/sys/dmu_objset.h
1895 +++ b/include/sys/dmu_objset.h
1896 @@ -72,6 +72,10 @@ struct dmu_tx;
1898 #define OBJSET_CRYPT_PORTABLE_FLAGS_MASK (0)
1900 +#if defined(__clang__)
1901 +#pragma clang diagnostic push
1902 +#pragma clang diagnostic ignored "-Wgnu-variable-sized-type-not-at-end"
1904 typedef struct objset_phys {
1905 dnode_phys_t os_meta_dnode;
1906 zil_header_t os_zil_header;
1907 @@ -88,6 +92,9 @@ typedef struct objset_phys {
1908 char os_pad1[OBJSET_PHYS_SIZE_V3 - OBJSET_PHYS_SIZE_V2 -
1909 sizeof (dnode_phys_t)];
1911 +#if defined(__clang__)
1912 +#pragma clang diagnostic pop
1915 typedef int (*dmu_objset_upgrade_cb_t)(objset_t *);
1917 diff --git a/include/sys/dmu_tx.h b/include/sys/dmu_tx.h
1918 index ad3f1b0e4..e8886fd4e 100644
1919 --- a/include/sys/dmu_tx.h
1920 +++ b/include/sys/dmu_tx.h
1921 @@ -90,6 +90,7 @@ enum dmu_tx_hold_type {
1929 diff --git a/include/sys/dnode.h b/include/sys/dnode.h
1930 index 20b7c2aaf..39bbdae44 100644
1931 --- a/include/sys/dnode.h
1932 +++ b/include/sys/dnode.h
1933 @@ -120,7 +120,11 @@ extern "C" {
1934 #define DN_MAX_LEVELS (DIV_ROUND_UP(DN_MAX_OFFSET_SHIFT - SPA_MINBLOCKSHIFT, \
1935 DN_MIN_INDBLKSHIFT - SPA_BLKPTRSHIFT) + 1)
1937 -#define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \
1939 + * Use the flexible array instead of the fixed length one dn_bonus
1940 + * to address memcpy/memmove fortify error
1942 +#define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus_flexible + \
1943 (((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t))))
1944 #define DN_MAX_BONUS_LEN(dnp) \
1945 ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) ? \
1946 @@ -266,6 +270,10 @@ typedef struct dnode_phys {
1951 + blkptr_t __dn_ignore4;
1952 + uint8_t dn_bonus_flexible[];
1957 diff --git a/include/sys/fs/zfs.h b/include/sys/fs/zfs.h
1958 index 111e70ece..84f5aee59 100644
1959 --- a/include/sys/fs/zfs.h
1960 +++ b/include/sys/fs/zfs.h
1961 @@ -1173,6 +1173,7 @@ typedef enum pool_initialize_func {
1962 POOL_INITIALIZE_START,
1963 POOL_INITIALIZE_CANCEL,
1964 POOL_INITIALIZE_SUSPEND,
1965 + POOL_INITIALIZE_UNINIT,
1966 POOL_INITIALIZE_FUNCS
1967 } pool_initialize_func_t;
1969 diff --git a/include/sys/spa.h b/include/sys/spa.h
1970 index fedadab45..42f7fec0f 100644
1971 --- a/include/sys/spa.h
1972 +++ b/include/sys/spa.h
1973 @@ -785,6 +785,7 @@ extern int bpobj_enqueue_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
1974 #define SPA_ASYNC_L2CACHE_REBUILD 0x800
1975 #define SPA_ASYNC_L2CACHE_TRIM 0x1000
1976 #define SPA_ASYNC_REBUILD_DONE 0x2000
1977 +#define SPA_ASYNC_DETACH_SPARE 0x4000
1979 /* device manipulation */
1980 extern int spa_vdev_add(spa_t *spa, nvlist_t *nvroot);
1981 @@ -971,6 +972,8 @@ extern int spa_import_progress_set_state(uint64_t pool_guid,
1982 /* Pool configuration locks */
1983 extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw);
1984 extern void spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw);
1985 +extern void spa_config_enter_mmp(spa_t *spa, int locks, const void *tag,
1987 extern void spa_config_exit(spa_t *spa, int locks, const void *tag);
1988 extern int spa_config_held(spa_t *spa, int locks, krw_t rw);
1990 diff --git a/include/sys/vdev_initialize.h b/include/sys/vdev_initialize.h
1991 index 81d39ebeb..942fc71c5 100644
1992 --- a/include/sys/vdev_initialize.h
1993 +++ b/include/sys/vdev_initialize.h
1994 @@ -33,6 +33,7 @@ extern "C" {
1997 extern void vdev_initialize(vdev_t *vd);
1998 +extern void vdev_uninitialize(vdev_t *vd);
1999 extern void vdev_initialize_stop(vdev_t *vd,
2000 vdev_initializing_state_t tgt_state, list_t *vd_list);
2001 extern void vdev_initialize_stop_all(vdev_t *vd,
2002 diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h
2003 index 0df8a0e4b..48dab671d 100644
2004 --- a/include/sys/zfs_znode.h
2005 +++ b/include/sys/zfs_znode.h
2006 @@ -188,7 +188,6 @@ typedef struct znode {
2007 boolean_t z_atime_dirty; /* atime needs to be synced */
2008 boolean_t z_zn_prefetch; /* Prefetch znodes? */
2009 boolean_t z_is_sa; /* are we native sa? */
2010 - boolean_t z_is_mapped; /* are we mmap'ed */
2011 boolean_t z_is_ctldir; /* are we .zfs entry */
2012 boolean_t z_suspended; /* extra ref from a suspend? */
2013 uint_t z_blksz; /* block size in bytes */
2014 @@ -198,6 +197,8 @@ typedef struct znode {
2015 uint64_t z_size; /* file size (cached) */
2016 uint64_t z_pflags; /* pflags (cached) */
2017 uint32_t z_sync_cnt; /* synchronous open count */
2018 + uint32_t z_sync_writes_cnt; /* synchronous write count */
2019 + uint32_t z_async_writes_cnt; /* asynchronous write count */
2020 mode_t z_mode; /* mode (cached) */
2021 kmutex_t z_acl_lock; /* acl data lock */
2022 zfs_acl_t *z_acl_cached; /* cached acl */
2023 diff --git a/lib/libzfs/libzfs.abi b/lib/libzfs/libzfs.abi
2024 index 13ce19df9..58c2d7635 100644
2025 --- a/lib/libzfs/libzfs.abi
2026 +++ b/lib/libzfs/libzfs.abi
2027 @@ -5410,7 +5410,8 @@
2028 <enumerator name='POOL_INITIALIZE_START' value='0'/>
2029 <enumerator name='POOL_INITIALIZE_CANCEL' value='1'/>
2030 <enumerator name='POOL_INITIALIZE_SUSPEND' value='2'/>
2031 - <enumerator name='POOL_INITIALIZE_FUNCS' value='3'/>
2032 + <enumerator name='POOL_INITIALIZE_UNINIT' value='3'/>
2033 + <enumerator name='POOL_INITIALIZE_FUNCS' value='4'/>
2035 <typedef-decl name='pool_initialize_func_t' type-id='5c246ad4' id='7063e1ab'/>
2036 <enum-decl name='pool_trim_func' id='54ed608a'>
2037 diff --git a/lib/libzfs/libzfs_dataset.c b/lib/libzfs/libzfs_dataset.c
2038 index f2219d1c3..f6d844bdf 100644
2039 --- a/lib/libzfs/libzfs_dataset.c
2040 +++ b/lib/libzfs/libzfs_dataset.c
2041 @@ -1017,6 +1017,7 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
2043 int chosen_normal = -1;
2044 int chosen_utf = -1;
2045 + int set_maxbs = 0;
2047 if (nvlist_alloc(&ret, NV_UNIQUE_NAME, 0) != 0) {
2048 (void) no_memory(hdl);
2049 @@ -1234,12 +1235,17 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
2050 (void) zfs_error(hdl, EZFS_BADPROP, errbuf);
2053 + /* save the ZFS_PROP_RECORDSIZE during create op */
2054 + if (zpool_hdl == NULL && prop == ZFS_PROP_RECORDSIZE) {
2055 + set_maxbs = intval;
2060 case ZFS_PROP_SPECIAL_SMALL_BLOCKS:
2062 - int maxbs = SPA_OLD_MAXBLOCKSIZE;
2064 + set_maxbs == 0 ? SPA_OLD_MAXBLOCKSIZE : set_maxbs;
2067 if (zpool_hdl != NULL) {
2068 diff --git a/lib/libzfs/libzfs_pool.c b/lib/libzfs/libzfs_pool.c
2069 index 29f077841..fc6c6e8e2 100644
2070 --- a/lib/libzfs/libzfs_pool.c
2071 +++ b/lib/libzfs/libzfs_pool.c
2072 @@ -2224,8 +2224,8 @@ xlate_init_err(int err)
2076 - * Begin, suspend, or cancel the initialization (initializing of all free
2077 - * blocks) for the given vdevs in the given pool.
2078 + * Begin, suspend, cancel, or uninit (clear) the initialization (initializing
2079 + * of all free blocks) for the given vdevs in the given pool.
2082 zpool_initialize_impl(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
2083 @@ -2251,11 +2251,16 @@ zpool_initialize_impl(zpool_handle_t *zhp, pool_initialize_func_t cmd_type,
2084 vdev_guids, &errlist);
2087 - if (errlist != NULL) {
2088 - vd_errlist = fnvlist_lookup_nvlist(errlist,
2089 - ZPOOL_INITIALIZE_VDEVS);
2090 + if (errlist != NULL && nvlist_lookup_nvlist(errlist,
2091 + ZPOOL_INITIALIZE_VDEVS, &vd_errlist) == 0) {
2095 + if (err == EINVAL && cmd_type == POOL_INITIALIZE_UNINIT) {
2096 + zfs_error_aux(zhp->zpool_hdl, dgettext(TEXT_DOMAIN,
2097 + "uninitialize is not supported by kernel"));
2100 (void) zpool_standard_error(zhp->zpool_hdl, err,
2101 dgettext(TEXT_DOMAIN, "operation failed"));
2103 diff --git a/lib/libzfs_core/libzfs_core.abi b/lib/libzfs_core/libzfs_core.abi
2104 index 1b03a5c42..7ede3e097 100644
2105 --- a/lib/libzfs_core/libzfs_core.abi
2106 +++ b/lib/libzfs_core/libzfs_core.abi
2107 @@ -1726,7 +1726,8 @@
2108 <enumerator name='POOL_INITIALIZE_START' value='0'/>
2109 <enumerator name='POOL_INITIALIZE_CANCEL' value='1'/>
2110 <enumerator name='POOL_INITIALIZE_SUSPEND' value='2'/>
2111 - <enumerator name='POOL_INITIALIZE_FUNCS' value='3'/>
2112 + <enumerator name='POOL_INITIALIZE_UNINIT' value='3'/>
2113 + <enumerator name='POOL_INITIALIZE_FUNCS' value='4'/>
2115 <typedef-decl name='pool_initialize_func_t' type-id='5c246ad4' id='7063e1ab'/>
2116 <enum-decl name='pool_trim_func' id='54ed608a'>
2117 diff --git a/man/man4/zfs.4 b/man/man4/zfs.4
2118 index 71a95c3bd..0c60a9c8e 100644
2119 --- a/man/man4/zfs.4
2120 +++ b/man/man4/zfs.4
2121 @@ -1712,7 +1712,7 @@ completes in order to verify the checksums of all blocks which have been
2123 This is enabled by default and strongly recommended.
2125 -.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 33554432 Ns B Po 32MB Pc Pq ulong
2126 +.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq ulong
2127 Maximum amount of I/O that can be concurrently issued for a sequential
2128 resilver per leaf device, given in bytes.
2130 @@ -1831,6 +1831,13 @@ When we cross this limit from above it is because we are issuing verification I/
2131 In this case (unless the metadata scan is done) we stop issuing verification I/O
2132 and start scanning metadata again until we get to the hard limit.
2134 +.It Sy zfs_scan_report_txgs Ns = Ns Sy 0 Ns | Ns 1 Pq uint
2135 +When reporting resilver throughput and estimated completion time use the
2136 +performance observed over roughly the last
2137 +.Sy zfs_scan_report_txgs
2139 +When set to zero performance is calculated over the time between checkpoints.
2141 .It Sy zfs_scan_strict_mem_lim Ns = Ns Sy 0 Ns | Ns 1 Pq int
2142 Enforce tight memory limits on pool scans when a sequential scan is in progress.
2143 When disabled, the memory limit may be exceeded by fast disks.
2144 @@ -1839,7 +1846,7 @@ When disabled, the memory limit may be exceeded by fast disks.
2145 Freezes a scrub/resilver in progress without actually pausing it.
2146 Intended for testing/debugging.
2148 -.It Sy zfs_scan_vdev_limit Ns = Ns Sy 4194304 Ns B Po 4MB Pc Pq int
2149 +.It Sy zfs_scan_vdev_limit Ns = Ns Sy 16777216 Ns B Po 16 MiB Pc Pq int
2150 Maximum amount of data that can be concurrently issued at once for scrubs and
2151 resilvers per leaf device, given in bytes.
2153 diff --git a/man/man8/zpool-initialize.8 b/man/man8/zpool-initialize.8
2154 index 0a108180d..ada00bb1b 100644
2155 --- a/man/man8/zpool-initialize.8
2156 +++ b/man/man8/zpool-initialize.8
2161 -.Op Fl c Ns | Ns Fl s
2162 +.Op Fl c Ns | Ns Fl s | Ns Fl u
2165 .Oo Ar device Oc Ns …
2166 @@ -60,6 +60,14 @@ initialized, the command will fail and no suspension will occur on any device.
2167 Initializing can then be resumed by running
2168 .Nm zpool Cm initialize
2169 with no flags on the relevant target devices.
2171 +Clears the initialization state on the specified devices, or all eligible
2172 +devices if none are specified.
2173 +If the devices are being actively initialized the command will fail.
2174 +After being cleared
2175 +.Nm zpool Cm initialize
2176 +with no flags can be used to re-initialize all unallocated regions on
2177 +the relevant target devices.
2179 Wait until the devices have finished initializing before returning.
2181 diff --git a/module/Kbuild.in b/module/Kbuild.in
2182 index 1507965c5..7675d614f 100644
2183 --- a/module/Kbuild.in
2184 +++ b/module/Kbuild.in
2185 @@ -44,4 +44,5 @@ endif
2186 subdir-asflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)
2187 subdir-ccflags-y := $(ZFS_MODULE_CFLAGS) $(ZFS_MODULE_CPPFLAGS)
2191 diff --git a/module/icp/algs/edonr/edonr.c b/module/icp/algs/edonr/edonr.c
2192 index 7a3ba30c0..baf8bb885 100644
2193 --- a/module/icp/algs/edonr/edonr.c
2194 +++ b/module/icp/algs/edonr/edonr.c
2195 @@ -343,9 +343,11 @@ Q256(size_t bitlen, const uint32_t *data, uint32_t *restrict p)
2196 * which only goes over it by a hair (1248 bytes on ARM32).
2198 #include <sys/isa_defs.h> /* for _ILP32 */
2199 -#ifdef _ILP32 /* We're 32-bit, assume small stack frames */
2200 +#if defined(_ILP32) /* We're 32-bit, assume small stack frames */
2201 +#if defined(__GNUC__) && !defined(__clang__)
2202 #pragma GCC diagnostic ignored "-Wframe-larger-than="
2206 #if defined(__IBMC__) && defined(_AIX) && defined(__64BIT__)
2207 static inline size_t
2208 diff --git a/module/icp/algs/skein/skein_block.c b/module/icp/algs/skein/skein_block.c
2209 index 7ba165a48..3ad52da5f 100644
2210 --- a/module/icp/algs/skein/skein_block.c
2211 +++ b/module/icp/algs/skein/skein_block.c
2213 * the #pragma here to ignore the warning.
2215 #if defined(_ILP32) || defined(__powerpc) /* Assume small stack */
2216 +#if defined(__GNUC__) && !defined(__clang__)
2217 #pragma GCC diagnostic ignored "-Wframe-larger-than="
2220 * We're running on 32-bit, don't unroll loops to save stack frame space
2222 diff --git a/module/lua/ldo.c b/module/lua/ldo.c
2223 index a9835c4f5..e4abe04e9 100644
2224 --- a/module/lua/ldo.c
2225 +++ b/module/lua/ldo.c
2226 @@ -197,7 +197,8 @@ l_noret luaD_throw (lua_State *L, int errcode) {
2230 -#if defined(HAVE_INFINITE_RECURSION)
2231 +#if defined(__GNUC__) && !defined(__clang__) && \
2232 + defined(HAVE_INFINITE_RECURSION)
2233 #pragma GCC diagnostic pop
2236 diff --git a/module/os/freebsd/zfs/zfs_ctldir.c b/module/os/freebsd/zfs/zfs_ctldir.c
2237 index 5bd2e1510..cfc4bab2f 100644
2238 --- a/module/os/freebsd/zfs/zfs_ctldir.c
2239 +++ b/module/os/freebsd/zfs/zfs_ctldir.c
2240 @@ -204,6 +204,10 @@ sfs_vgetx(struct mount *mp, int flags, uint64_t parent_id, uint64_t id,
2244 +#if __FreeBSD_version >= 1400077
2245 + vn_set_state(vp, VSTATE_CONSTRUCTED);
2251 @@ -675,6 +679,17 @@ zfsctl_root_readdir(struct vop_readdir_args *ap)
2253 ASSERT3S(vp->v_type, ==, VDIR);
2256 + * FIXME: this routine only ever emits 3 entries and does not tolerate
2257 + * being called with a buffer too small to handle all of them.
2259 + * The check below facilitates the idiom of repeating calls until the
2260 + * count to return is 0.
2262 + if (zfs_uio_offset(&uio) == 3 * sizeof (entry)) {
2266 error = sfs_readdir_common(zfsvfs->z_root, ZFSCTL_INO_ROOT, ap, &uio,
2269 @@ -800,6 +815,9 @@ static struct vop_vector zfsctl_ops_root = {
2270 .vop_default = &default_vnodeops,
2271 #if __FreeBSD_version >= 1300121
2272 .vop_fplookup_vexec = VOP_EAGAIN,
2274 +#if __FreeBSD_version >= 1300139
2275 + .vop_fplookup_symlink = VOP_EAGAIN,
2277 .vop_open = zfsctl_common_open,
2278 .vop_close = zfsctl_common_close,
2279 @@ -1126,6 +1144,9 @@ static struct vop_vector zfsctl_ops_snapdir = {
2280 .vop_default = &default_vnodeops,
2281 #if __FreeBSD_version >= 1300121
2282 .vop_fplookup_vexec = VOP_EAGAIN,
2284 +#if __FreeBSD_version >= 1300139
2285 + .vop_fplookup_symlink = VOP_EAGAIN,
2287 .vop_open = zfsctl_common_open,
2288 .vop_close = zfsctl_common_close,
2289 @@ -1150,7 +1171,7 @@ zfsctl_snapshot_inactive(struct vop_inactive_args *ap)
2291 vnode_t *vp = ap->a_vp;
2293 - VERIFY3S(vrecycle(vp), ==, 1);
2298 @@ -1234,6 +1255,11 @@ static struct vop_vector zfsctl_ops_snapshot = {
2299 #if __FreeBSD_version >= 1300121
2300 .vop_fplookup_vexec = VOP_EAGAIN,
2302 +#if __FreeBSD_version >= 1300139
2303 + .vop_fplookup_symlink = VOP_EAGAIN,
2305 + .vop_open = zfsctl_common_open,
2306 + .vop_close = zfsctl_common_close,
2307 .vop_inactive = zfsctl_snapshot_inactive,
2308 #if __FreeBSD_version >= 1300045
2309 .vop_need_inactive = vop_stdneed_inactive,
2310 diff --git a/module/os/freebsd/zfs/zfs_ioctl_os.c b/module/os/freebsd/zfs/zfs_ioctl_os.c
2311 index 7f7e2b72c..effc11518 100644
2312 --- a/module/os/freebsd/zfs/zfs_ioctl_os.c
2313 +++ b/module/os/freebsd/zfs/zfs_ioctl_os.c
2314 @@ -59,7 +59,7 @@ zfs_vfs_ref(zfsvfs_t **zfvp)
2320 zfs_vfs_held(zfsvfs_t *zfsvfs)
2322 return (zfsvfs->z_vfs != NULL);
2323 diff --git a/module/os/freebsd/zfs/zfs_znode.c b/module/os/freebsd/zfs/zfs_znode.c
2324 index 1debc3ec3..92e3bdd2e 100644
2325 --- a/module/os/freebsd/zfs/zfs_znode.c
2326 +++ b/module/os/freebsd/zfs/zfs_znode.c
2327 @@ -153,6 +153,9 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
2328 zp->z_xattr_cached = NULL;
2329 zp->z_xattr_parent = 0;
2331 + zp->z_sync_writes_cnt = 0;
2332 + zp->z_async_writes_cnt = 0;
2337 @@ -172,6 +175,9 @@ zfs_znode_cache_destructor(void *buf, void *arg)
2339 ASSERT3P(zp->z_acl_cached, ==, NULL);
2340 ASSERT3P(zp->z_xattr_cached, ==, NULL);
2342 + ASSERT0(atomic_load_32(&zp->z_sync_writes_cnt));
2343 + ASSERT0(atomic_load_32(&zp->z_async_writes_cnt));
2347 @@ -457,6 +463,8 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
2348 zp->z_blksz = blksz;
2349 zp->z_seq = 0x7A4653;
2351 + zp->z_sync_writes_cnt = 0;
2352 + zp->z_async_writes_cnt = 0;
2353 #if __FreeBSD_version >= 1300139
2354 atomic_store_ptr(&zp->z_cached_symlink, NULL);
2356 diff --git a/module/os/linux/spl/spl-cred.c b/module/os/linux/spl/spl-cred.c
2357 index f81b9540a..d407fc66b 100644
2358 --- a/module/os/linux/spl/spl-cred.c
2359 +++ b/module/os/linux/spl/spl-cred.c
2360 @@ -145,6 +145,18 @@ crgetgid(const cred_t *cr)
2361 return (KGID_TO_SGID(cr->fsgid));
2364 +/* Return the initial user ns or nop_mnt_idmap */
2366 +zfs_get_init_idmap(void)
2368 +#ifdef HAVE_IOPS_CREATE_IDMAP
2369 + return ((zidmap_t *)&nop_mnt_idmap);
2371 + return ((zidmap_t *)&init_user_ns);
2375 +EXPORT_SYMBOL(zfs_get_init_idmap);
2376 EXPORT_SYMBOL(crhold);
2377 EXPORT_SYMBOL(crfree);
2378 EXPORT_SYMBOL(crgetuid);
2379 diff --git a/module/os/linux/spl/spl-generic.c b/module/os/linux/spl/spl-generic.c
2380 index 508fb9d4c..2cb5251d7 100644
2381 --- a/module/os/linux/spl/spl-generic.c
2382 +++ b/module/os/linux/spl/spl-generic.c
2383 @@ -225,8 +225,10 @@ __div_u64(uint64_t u, uint32_t v)
2384 * replacements for libgcc-provided functions and will never be called
2387 +#if defined(__GNUC__) && !defined(__clang__)
2388 #pragma GCC diagnostic push
2389 #pragma GCC diagnostic ignored "-Wmissing-prototypes"
2393 * Implementation of 64-bit unsigned division for 32-bit machines.
2394 @@ -425,7 +427,9 @@ __aeabi_ldivmod(int64_t u, int64_t v)
2395 EXPORT_SYMBOL(__aeabi_ldivmod);
2396 #endif /* __arm || __arm__ */
2398 +#if defined(__GNUC__) && !defined(__clang__)
2399 #pragma GCC diagnostic pop
2402 #endif /* BITS_PER_LONG */
2404 diff --git a/module/os/linux/spl/spl-kmem-cache.c b/module/os/linux/spl/spl-kmem-cache.c
2405 index 5a318e0a5..d586afa9b 100644
2406 --- a/module/os/linux/spl/spl-kmem-cache.c
2407 +++ b/module/os/linux/spl/spl-kmem-cache.c
2408 @@ -183,8 +183,11 @@ kv_free(spl_kmem_cache_t *skc, void *ptr, int size)
2409 * of that infrastructure we are responsible for incrementing it.
2411 if (current->reclaim_state)
2412 +#ifdef HAVE_RECLAIM_STATE_RECLAIMED
2413 + current->reclaim_state->reclaimed += size >> PAGE_SHIFT;
2415 current->reclaim_state->reclaimed_slab += size >> PAGE_SHIFT;
2421 diff --git a/module/os/linux/zfs/arc_os.c b/module/os/linux/zfs/arc_os.c
2422 index f96cd1271..fc76fe0e0 100644
2423 --- a/module/os/linux/zfs/arc_os.c
2424 +++ b/module/os/linux/zfs/arc_os.c
2425 @@ -219,7 +219,11 @@ arc_shrinker_scan(struct shrinker *shrink, struct shrink_control *sc)
2426 arc_reduce_target_size(ptob(sc->nr_to_scan));
2427 arc_wait_for_eviction(ptob(sc->nr_to_scan), B_FALSE);
2428 if (current->reclaim_state != NULL)
2429 +#ifdef HAVE_RECLAIM_STATE_RECLAIMED
2430 + current->reclaim_state->reclaimed += sc->nr_to_scan;
2432 current->reclaim_state->reclaimed_slab += sc->nr_to_scan;
2436 * We are experiencing memory pressure which the arc_evict_zthr was
2437 diff --git a/module/os/linux/zfs/policy.c b/module/os/linux/zfs/policy.c
2438 index 5a52092bb..8d508bcb4 100644
2439 --- a/module/os/linux/zfs/policy.c
2440 +++ b/module/os/linux/zfs/policy.c
2441 @@ -124,7 +124,7 @@ secpolicy_vnode_any_access(const cred_t *cr, struct inode *ip, uid_t owner)
2442 if (crgetuid(cr) == owner)
2445 - if (zpl_inode_owner_or_capable(kcred->user_ns, ip))
2446 + if (zpl_inode_owner_or_capable(zfs_init_idmap, ip))
2449 #if defined(CONFIG_USER_NS)
2450 diff --git a/module/os/linux/zfs/zfs_ctldir.c b/module/os/linux/zfs/zfs_ctldir.c
2451 index c45644a69..743b03412 100644
2452 --- a/module/os/linux/zfs/zfs_ctldir.c
2453 +++ b/module/os/linux/zfs/zfs_ctldir.c
2454 @@ -468,7 +468,9 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
2455 zp->z_atime_dirty = B_FALSE;
2456 zp->z_zn_prefetch = B_FALSE;
2457 zp->z_is_sa = B_FALSE;
2458 +#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
2459 zp->z_is_mapped = B_FALSE;
2461 zp->z_is_ctldir = B_TRUE;
2462 zp->z_sa_hdl = NULL;
2464 @@ -478,6 +480,8 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
2468 + zp->z_sync_writes_cnt = 0;
2469 + zp->z_async_writes_cnt = 0;
2470 ip->i_generation = 0;
2472 ip->i_mode = (S_IFDIR | S_IRWXUGO);
2473 diff --git a/module/os/linux/zfs/zfs_ioctl_os.c b/module/os/linux/zfs/zfs_ioctl_os.c
2474 index 79b9d777d..767d3a377 100644
2475 --- a/module/os/linux/zfs/zfs_ioctl_os.c
2476 +++ b/module/os/linux/zfs/zfs_ioctl_os.c
2477 @@ -288,6 +288,8 @@ zfsdev_detach(void)
2478 #define ZFS_DEBUG_STR ""
2481 +zidmap_t *zfs_init_idmap;
2486 @@ -311,6 +313,8 @@ openzfs_init(void)
2487 printk(KERN_NOTICE "ZFS: Posix ACLs disabled by kernel\n");
2488 #endif /* CONFIG_FS_POSIX_ACL */
2490 + zfs_init_idmap = (zidmap_t *)zfs_get_init_idmap();
2495 diff --git a/module/os/linux/zfs/zfs_vfsops.c b/module/os/linux/zfs/zfs_vfsops.c
2496 index da897f120..e620eb43a 100644
2497 --- a/module/os/linux/zfs/zfs_vfsops.c
2498 +++ b/module/os/linux/zfs/zfs_vfsops.c
2499 @@ -1192,7 +1192,7 @@ zfs_prune_aliases(zfsvfs_t *zfsvfs, unsigned long nr_to_scan)
2503 - zp_array = kmem_zalloc(max_array * sizeof (znode_t *), KM_SLEEP);
2504 + zp_array = vmem_zalloc(max_array * sizeof (znode_t *), KM_SLEEP);
2506 mutex_enter(&zfsvfs->z_znodes_lock);
2507 while ((zp = list_head(&zfsvfs->z_all_znodes)) != NULL) {
2508 @@ -1228,7 +1228,7 @@ zfs_prune_aliases(zfsvfs_t *zfsvfs, unsigned long nr_to_scan)
2512 - kmem_free(zp_array, max_array * sizeof (znode_t *));
2513 + vmem_free(zp_array, max_array * sizeof (znode_t *));
2517 diff --git a/module/os/linux/zfs/zfs_vnops_os.c b/module/os/linux/zfs/zfs_vnops_os.c
2518 index ae0401e60..af0d553d5 100644
2519 --- a/module/os/linux/zfs/zfs_vnops_os.c
2520 +++ b/module/os/linux/zfs/zfs_vnops_os.c
2521 @@ -244,43 +244,46 @@ zfs_close(struct inode *ip, int flag, cred_t *cr)
2524 #if defined(_KERNEL)
2526 +static int zfs_fillpage(struct inode *ip, struct page *pp);
2529 * When a file is memory mapped, we must keep the IO data synchronized
2530 - * between the DMU cache and the memory mapped pages. What this means:
2532 - * On Write: If we find a memory mapped page, we write to *both*
2533 - * the page and the dmu buffer.
2534 + * between the DMU cache and the memory mapped pages. Update all mapped
2535 + * pages with the contents of the corresponding dmu buffer.
2538 update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
2540 - struct inode *ip = ZTOI(zp);
2541 - struct address_space *mp = ip->i_mapping;
2546 + struct address_space *mp = ZTOI(zp)->i_mapping;
2547 + int64_t off = start & (PAGE_SIZE - 1);
2549 - off = start & (PAGE_SIZE-1);
2550 for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
2551 - nbytes = MIN(PAGE_SIZE - off, len);
2552 + uint64_t nbytes = MIN(PAGE_SIZE - off, len);
2554 - pp = find_lock_page(mp, start >> PAGE_SHIFT);
2555 + struct page *pp = find_lock_page(mp, start >> PAGE_SHIFT);
2557 if (mapping_writably_mapped(mp))
2558 flush_dcache_page(pp);
2561 - (void) dmu_read(os, zp->z_id, start + off, nbytes,
2562 - pb + off, DMU_READ_PREFETCH);
2563 + void *pb = kmap(pp);
2564 + int error = dmu_read(os, zp->z_id, start + off,
2565 + nbytes, pb + off, DMU_READ_PREFETCH);
2568 - if (mapping_writably_mapped(mp))
2569 - flush_dcache_page(pp);
2572 + ClearPageUptodate(pp);
2574 + ClearPageError(pp);
2575 + SetPageUptodate(pp);
2577 + if (mapping_writably_mapped(mp))
2578 + flush_dcache_page(pp);
2580 + mark_page_accessed(pp);
2583 - mark_page_accessed(pp);
2584 - SetPageUptodate(pp);
2585 - ClearPageError(pp);
2589 @@ -291,38 +294,44 @@ update_pages(znode_t *zp, int64_t start, int len, objset_t *os)
2593 - * When a file is memory mapped, we must keep the IO data synchronized
2594 - * between the DMU cache and the memory mapped pages. What this means:
2596 - * On Read: We "read" preferentially from memory mapped pages,
2597 - * else we default from the dmu buffer.
2599 - * NOTE: We will always "break up" the IO into PAGESIZE uiomoves when
2600 - * the file is memory mapped.
2601 + * When a file is memory mapped, we must keep the I/O data synchronized
2602 + * between the DMU cache and the memory mapped pages. Preferentially read
2603 + * from memory mapped pages, otherwise fall back to reading through the dmu.
2606 mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
2608 struct inode *ip = ZTOI(zp);
2609 struct address_space *mp = ip->i_mapping;
2611 - int64_t start, off;
2613 + int64_t start = uio->uio_loffset;
2614 + int64_t off = start & (PAGE_SIZE - 1);
2619 - start = uio->uio_loffset;
2620 - off = start & (PAGE_SIZE-1);
2621 for (start &= PAGE_MASK; len > 0; start += PAGE_SIZE) {
2622 - bytes = MIN(PAGE_SIZE - off, len);
2623 + uint64_t bytes = MIN(PAGE_SIZE - off, len);
2625 - pp = find_lock_page(mp, start >> PAGE_SHIFT);
2626 + struct page *pp = find_lock_page(mp, start >> PAGE_SHIFT);
2628 - ASSERT(PageUptodate(pp));
2630 + * If filemap_fault() retries there exists a window
2631 + * where the page will be unlocked and not up to date.
2632 + * In this case we must try and fill the page.
2634 + if (unlikely(!PageUptodate(pp))) {
2635 + error = zfs_fillpage(ip, pp);
2643 + ASSERT(PageUptodate(pp) || PageDirty(pp));
2648 + void *pb = kmap(pp);
2649 error = zfs_uiomove(pb + off, bytes, UIO_READ, uio);
2652 @@ -338,9 +347,11 @@ mappedread(znode_t *zp, int nbytes, zfs_uio_t *uio)
2663 #endif /* _KERNEL */
2664 @@ -1010,7 +1021,7 @@ top:
2666 mutex_enter(&zp->z_lock);
2667 may_delete_now = atomic_read(&ZTOI(zp)->i_count) == 1 &&
2668 - !(zp->z_is_mapped);
2669 + !zn_has_cached_data(zp, 0, LLONG_MAX);
2670 mutex_exit(&zp->z_lock);
2673 @@ -1098,7 +1109,8 @@ top:
2674 &xattr_obj_unlinked, sizeof (xattr_obj_unlinked));
2675 delete_now = may_delete_now && !toobig &&
2676 atomic_read(&ZTOI(zp)->i_count) == 1 &&
2677 - !(zp->z_is_mapped) && xattr_obj == xattr_obj_unlinked &&
2678 + !zn_has_cached_data(zp, 0, LLONG_MAX) &&
2679 + xattr_obj == xattr_obj_unlinked &&
2680 zfs_external_acl(zp) == acl_obj;
2683 @@ -1663,8 +1675,7 @@ out:
2687 -zfs_getattr_fast(struct user_namespace *user_ns, struct inode *ip,
2689 +zfs_getattr_fast(zidmap_t *user_ns, struct inode *ip, struct kstat *sp)
2691 znode_t *zp = ITOZ(ip);
2692 zfsvfs_t *zfsvfs = ITOZSB(ip);
2693 @@ -3434,21 +3445,34 @@ top:
2697 -zfs_putpage_commit_cb(void *arg)
2698 +zfs_putpage_sync_commit_cb(void *arg)
2700 + struct page *pp = arg;
2702 + ClearPageError(pp);
2703 + end_page_writeback(pp);
2707 +zfs_putpage_async_commit_cb(void *arg)
2709 struct page *pp = arg;
2710 + znode_t *zp = ITOZ(pp->mapping->host);
2713 end_page_writeback(pp);
2714 + atomic_dec_32(&zp->z_async_writes_cnt);
2718 * Push a page out to disk, once the page is on stable storage the
2719 * registered commit callback will be run as notification of completion.
2721 - * IN: ip - page mapped for inode.
2722 - * pp - page to push (page is locked)
2723 - * wbc - writeback control data
2724 + * IN: ip - page mapped for inode.
2725 + * pp - page to push (page is locked)
2726 + * wbc - writeback control data
2727 + * for_sync - does the caller intend to wait synchronously for the
2728 + * page writeback to complete?
2730 * RETURN: 0 if success
2731 * error code if failure
2732 @@ -3458,7 +3482,8 @@ zfs_putpage_commit_cb(void *arg)
2736 -zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
2737 +zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc,
2738 + boolean_t for_sync)
2740 znode_t *zp = ITOZ(ip);
2741 zfsvfs_t *zfsvfs = ITOZSB(ip);
2742 @@ -3556,6 +3581,16 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
2743 zfs_rangelock_exit(lr);
2745 if (wbc->sync_mode != WB_SYNC_NONE) {
2747 + * Speed up any non-sync page writebacks since
2748 + * they may take several seconds to complete.
2749 + * Refer to the comment in zpl_fsync() (when
2750 + * HAVE_FSYNC_RANGE is defined) for details.
2752 + if (atomic_load_32(&zp->z_async_writes_cnt) > 0) {
2753 + zil_commit(zfsvfs->z_log, zp->z_id);
2756 if (PageWriteback(pp))
2757 #ifdef HAVE_PAGEMAP_FOLIO_WAIT_BIT
2758 folio_wait_bit(page_folio(pp), PG_writeback);
2759 @@ -3581,6 +3616,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
2760 * was in fact not skipped and should not be counted as if it were.
2762 wbc->pages_skipped--;
2764 + atomic_inc_32(&zp->z_async_writes_cnt);
2765 set_page_writeback(pp);
2768 @@ -3602,6 +3639,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
2771 end_page_writeback(pp);
2773 + atomic_dec_32(&zp->z_async_writes_cnt);
2774 zfs_rangelock_exit(lr);
2777 @@ -3626,7 +3665,9 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
2778 err = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx);
2780 zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, 0,
2781 - zfs_putpage_commit_cb, pp);
2782 + for_sync ? zfs_putpage_sync_commit_cb :
2783 + zfs_putpage_async_commit_cb, pp);
2787 zfs_rangelock_exit(lr);
2788 @@ -3638,6 +3679,16 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
2789 * performance reasons.
2791 zil_commit(zfsvfs->z_log, zp->z_id);
2792 + } else if (!for_sync && atomic_load_32(&zp->z_sync_writes_cnt) > 0) {
2794 + * If the caller does not intend to wait synchronously
2795 + * for this page writeback to complete and there are active
2796 + * synchronous calls on this file, do a commit so that
2797 + * the latter don't accidentally end up waiting for
2798 + * our writeback to complete. Refer to the comment in
2799 + * zpl_fsync() (when HAVE_FSYNC_RANGE is defined) for details.
2801 + zil_commit(zfsvfs->z_log, zp->z_id);
2804 dataset_kstats_update_write_kstats(&zfsvfs->z_kstat, pglen);
2805 @@ -3766,55 +3817,45 @@ zfs_inactive(struct inode *ip)
2806 * Fill pages with data from the disk.
2809 -zfs_fillpage(struct inode *ip, struct page *pl[], int nr_pages)
2810 +zfs_fillpage(struct inode *ip, struct page *pp)
2812 - znode_t *zp = ITOZ(ip);
2813 zfsvfs_t *zfsvfs = ITOZSB(ip);
2815 - struct page *cur_pp;
2816 - u_offset_t io_off, total;
2819 - unsigned page_idx;
2821 + loff_t i_size = i_size_read(ip);
2822 + u_offset_t io_off = page_offset(pp);
2823 + size_t io_len = PAGE_SIZE;
2825 - os = zfsvfs->z_os;
2826 - io_len = nr_pages << PAGE_SHIFT;
2827 - i_size = i_size_read(ip);
2828 - io_off = page_offset(pl[0]);
2829 + ASSERT3U(io_off, <, i_size);
2831 if (io_off + io_len > i_size)
2832 io_len = i_size - io_off;
2835 - * Iterate over list of pages and read each page individually.
2838 - for (total = io_off + io_len; io_off < total; io_off += PAGESIZE) {
2840 + void *va = kmap(pp);
2841 + int error = dmu_read(zfsvfs->z_os, ITOZ(ip)->z_id, io_off,
2842 + io_len, va, DMU_READ_PREFETCH);
2843 + if (io_len != PAGE_SIZE)
2844 + memset((char *)va + io_len, 0, PAGE_SIZE - io_len);
2847 - cur_pp = pl[page_idx++];
2848 - va = kmap(cur_pp);
2849 - err = dmu_read(os, zp->z_id, io_off, PAGESIZE, va,
2850 - DMU_READ_PREFETCH);
2853 - /* convert checksum errors into IO errors */
2854 - if (err == ECKSUM)
2855 - err = SET_ERROR(EIO);
2859 + /* convert checksum errors into IO errors */
2860 + if (error == ECKSUM)
2861 + error = SET_ERROR(EIO);
2864 + ClearPageUptodate(pp);
2866 + ClearPageError(pp);
2867 + SetPageUptodate(pp);
2875 - * Uses zfs_fillpage to read data from the file and fill the pages.
2876 + * Uses zfs_fillpage to read data from the file and fill the page.
2878 * IN: ip - inode of file to get data from.
2879 - * pl - list of pages to read
2880 - * nr_pages - number of pages to read
2881 + * pp - page to read
2883 * RETURN: 0 on success, error code on failure.
2885 @@ -3823,24 +3864,22 @@ zfs_fillpage(struct inode *ip, struct page *pl[], int nr_pages)
2889 -zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages)
2890 +zfs_getpage(struct inode *ip, struct page *pp)
2892 - znode_t *zp = ITOZ(ip);
2893 zfsvfs_t *zfsvfs = ITOZSB(ip);
2898 + znode_t *zp = ITOZ(ip);
2904 - err = zfs_fillpage(ip, pl, nr_pages);
2906 - dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, nr_pages*PAGESIZE);
2907 + error = zfs_fillpage(ip, pp);
2909 + dataset_kstats_update_read_kstats(&zfsvfs->z_kstat, PAGE_SIZE);
2918 diff --git a/module/os/linux/zfs/zfs_znode.c b/module/os/linux/zfs/zfs_znode.c
2919 index f3475b4d9..0236b3216 100644
2920 --- a/module/os/linux/zfs/zfs_znode.c
2921 +++ b/module/os/linux/zfs/zfs_znode.c
2922 @@ -134,6 +134,9 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
2923 zp->z_acl_cached = NULL;
2924 zp->z_xattr_cached = NULL;
2925 zp->z_xattr_parent = 0;
2926 + zp->z_sync_writes_cnt = 0;
2927 + zp->z_async_writes_cnt = 0;
2932 @@ -151,9 +154,12 @@ zfs_znode_cache_destructor(void *buf, void *arg)
2933 rw_destroy(&zp->z_xattr_lock);
2934 zfs_rangelock_fini(&zp->z_rangelock);
2936 - ASSERT(zp->z_dirlocks == NULL);
2937 - ASSERT(zp->z_acl_cached == NULL);
2938 - ASSERT(zp->z_xattr_cached == NULL);
2939 + ASSERT3P(zp->z_dirlocks, ==, NULL);
2940 + ASSERT3P(zp->z_acl_cached, ==, NULL);
2941 + ASSERT3P(zp->z_xattr_cached, ==, NULL);
2943 + ASSERT0(atomic_load_32(&zp->z_sync_writes_cnt));
2944 + ASSERT0(atomic_load_32(&zp->z_async_writes_cnt));
2948 @@ -540,7 +546,9 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
2949 ASSERT3P(zp->z_xattr_cached, ==, NULL);
2950 zp->z_unlinked = B_FALSE;
2951 zp->z_atime_dirty = B_FALSE;
2952 +#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
2953 zp->z_is_mapped = B_FALSE;
2955 zp->z_is_ctldir = B_FALSE;
2956 zp->z_suspended = B_FALSE;
2957 zp->z_sa_hdl = NULL;
2958 @@ -549,6 +557,8 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
2959 zp->z_blksz = blksz;
2960 zp->z_seq = 0x7A4653;
2962 + zp->z_sync_writes_cnt = 0;
2963 + zp->z_async_writes_cnt = 0;
2965 zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);
2967 @@ -1628,7 +1638,7 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
2968 * Zero partial page cache entries. This must be done under a
2969 * range lock in order to keep the ARC and page cache in sync.
2971 - if (zp->z_is_mapped) {
2972 + if (zn_has_cached_data(zp, off, off + len - 1)) {
2973 loff_t first_page, last_page, page_len;
2974 loff_t first_page_offset, last_page_offset;
2976 diff --git a/module/os/linux/zfs/zpl_ctldir.c b/module/os/linux/zfs/zpl_ctldir.c
2977 index 9b526afd0..cf4da470f 100644
2978 --- a/module/os/linux/zfs/zpl_ctldir.c
2979 +++ b/module/os/linux/zfs/zpl_ctldir.c
2980 @@ -101,7 +101,11 @@ zpl_root_readdir(struct file *filp, void *dirent, filldir_t filldir)
2984 -#ifdef HAVE_USERNS_IOPS_GETATTR
2985 +#ifdef HAVE_IDMAP_IOPS_GETATTR
2986 +zpl_root_getattr_impl(struct mnt_idmap *user_ns,
2987 + const struct path *path, struct kstat *stat, u32 request_mask,
2988 + unsigned int query_flags)
2989 +#elif defined(HAVE_USERNS_IOPS_GETATTR)
2990 zpl_root_getattr_impl(struct user_namespace *user_ns,
2991 const struct path *path, struct kstat *stat, u32 request_mask,
2992 unsigned int query_flags)
2993 @@ -112,8 +116,14 @@ zpl_root_getattr_impl(const struct path *path, struct kstat *stat,
2995 struct inode *ip = path->dentry->d_inode;
2997 -#if defined(HAVE_GENERIC_FILLATTR_USERNS) && defined(HAVE_USERNS_IOPS_GETATTR)
2998 +#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
2999 +#ifdef HAVE_GENERIC_FILLATTR_USERNS
3000 generic_fillattr(user_ns, ip, stat);
3001 +#elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
3002 + generic_fillattr(user_ns, ip, stat);
3007 generic_fillattr(ip, stat);
3009 @@ -304,6 +314,10 @@ static int
3010 zpl_snapdir_rename2(struct user_namespace *user_ns, struct inode *sdip,
3011 struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
3013 +#elif defined(HAVE_IOPS_RENAME_IDMAP)
3014 +zpl_snapdir_rename2(struct mnt_idmap *user_ns, struct inode *sdip,
3015 + struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
3016 + unsigned int flags)
3018 zpl_snapdir_rename2(struct inode *sdip, struct dentry *sdentry,
3019 struct inode *tdip, struct dentry *tdentry, unsigned int flags)
3020 @@ -325,7 +339,9 @@ zpl_snapdir_rename2(struct inode *sdip, struct dentry *sdentry,
3024 -#if !defined(HAVE_RENAME_WANTS_FLAGS) && !defined(HAVE_IOPS_RENAME_USERNS)
3025 +#if (!defined(HAVE_RENAME_WANTS_FLAGS) && \
3026 + !defined(HAVE_IOPS_RENAME_USERNS) && \
3027 + !defined(HAVE_IOPS_RENAME_IDMAP))
3029 zpl_snapdir_rename(struct inode *sdip, struct dentry *sdentry,
3030 struct inode *tdip, struct dentry *tdentry)
3031 @@ -352,6 +368,9 @@ static int
3032 #ifdef HAVE_IOPS_MKDIR_USERNS
3033 zpl_snapdir_mkdir(struct user_namespace *user_ns, struct inode *dip,
3034 struct dentry *dentry, umode_t mode)
3035 +#elif defined(HAVE_IOPS_MKDIR_IDMAP)
3036 +zpl_snapdir_mkdir(struct mnt_idmap *user_ns, struct inode *dip,
3037 + struct dentry *dentry, umode_t mode)
3039 zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
3041 @@ -384,7 +403,11 @@ zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
3045 -#ifdef HAVE_USERNS_IOPS_GETATTR
3046 +#ifdef HAVE_IDMAP_IOPS_GETATTR
3047 +zpl_snapdir_getattr_impl(struct mnt_idmap *user_ns,
3048 + const struct path *path, struct kstat *stat, u32 request_mask,
3049 + unsigned int query_flags)
3050 +#elif defined(HAVE_USERNS_IOPS_GETATTR)
3051 zpl_snapdir_getattr_impl(struct user_namespace *user_ns,
3052 const struct path *path, struct kstat *stat, u32 request_mask,
3053 unsigned int query_flags)
3054 @@ -397,8 +420,14 @@ zpl_snapdir_getattr_impl(const struct path *path, struct kstat *stat,
3055 zfsvfs_t *zfsvfs = ITOZSB(ip);
3058 -#if defined(HAVE_GENERIC_FILLATTR_USERNS) && defined(HAVE_USERNS_IOPS_GETATTR)
3059 +#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
3060 +#ifdef HAVE_GENERIC_FILLATTR_USERNS
3061 + generic_fillattr(user_ns, ip, stat);
3062 +#elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
3063 generic_fillattr(user_ns, ip, stat);
3068 generic_fillattr(ip, stat);
3070 @@ -439,7 +468,9 @@ const struct file_operations zpl_fops_snapdir = {
3071 const struct inode_operations zpl_ops_snapdir = {
3072 .lookup = zpl_snapdir_lookup,
3073 .getattr = zpl_snapdir_getattr,
3074 -#if defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS)
3075 +#if (defined(HAVE_RENAME_WANTS_FLAGS) || \
3076 + defined(HAVE_IOPS_RENAME_USERNS) || \
3077 + defined(HAVE_IOPS_RENAME_IDMAP))
3078 .rename = zpl_snapdir_rename2,
3080 .rename = zpl_snapdir_rename,
3081 @@ -530,6 +561,10 @@ static int
3082 zpl_shares_getattr_impl(struct user_namespace *user_ns,
3083 const struct path *path, struct kstat *stat, u32 request_mask,
3084 unsigned int query_flags)
3085 +#elif defined(HAVE_IDMAP_IOPS_GETATTR)
3086 +zpl_shares_getattr_impl(struct mnt_idmap *user_ns,
3087 + const struct path *path, struct kstat *stat, u32 request_mask,
3088 + unsigned int query_flags)
3090 zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
3091 u32 request_mask, unsigned int query_flags)
3092 @@ -543,8 +578,14 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
3095 if (zfsvfs->z_shares_dir == 0) {
3096 -#if defined(HAVE_GENERIC_FILLATTR_USERNS) && defined(HAVE_USERNS_IOPS_GETATTR)
3097 +#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
3098 +#ifdef HAVE_GENERIC_FILLATTR_USERNS
3099 + generic_fillattr(user_ns, path->dentry->d_inode, stat);
3100 +#elif defined(HAVE_GENERIC_FILLATTR_IDMAP)
3101 generic_fillattr(user_ns, path->dentry->d_inode, stat);
3106 generic_fillattr(path->dentry->d_inode, stat);
3108 @@ -556,7 +597,7 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
3110 error = -zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp);
3112 -#if defined(HAVE_GENERIC_FILLATTR_USERNS) && defined(HAVE_USERNS_IOPS_GETATTR)
3113 +#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
3114 error = -zfs_getattr_fast(user_ns, ZTOI(dzp), stat);
3116 error = -zfs_getattr_fast(kcred->user_ns, ZTOI(dzp), stat);
3117 diff --git a/module/os/linux/zfs/zpl_file.c b/module/os/linux/zfs/zpl_file.c
3118 index 38d2bd147..d5d354db1 100644
3119 --- a/module/os/linux/zfs/zpl_file.c
3120 +++ b/module/os/linux/zfs/zpl_file.c
3121 @@ -165,17 +165,56 @@ static int
3122 zpl_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
3124 struct inode *inode = filp->f_mapping->host;
3125 + znode_t *zp = ITOZ(inode);
3126 + zfsvfs_t *zfsvfs = ITOZSB(inode);
3127 cred_t *cr = CRED();
3129 fstrans_cookie_t cookie;
3132 + * The variables z_sync_writes_cnt and z_async_writes_cnt work in
3133 + * tandem so that sync writes can detect if there are any non-sync
3134 + * writes going on and vice-versa. The "vice-versa" part to this logic
3135 + * is located in zfs_putpage() where non-sync writes check if there are
3136 + * any ongoing sync writes. If any sync and non-sync writes overlap,
3137 + * we do a commit to complete the non-sync writes since the latter can
3138 + * potentially take several seconds to complete and thus block sync
3139 + * writes in the upcoming call to filemap_write_and_wait_range().
3141 + atomic_inc_32(&zp->z_sync_writes_cnt);
3143 + * If the following check does not detect an overlapping non-sync write
3144 + * (say because it's just about to start), then it is guaranteed that
3145 + * the non-sync write will detect this sync write. This is because we
3146 + * always increment z_sync_writes_cnt / z_async_writes_cnt before doing
3147 + * the check on z_async_writes_cnt / z_sync_writes_cnt here and in
3148 + * zfs_putpage() respectively.
3150 + if (atomic_load_32(&zp->z_async_writes_cnt) > 0) {
3151 + ZPL_ENTER(zfsvfs);
3152 + zil_commit(zfsvfs->z_log, zp->z_id);
3156 error = filemap_write_and_wait_range(inode->i_mapping, start, end);
3159 + * The sync write is not complete yet but we decrement
3160 + * z_sync_writes_cnt since zfs_fsync() increments and decrements
3161 + * it internally. If a non-sync write starts just after the decrement
3162 + * operation but before we call zfs_fsync(), it may not detect this
3163 + * overlapping sync write but it does not matter since we have already
3164 + * gone past filemap_write_and_wait_range() and we won't block due to
3165 + * the non-sync write.
3167 + atomic_dec_32(&zp->z_sync_writes_cnt);
3173 cookie = spl_fstrans_mark();
3174 - error = -zfs_fsync(ITOZ(inode), datasync, cr);
3175 + error = -zfs_fsync(zp, datasync, cr);
3176 spl_fstrans_unmark(cookie);
3178 ASSERT3S(error, <=, 0);
3179 @@ -579,7 +618,6 @@ static int
3180 zpl_mmap(struct file *filp, struct vm_area_struct *vma)
3182 struct inode *ip = filp->f_mapping->host;
3183 - znode_t *zp = ITOZ(ip);
3185 fstrans_cookie_t cookie;
3187 @@ -594,9 +632,12 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma)
3191 +#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
3192 + znode_t *zp = ITOZ(ip);
3193 mutex_enter(&zp->z_lock);
3194 zp->z_is_mapped = B_TRUE;
3195 mutex_exit(&zp->z_lock);
3200 @@ -609,29 +650,16 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma)
3202 zpl_readpage_common(struct page *pp)
3205 - struct page *pl[1];
3207 fstrans_cookie_t cookie;
3209 ASSERT(PageLocked(pp));
3210 - ip = pp->mapping->host;
3213 cookie = spl_fstrans_mark();
3214 - error = -zfs_getpage(ip, pl, 1);
3215 + int error = -zfs_getpage(pp->mapping->host, pp);
3216 spl_fstrans_unmark(cookie);
3220 - ClearPageUptodate(pp);
3222 - ClearPageError(pp);
3223 - SetPageUptodate(pp);
3224 - flush_dcache_page(pp);
3232 @@ -688,19 +716,42 @@ zpl_readahead(struct readahead_control *ractl)
3234 zpl_putpage(struct page *pp, struct writeback_control *wbc, void *data)
3236 - struct address_space *mapping = data;
3237 + boolean_t *for_sync = data;
3238 fstrans_cookie_t cookie;
3240 ASSERT(PageLocked(pp));
3241 ASSERT(!PageWriteback(pp));
3243 cookie = spl_fstrans_mark();
3244 - (void) zfs_putpage(mapping->host, pp, wbc);
3245 + (void) zfs_putpage(pp->mapping->host, pp, wbc, *for_sync);
3246 spl_fstrans_unmark(cookie);
3251 +#ifdef HAVE_WRITEPAGE_T_FOLIO
3253 +zpl_putfolio(struct folio *pp, struct writeback_control *wbc, void *data)
3255 + (void) zpl_putpage(&pp->page, wbc, data);
3261 +zpl_write_cache_pages(struct address_space *mapping,
3262 + struct writeback_control *wbc, void *data)
3266 +#ifdef HAVE_WRITEPAGE_T_FOLIO
3267 + result = write_cache_pages(mapping, wbc, zpl_putfolio, data);
3269 + result = write_cache_pages(mapping, wbc, zpl_putpage, data);
3275 zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
3277 @@ -722,8 +773,9 @@ zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
3278 * we run it once in non-SYNC mode so that the ZIL gets all the data,
3279 * and then we commit it all in one go.
3281 + boolean_t for_sync = (sync_mode == WB_SYNC_ALL);
3282 wbc->sync_mode = WB_SYNC_NONE;
3283 - result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
3284 + result = zpl_write_cache_pages(mapping, wbc, &for_sync);
3285 if (sync_mode != wbc->sync_mode) {
3288 @@ -739,7 +791,7 @@ zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
3289 * details). That being said, this is a no-op in most cases.
3291 wbc->sync_mode = sync_mode;
3292 - result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
3293 + result = zpl_write_cache_pages(mapping, wbc, &for_sync);
3297 @@ -756,7 +808,9 @@ zpl_writepage(struct page *pp, struct writeback_control *wbc)
3298 if (ITOZSB(pp->mapping->host)->z_os->os_sync == ZFS_SYNC_ALWAYS)
3299 wbc->sync_mode = WB_SYNC_ALL;
3301 - return (zpl_putpage(pp, wbc, pp->mapping));
3302 + boolean_t for_sync = (wbc->sync_mode == WB_SYNC_ALL);
3304 + return (zpl_putpage(pp, wbc, &for_sync));
3308 @@ -924,7 +978,7 @@ __zpl_ioctl_setflags(struct inode *ip, uint32_t ioctl_flags, xvattr_t *xva)
3309 !capable(CAP_LINUX_IMMUTABLE))
3312 - if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
3313 + if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))
3317 diff --git a/module/os/linux/zfs/zpl_inode.c b/module/os/linux/zfs/zpl_inode.c
3318 index dd634f70e..6efaaf438 100644
3319 --- a/module/os/linux/zfs/zpl_inode.c
3320 +++ b/module/os/linux/zfs/zpl_inode.c
3321 @@ -131,6 +131,9 @@ static int
3322 #ifdef HAVE_IOPS_CREATE_USERNS
3323 zpl_create(struct user_namespace *user_ns, struct inode *dir,
3324 struct dentry *dentry, umode_t mode, bool flag)
3325 +#elif defined(HAVE_IOPS_CREATE_IDMAP)
3326 +zpl_create(struct mnt_idmap *user_ns, struct inode *dir,
3327 + struct dentry *dentry, umode_t mode, bool flag)
3329 zpl_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool flag)
3331 @@ -174,6 +177,9 @@ static int
3332 #ifdef HAVE_IOPS_MKNOD_USERNS
3333 zpl_mknod(struct user_namespace *user_ns, struct inode *dir,
3334 struct dentry *dentry, umode_t mode,
3335 +#elif defined(HAVE_IOPS_MKNOD_IDMAP)
3336 +zpl_mknod(struct mnt_idmap *user_ns, struct inode *dir,
3337 + struct dentry *dentry, umode_t mode,
3339 zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
3341 @@ -224,7 +230,10 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
3345 -#ifndef HAVE_TMPFILE_DENTRY
3346 +#ifdef HAVE_TMPFILE_IDMAP
3347 +zpl_tmpfile(struct mnt_idmap *userns, struct inode *dir,
3348 + struct file *file, umode_t mode)
3349 +#elif !defined(HAVE_TMPFILE_DENTRY)
3350 zpl_tmpfile(struct user_namespace *userns, struct inode *dir,
3351 struct file *file, umode_t mode)
3353 @@ -317,6 +326,9 @@ static int
3354 #ifdef HAVE_IOPS_MKDIR_USERNS
3355 zpl_mkdir(struct user_namespace *user_ns, struct inode *dir,
3356 struct dentry *dentry, umode_t mode)
3357 +#elif defined(HAVE_IOPS_MKDIR_IDMAP)
3358 +zpl_mkdir(struct mnt_idmap *user_ns, struct inode *dir,
3359 + struct dentry *dentry, umode_t mode)
3361 zpl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
3363 @@ -386,6 +398,10 @@ static int
3364 zpl_getattr_impl(struct user_namespace *user_ns,
3365 const struct path *path, struct kstat *stat, u32 request_mask,
3366 unsigned int query_flags)
3367 +#elif defined(HAVE_IDMAP_IOPS_GETATTR)
3368 +zpl_getattr_impl(struct mnt_idmap *user_ns,
3369 + const struct path *path, struct kstat *stat, u32 request_mask,
3370 + unsigned int query_flags)
3372 zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
3373 unsigned int query_flags)
3374 @@ -402,7 +418,7 @@ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
3375 * XXX query_flags currently ignored.
3378 -#ifdef HAVE_USERNS_IOPS_GETATTR
3379 +#if (defined(HAVE_USERNS_IOPS_GETATTR) || defined(HAVE_IDMAP_IOPS_GETATTR))
3380 error = -zfs_getattr_fast(user_ns, ip, stat);
3382 error = -zfs_getattr_fast(kcred->user_ns, ip, stat);
3383 @@ -441,9 +457,12 @@ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
3384 ZPL_GETATTR_WRAPPER(zpl_getattr);
3387 -#ifdef HAVE_SETATTR_PREPARE_USERNS
3388 +#ifdef HAVE_USERNS_IOPS_SETATTR
3389 zpl_setattr(struct user_namespace *user_ns, struct dentry *dentry,
3391 +#elif defined(HAVE_IDMAP_IOPS_SETATTR)
3392 +zpl_setattr(struct mnt_idmap *user_ns, struct dentry *dentry,
3395 zpl_setattr(struct dentry *dentry, struct iattr *ia)
3397 @@ -454,7 +473,13 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia)
3399 fstrans_cookie_t cookie;
3401 - error = zpl_setattr_prepare(kcred->user_ns, dentry, ia);
3402 +#ifdef HAVE_SETATTR_PREPARE_USERNS
3403 + error = zpl_setattr_prepare(user_ns, dentry, ia);
3404 +#elif defined(HAVE_SETATTR_PREPARE_IDMAP)
3405 + error = zpl_setattr_prepare(user_ns, dentry, ia);
3407 + error = zpl_setattr_prepare(zfs_init_idmap, dentry, ia);
3412 @@ -489,10 +514,14 @@ static int
3413 #ifdef HAVE_IOPS_RENAME_USERNS
3414 zpl_rename2(struct user_namespace *user_ns, struct inode *sdip,
3415 struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
3416 - unsigned int flags)
3417 + unsigned int rflags)
3418 +#elif defined(HAVE_IOPS_RENAME_IDMAP)
3419 +zpl_rename2(struct mnt_idmap *user_ns, struct inode *sdip,
3420 + struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
3421 + unsigned int rflags)
3423 zpl_rename2(struct inode *sdip, struct dentry *sdentry,
3424 - struct inode *tdip, struct dentry *tdentry, unsigned int flags)
3425 + struct inode *tdip, struct dentry *tdentry, unsigned int rflags)
3428 cred_t *cr = CRED();
3429 @@ -500,7 +529,7 @@ zpl_rename2(struct inode *sdip, struct dentry *sdentry,
3430 fstrans_cookie_t cookie;
3432 /* We don't have renameat2(2) support */
3438 @@ -514,7 +543,9 @@ zpl_rename2(struct inode *sdip, struct dentry *sdentry,
3442 -#if !defined(HAVE_RENAME_WANTS_FLAGS) && !defined(HAVE_IOPS_RENAME_USERNS)
3443 +#if !defined(HAVE_IOPS_RENAME_USERNS) && \
3444 + !defined(HAVE_RENAME_WANTS_FLAGS) && \
3445 + !defined(HAVE_IOPS_RENAME_IDMAP)
3447 zpl_rename(struct inode *sdip, struct dentry *sdentry,
3448 struct inode *tdip, struct dentry *tdentry)
3449 @@ -527,6 +558,9 @@ static int
3450 #ifdef HAVE_IOPS_SYMLINK_USERNS
3451 zpl_symlink(struct user_namespace *user_ns, struct inode *dir,
3452 struct dentry *dentry, const char *name)
3453 +#elif defined(HAVE_IOPS_SYMLINK_IDMAP)
3454 +zpl_symlink(struct mnt_idmap *user_ns, struct inode *dir,
3455 + struct dentry *dentry, const char *name)
3457 zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name)
3459 @@ -745,6 +779,8 @@ const struct inode_operations zpl_dir_inode_operations = {
3461 #if defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS)
3462 .rename = zpl_rename2,
3463 +#elif defined(HAVE_IOPS_RENAME_IDMAP)
3464 + .rename = zpl_rename2,
3466 .rename = zpl_rename,
3468 diff --git a/module/os/linux/zfs/zpl_xattr.c b/module/os/linux/zfs/zpl_xattr.c
3469 index 364cd34c1..084817609 100644
3470 --- a/module/os/linux/zfs/zpl_xattr.c
3471 +++ b/module/os/linux/zfs/zpl_xattr.c
3472 @@ -725,9 +725,11 @@ __zpl_xattr_user_get(struct inode *ip, const char *name,
3473 ZPL_XATTR_GET_WRAPPER(zpl_xattr_user_get);
3476 -__zpl_xattr_user_set(struct inode *ip, const char *name,
3477 +__zpl_xattr_user_set(zidmap_t *user_ns,
3478 + struct inode *ip, const char *name,
3479 const void *value, size_t size, int flags)
3484 /* xattr_resolve_name will do this for us if this is defined */
3485 @@ -794,9 +796,11 @@ __zpl_xattr_trusted_get(struct inode *ip, const char *name,
3486 ZPL_XATTR_GET_WRAPPER(zpl_xattr_trusted_get);
3489 -__zpl_xattr_trusted_set(struct inode *ip, const char *name,
3490 +__zpl_xattr_trusted_set(zidmap_t *user_ns,
3491 + struct inode *ip, const char *name,
3492 const void *value, size_t size, int flags)
3498 @@ -863,9 +867,11 @@ __zpl_xattr_security_get(struct inode *ip, const char *name,
3499 ZPL_XATTR_GET_WRAPPER(zpl_xattr_security_get);
3502 -__zpl_xattr_security_set(struct inode *ip, const char *name,
3503 +__zpl_xattr_security_set(zidmap_t *user_ns,
3504 + struct inode *ip, const char *name,
3505 const void *value, size_t size, int flags)
3510 /* xattr_resolve_name will do this for us if this is defined */
3511 @@ -889,7 +895,7 @@ zpl_xattr_security_init_impl(struct inode *ip, const struct xattr *xattrs,
3514 for (xattr = xattrs; xattr->name != NULL; xattr++) {
3515 - error = __zpl_xattr_security_set(ip,
3516 + error = __zpl_xattr_security_set(NULL, ip,
3517 xattr->name, xattr->value, xattr->value_len, 0);
3520 @@ -1004,6 +1010,9 @@ int
3521 #ifdef HAVE_SET_ACL_USERNS
3522 zpl_set_acl(struct user_namespace *userns, struct inode *ip,
3523 struct posix_acl *acl, int type)
3524 +#elif defined(HAVE_SET_ACL_IDMAP_DENTRY)
3525 +zpl_set_acl(struct mnt_idmap *userns, struct dentry *dentry,
3526 + struct posix_acl *acl, int type)
3527 #elif defined(HAVE_SET_ACL_USERNS_DENTRY_ARG2)
3528 zpl_set_acl(struct user_namespace *userns, struct dentry *dentry,
3529 struct posix_acl *acl, int type)
3530 @@ -1013,6 +1022,8 @@ zpl_set_acl(struct inode *ip, struct posix_acl *acl, int type)
3532 #ifdef HAVE_SET_ACL_USERNS_DENTRY_ARG2
3533 return (zpl_set_acl_impl(d_inode(dentry), acl, type));
3534 +#elif defined(HAVE_SET_ACL_IDMAP_DENTRY)
3535 + return (zpl_set_acl_impl(d_inode(dentry), acl, type));
3537 return (zpl_set_acl_impl(ip, acl, type));
3538 #endif /* HAVE_SET_ACL_USERNS_DENTRY_ARG2 */
3539 @@ -1256,7 +1267,8 @@ __zpl_xattr_acl_get_default(struct inode *ip, const char *name,
3540 ZPL_XATTR_GET_WRAPPER(zpl_xattr_acl_get_default);
3543 -__zpl_xattr_acl_set_access(struct inode *ip, const char *name,
3544 +__zpl_xattr_acl_set_access(zidmap_t *mnt_ns,
3545 + struct inode *ip, const char *name,
3546 const void *value, size_t size, int flags)
3548 struct posix_acl *acl;
3549 @@ -1270,8 +1282,14 @@ __zpl_xattr_acl_set_access(struct inode *ip, const char *name,
3550 if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
3551 return (-EOPNOTSUPP);
3553 - if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
3554 +#if defined(HAVE_XATTR_SET_USERNS) || defined(HAVE_XATTR_SET_IDMAP)
3555 + if (!zpl_inode_owner_or_capable(mnt_ns, ip))
3559 + if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))
3564 acl = zpl_acl_from_xattr(value, size);
3565 @@ -1295,7 +1313,8 @@ __zpl_xattr_acl_set_access(struct inode *ip, const char *name,
3566 ZPL_XATTR_SET_WRAPPER(zpl_xattr_acl_set_access);
3569 -__zpl_xattr_acl_set_default(struct inode *ip, const char *name,
3570 +__zpl_xattr_acl_set_default(zidmap_t *mnt_ns,
3571 + struct inode *ip, const char *name,
3572 const void *value, size_t size, int flags)
3574 struct posix_acl *acl;
3575 @@ -1309,8 +1328,14 @@ __zpl_xattr_acl_set_default(struct inode *ip, const char *name,
3576 if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
3577 return (-EOPNOTSUPP);
3579 - if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
3580 +#if defined(HAVE_XATTR_SET_USERNS) || defined(HAVE_XATTR_SET_IDMAP)
3581 + if (!zpl_inode_owner_or_capable(mnt_ns, ip))
3585 + if (!zpl_inode_owner_or_capable(zfs_init_idmap, ip))
3590 acl = zpl_acl_from_xattr(value, size);
3591 diff --git a/module/zcommon/Makefile.in b/module/zcommon/Makefile.in
3592 index ebc538440..614968a42 100644
3593 --- a/module/zcommon/Makefile.in
3594 +++ b/module/zcommon/Makefile.in
3595 @@ -26,3 +26,7 @@ $(MODULE)-$(CONFIG_X86) += zfs_fletcher_intel.o
3596 $(MODULE)-$(CONFIG_X86) += zfs_fletcher_sse.o
3597 $(MODULE)-$(CONFIG_X86) += zfs_fletcher_avx512.o
3598 $(MODULE)-$(CONFIG_ARM64) += zfs_fletcher_aarch64_neon.o
3600 +ifeq ($(CONFIG_ARM64),y)
3601 +CFLAGS_REMOVE_zfs_fletcher_aarch64_neon.o += -mgeneral-regs-only
3603 diff --git a/module/zfs/Makefile.in b/module/zfs/Makefile.in
3604 index 653ea0da9..0e04d7ef0 100644
3605 --- a/module/zfs/Makefile.in
3606 +++ b/module/zfs/Makefile.in
3607 @@ -154,4 +154,9 @@ ifeq ($(CONFIG_ALTIVEC),y)
3608 $(obj)/vdev_raidz_math_powerpc_altivec.o: c_flags += -maltivec
3611 +ifeq ($(CONFIG_ARM64),y)
3612 +CFLAGS_REMOVE_vdev_raidz_math_aarch64_neon.o += -mgeneral-regs-only
3613 +CFLAGS_REMOVE_vdev_raidz_math_aarch64_neonx2.o += -mgeneral-regs-only
3616 include $(mfdir)/../os/linux/zfs/Makefile
3617 diff --git a/module/zfs/abd.c b/module/zfs/abd.c
3618 index 8ee8e7e57..754974a55 100644
3619 --- a/module/zfs/abd.c
3620 +++ b/module/zfs/abd.c
3621 @@ -109,7 +109,6 @@ void
3622 abd_verify(abd_t *abd)
3625 - ASSERT3U(abd->abd_size, >, 0);
3626 ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE);
3627 ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR |
3628 ABD_FLAG_OWNER | ABD_FLAG_META | ABD_FLAG_MULTI_ZONE |
3629 @@ -118,6 +117,7 @@ abd_verify(abd_t *abd)
3630 IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER));
3631 IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER);
3632 if (abd_is_linear(abd)) {
3633 + ASSERT3U(abd->abd_size, >, 0);
3634 ASSERT3P(ABD_LINEAR_BUF(abd), !=, NULL);
3635 } else if (abd_is_gang(abd)) {
3636 uint_t child_sizes = 0;
3637 @@ -130,6 +130,7 @@ abd_verify(abd_t *abd)
3639 ASSERT3U(abd->abd_size, ==, child_sizes);
3641 + ASSERT3U(abd->abd_size, >, 0);
3642 abd_verify_scatter(abd);
3645 @@ -369,7 +370,20 @@ abd_gang_add_gang(abd_t *pabd, abd_t *cabd, boolean_t free_on_free)
3646 * will retain all the free_on_free settings after being
3647 * added to the parents list.
3651 + * If cabd had abd_parent, we have to drop it here. We can't
3652 + * transfer it to pabd, nor can we clear abd_size leaving it.
3654 + if (cabd->abd_parent != NULL) {
3655 + (void) zfs_refcount_remove_many(
3656 + &cabd->abd_parent->abd_children,
3657 + cabd->abd_size, cabd);
3658 + cabd->abd_parent = NULL;
3661 pabd->abd_size += cabd->abd_size;
3662 + cabd->abd_size = 0;
3663 list_move_tail(&ABD_GANG(pabd).abd_gang_chain,
3664 &ABD_GANG(cabd).abd_gang_chain);
3665 ASSERT(list_is_empty(&ABD_GANG(cabd).abd_gang_chain));
3666 @@ -407,7 +421,6 @@ abd_gang_add(abd_t *pabd, abd_t *cabd, boolean_t free_on_free)
3668 if (abd_is_gang(cabd)) {
3669 ASSERT(!list_link_active(&cabd->abd_gang_link));
3670 - ASSERT(!list_is_empty(&ABD_GANG(cabd).abd_gang_chain));
3671 return (abd_gang_add_gang(pabd, cabd, free_on_free));
3673 ASSERT(!abd_is_gang(cabd));
3674 diff --git a/module/zfs/dmu_recv.c b/module/zfs/dmu_recv.c
3675 index 98ca2b3bc..b8161f710 100644
3676 --- a/module/zfs/dmu_recv.c
3677 +++ b/module/zfs/dmu_recv.c
3678 @@ -71,6 +71,12 @@ int zfs_recv_write_batch_size = 1024 * 1024;
3679 static char *dmu_recv_tag = "dmu_recv_tag";
3680 const char *recv_clone_name = "%recv";
3688 static int receive_read_payload_and_next_header(dmu_recv_cookie_t *ra, int len,
3691 @@ -121,6 +127,9 @@ struct receive_writer_arg {
3692 uint8_t or_iv[ZIO_DATA_IV_LEN];
3693 uint8_t or_mac[ZIO_DATA_MAC_LEN];
3694 boolean_t or_byteorder;
3696 + /* Keep track of DRR_FREEOBJECTS right after DRR_OBJECT_RANGE */
3697 + or_need_sync_t or_need_sync;
3700 typedef struct dmu_recv_begin_arg {
3701 @@ -1658,10 +1667,22 @@ receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
3702 /* object was freed and we are about to allocate a new one */
3703 object_to_hold = DMU_NEW_OBJECT;
3706 + * If the only record in this range so far was DRR_FREEOBJECTS
3707 + * with at least one actually freed object, it's possible that
3708 + * the block will now be converted to a hole. We need to wait
3709 + * for the txg to sync to prevent races.
3711 + if (rwa->or_need_sync == ORNS_YES)
3712 + txg_wait_synced(dmu_objset_pool(rwa->os), 0);
3714 /* object is free and we are about to allocate a new one */
3715 object_to_hold = DMU_NEW_OBJECT;
3718 + /* Only relevant for the first object in the range */
3719 + rwa->or_need_sync = ORNS_NO;
3722 * If this is a multi-slot dnode there is a chance that this
3723 * object will expand into a slot that is already used by
3724 @@ -1856,6 +1877,9 @@ receive_freeobjects(struct receive_writer_arg *rwa,
3729 + if (rwa->or_need_sync == ORNS_MAYBE)
3730 + rwa->or_need_sync = ORNS_YES;
3732 if (next_err != ESRCH)
3734 @@ -2298,6 +2322,8 @@ receive_object_range(struct receive_writer_arg *rwa,
3735 bcopy(drror->drr_mac, rwa->or_mac, ZIO_DATA_MAC_LEN);
3736 rwa->or_byteorder = byteorder;
3738 + rwa->or_need_sync = ORNS_MAYBE;
3743 diff --git a/module/zfs/dmu_send.c b/module/zfs/dmu_send.c
3744 index cd9ecc07f..0dd1ec210 100644
3745 --- a/module/zfs/dmu_send.c
3746 +++ b/module/zfs/dmu_send.c
3747 @@ -2797,6 +2797,7 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
3752 err = zap_lookup(dspp.dp->dp_meta_objset,
3753 dspp.to_ds->ds_object,
3754 DS_FIELD_RESUME_TOGUID, 8, 1,
3755 @@ -2810,21 +2811,24 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
3756 sizeof (dspp.saved_toname),
3760 + /* Only disown if there was an error in the lookups */
3761 + if (owned && (err != 0))
3762 dsl_dataset_disown(dspp.to_ds, dsflags, FTAG);
3766 err = dsl_dataset_own(dspp.dp, tosnap, dsflags,
3773 err = dsl_dataset_hold_flags(dspp.dp, tosnap, dsflags, FTAG,
3778 + /* Note: dsl dataset is not owned at this point */
3779 dsl_pool_rele(dspp.dp, FTAG);
3782 diff --git a/module/zfs/dmu_tx.c b/module/zfs/dmu_tx.c
3783 index 1eed0526b..063934f39 100644
3784 --- a/module/zfs/dmu_tx.c
3785 +++ b/module/zfs/dmu_tx.c
3786 @@ -290,6 +290,53 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
3791 +dmu_tx_count_append(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
3793 + dnode_t *dn = txh->txh_dnode;
3799 + (void) zfs_refcount_add_many(&txh->txh_space_towrite, len, FTAG);
3805 + * For i/o error checking, read the blocks that will be needed
3806 + * to perform the append; first level-0 block (if not aligned, i.e.
3807 + * if they are partial-block writes), no additional blocks are read.
3809 + if (dn->dn_maxblkid == 0) {
3810 + if (off < dn->dn_datablksz &&
3811 + (off > 0 || len < dn->dn_datablksz)) {
3812 + err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
3814 + txh->txh_tx->tx_err = err;
3818 + zio_t *zio = zio_root(dn->dn_objset->os_spa,
3819 + NULL, NULL, ZIO_FLAG_CANFAIL);
3821 + /* first level-0 block */
3822 + uint64_t start = off >> dn->dn_datablkshift;
3823 + if (P2PHASE(off, dn->dn_datablksz) || len < dn->dn_datablksz) {
3824 + err = dmu_tx_check_ioerr(zio, dn, 0, start);
3826 + txh->txh_tx->tx_err = err;
3830 + err = zio_wait(zio);
3832 + txh->txh_tx->tx_err = err;
3838 dmu_tx_count_dnode(dmu_tx_hold_t *txh)
3840 @@ -330,6 +377,42 @@ dmu_tx_hold_write_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len)
3845 + * Should be used when appending to an object and the exact offset is unknown.
3846 + * The write must occur at or beyond the specified offset. Only the L0 block
3847 + * at provided offset will be prefetched.
3850 +dmu_tx_hold_append(dmu_tx_t *tx, uint64_t object, uint64_t off, int len)
3852 + dmu_tx_hold_t *txh;
3854 + ASSERT0(tx->tx_txg);
3855 + ASSERT3U(len, <=, DMU_MAX_ACCESS);
3857 + txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
3858 + object, THT_APPEND, off, DMU_OBJECT_END);
3859 + if (txh != NULL) {
3860 + dmu_tx_count_append(txh, off, len);
3861 + dmu_tx_count_dnode(txh);
3866 +dmu_tx_hold_append_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len)
3868 + dmu_tx_hold_t *txh;
3870 + ASSERT0(tx->tx_txg);
3871 + ASSERT3U(len, <=, DMU_MAX_ACCESS);
3873 + txh = dmu_tx_hold_dnode_impl(tx, dn, THT_APPEND, off, DMU_OBJECT_END);
3874 + if (txh != NULL) {
3875 + dmu_tx_count_append(txh, off, len);
3876 + dmu_tx_count_dnode(txh);
3881 * This function marks the transaction as being a "net free". The end
3882 * result is that refquotas will be disabled for this transaction, and
3883 @@ -638,6 +721,26 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
3885 match_offset = TRUE;
3888 + if (blkid >= beginblk && (blkid <= endblk ||
3889 + txh->txh_arg2 == DMU_OBJECT_END))
3890 + match_offset = TRUE;
3893 + * THT_WRITE used for bonus and spill blocks.
3895 + ASSERT(blkid != DMU_BONUS_BLKID &&
3896 + blkid != DMU_SPILL_BLKID);
3899 + * They might have to increase nlevels,
3900 + * thus dirtying the new TLIBs. Or the
3901 + * might have to change the block size,
3902 + * thus dirying the new lvl=0 blk=0.
3905 + match_offset = TRUE;
3909 * We will dirty all the level 1 blocks in
3910 @@ -1421,6 +1524,8 @@ dmu_tx_fini(void)
3911 EXPORT_SYMBOL(dmu_tx_create);
3912 EXPORT_SYMBOL(dmu_tx_hold_write);
3913 EXPORT_SYMBOL(dmu_tx_hold_write_by_dnode);
3914 +EXPORT_SYMBOL(dmu_tx_hold_append);
3915 +EXPORT_SYMBOL(dmu_tx_hold_append_by_dnode);
3916 EXPORT_SYMBOL(dmu_tx_hold_free);
3917 EXPORT_SYMBOL(dmu_tx_hold_free_by_dnode);
3918 EXPORT_SYMBOL(dmu_tx_hold_zap);
3919 diff --git a/module/zfs/dsl_deadlist.c b/module/zfs/dsl_deadlist.c
3920 index d5fe2ee56..9827eb147 100644
3921 --- a/module/zfs/dsl_deadlist.c
3922 +++ b/module/zfs/dsl_deadlist.c
3923 @@ -859,7 +859,7 @@ void
3924 dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
3926 zap_cursor_t zc, pzc;
3927 - zap_attribute_t za, pza;
3928 + zap_attribute_t *za, *pza;
3930 dsl_deadlist_phys_t *dlp;
3931 dmu_object_info_t doi;
3932 @@ -874,28 +874,31 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
3936 + za = kmem_alloc(sizeof (*za), KM_SLEEP);
3937 + pza = kmem_alloc(sizeof (*pza), KM_SLEEP);
3939 mutex_enter(&dl->dl_lock);
3941 * Prefetch up to 128 deadlists first and then more as we progress.
3942 * The limit is a balance between ARC use and diminishing returns.
3944 for (zap_cursor_init(&pzc, dl->dl_os, obj), i = 0;
3945 - (perror = zap_cursor_retrieve(&pzc, &pza)) == 0 && i < 128;
3946 + (perror = zap_cursor_retrieve(&pzc, pza)) == 0 && i < 128;
3947 zap_cursor_advance(&pzc), i++) {
3948 - dsl_deadlist_prefetch_bpobj(dl, pza.za_first_integer,
3949 - zfs_strtonum(pza.za_name, NULL));
3950 + dsl_deadlist_prefetch_bpobj(dl, pza->za_first_integer,
3951 + zfs_strtonum(pza->za_name, NULL));
3953 for (zap_cursor_init(&zc, dl->dl_os, obj);
3954 - (error = zap_cursor_retrieve(&zc, &za)) == 0;
3955 + (error = zap_cursor_retrieve(&zc, za)) == 0;
3956 zap_cursor_advance(&zc)) {
3957 - uint64_t mintxg = zfs_strtonum(za.za_name, NULL);
3958 - dsl_deadlist_insert_bpobj(dl, za.za_first_integer, mintxg, tx);
3959 + uint64_t mintxg = zfs_strtonum(za->za_name, NULL);
3960 + dsl_deadlist_insert_bpobj(dl, za->za_first_integer, mintxg, tx);
3961 VERIFY0(zap_remove_int(dl->dl_os, obj, mintxg, tx));
3963 - dsl_deadlist_prefetch_bpobj(dl, pza.za_first_integer,
3964 - zfs_strtonum(pza.za_name, NULL));
3965 + dsl_deadlist_prefetch_bpobj(dl, pza->za_first_integer,
3966 + zfs_strtonum(pza->za_name, NULL));
3967 zap_cursor_advance(&pzc);
3968 - perror = zap_cursor_retrieve(&pzc, &pza);
3969 + perror = zap_cursor_retrieve(&pzc, pza);
3972 VERIFY3U(error, ==, ENOENT);
3973 @@ -908,6 +911,9 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
3974 bzero(dlp, sizeof (*dlp));
3975 dmu_buf_rele(bonus, FTAG);
3976 mutex_exit(&dl->dl_lock);
3978 + kmem_free(za, sizeof (*za));
3979 + kmem_free(pza, sizeof (*pza));
3983 diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c
3984 index f3c639b0d..f0a851ff5 100644
3985 --- a/module/zfs/dsl_scan.c
3986 +++ b/module/zfs/dsl_scan.c
3988 #include <sys/dmu_tx.h>
3989 #include <sys/dmu_objset.h>
3990 #include <sys/arc.h>
3991 +#include <sys/arc_impl.h>
3992 #include <sys/zap.h>
3993 #include <sys/zio.h>
3994 #include <sys/zfs_context.h>
3995 @@ -126,11 +127,20 @@ static boolean_t scan_ds_queue_contains(dsl_scan_t *scn, uint64_t dsobj,
3996 static void scan_ds_queue_insert(dsl_scan_t *scn, uint64_t dsobj, uint64_t txg);
3997 static void scan_ds_queue_remove(dsl_scan_t *scn, uint64_t dsobj);
3998 static void scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx);
3999 -static uint64_t dsl_scan_count_data_disks(vdev_t *vd);
4000 +static uint64_t dsl_scan_count_data_disks(spa_t *spa);
4002 extern int zfs_vdev_async_write_active_min_dirty_percent;
4003 static int zfs_scan_blkstats = 0;
4006 + * 'zpool status' uses bytes processed per pass to report throughput and
4007 + * estimate time remaining. We define a pass to start when the scanning
4008 + * phase completes for a sequential resilver. Optionally, this value
4009 + * may be used to reset the pass statistics every N txgs to provide an
4010 + * estimated completion time based on currently observed performance.
4012 +static uint_t zfs_scan_report_txgs = 0;
4015 * By default zfs will check to ensure it is not over the hard memory
4016 * limit before each txg. If finer-grained control of this is needed
4017 @@ -147,7 +157,7 @@ int zfs_scan_strict_mem_lim = B_FALSE;
4018 * overload the drives with I/O, since that is protected by
4019 * zfs_vdev_scrub_max_active.
4021 -unsigned long zfs_scan_vdev_limit = 4 << 20;
4022 +unsigned long zfs_scan_vdev_limit = 16 << 20;
4024 int zfs_scan_issue_strategy = 0;
4025 int zfs_scan_legacy = B_FALSE; /* don't queue & sort zios, go direct */
4026 @@ -450,11 +460,12 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
4029 * Calculate the max number of in-flight bytes for pool-wide
4030 - * scanning operations (minimum 1MB). Limits for the issuing
4031 - * phase are done per top-level vdev and are handled separately.
4032 + * scanning operations (minimum 1MB, maximum 1/4 of arc_c_max).
4033 + * Limits for the issuing phase are done per top-level vdev and
4034 + * are handled separately.
4036 - scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit *
4037 - dsl_scan_count_data_disks(spa->spa_root_vdev), 1ULL << 20);
4038 + scn->scn_maxinflight_bytes = MIN(arc_c_max / 4, MAX(1ULL << 20,
4039 + zfs_scan_vdev_limit * dsl_scan_count_data_disks(spa)));
4041 avl_create(&scn->scn_queue, scan_ds_queue_compare, sizeof (scan_ds_t),
4042 offsetof(scan_ds_t, sds_node));
4043 @@ -584,6 +595,8 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
4046 spa_scan_stat_init(spa);
4047 + vdev_scan_stat_init(spa->spa_root_vdev);
4052 @@ -742,6 +755,7 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
4053 scn->scn_last_checkpoint = 0;
4054 scn->scn_checkpointing = B_FALSE;
4055 spa_scan_stat_init(spa);
4056 + vdev_scan_stat_init(spa->spa_root_vdev);
4058 if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
4059 scn->scn_phys.scn_ddt_class_max = zfs_scrub_ddt_class_max;
4060 @@ -2797,8 +2811,9 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
4064 -dsl_scan_count_data_disks(vdev_t *rvd)
4065 +dsl_scan_count_data_disks(spa_t *spa)
4067 + vdev_t *rvd = spa->spa_root_vdev;
4068 uint64_t i, leaves = 0;
4070 for (i = 0; i < rvd->vdev_children; i++) {
4071 @@ -3637,6 +3652,16 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
4076 + * Disabled by default, set zfs_scan_report_txgs to report
4077 + * average performance over the last zfs_scan_report_txgs TXGs.
4079 + if (!dsl_scan_is_paused_scrub(scn) && zfs_scan_report_txgs != 0 &&
4080 + tx->tx_txg % zfs_scan_report_txgs == 0) {
4081 + scn->scn_issued_before_pass += spa->spa_scan_pass_issued;
4082 + spa_scan_stat_init(spa);
4086 * It is possible to switch from unsorted to sorted at any time,
4087 * but afterwards the scan will remain sorted unless reloaded from
4088 @@ -3693,12 +3718,13 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
4089 taskqid_t prefetch_tqid;
4092 - * Recalculate the max number of in-flight bytes for pool-wide
4093 - * scanning operations (minimum 1MB). Limits for the issuing
4094 - * phase are done per top-level vdev and are handled separately.
4095 + * Calculate the max number of in-flight bytes for pool-wide
4096 + * scanning operations (minimum 1MB, maximum 1/4 of arc_c_max).
4097 + * Limits for the issuing phase are done per top-level vdev and
4098 + * are handled separately.
4100 - scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit *
4101 - dsl_scan_count_data_disks(spa->spa_root_vdev), 1ULL << 20);
4102 + scn->scn_maxinflight_bytes = MIN(arc_c_max / 4, MAX(1ULL << 20,
4103 + zfs_scan_vdev_limit * dsl_scan_count_data_disks(spa)));
4105 if (scnp->scn_ddt_bookmark.ddb_class <=
4106 scnp->scn_ddt_class_max) {
4107 @@ -3759,6 +3785,9 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
4108 if (scn->scn_is_sorted) {
4109 scn->scn_checkpointing = B_TRUE;
4110 scn->scn_clearing = B_TRUE;
4111 + scn->scn_issued_before_pass +=
4112 + spa->spa_scan_pass_issued;
4113 + spa_scan_stat_init(spa);
4115 zfs_dbgmsg("scan complete txg %llu",
4116 (longlong_t)tx->tx_txg);
4117 @@ -4485,6 +4514,9 @@ ZFS_MODULE_PARAM(zfs, zfs_, scan_strict_mem_lim, INT, ZMOD_RW,
4118 ZFS_MODULE_PARAM(zfs, zfs_, scan_fill_weight, INT, ZMOD_RW,
4119 "Tunable to adjust bias towards more filled segments during scans");
4121 +ZFS_MODULE_PARAM(zfs, zfs_, scan_report_txgs, UINT, ZMOD_RW,
4122 + "Tunable to report resilver performance over the last N txgs");
4124 ZFS_MODULE_PARAM(zfs, zfs_, resilver_disable_defer, INT, ZMOD_RW,
4125 "Process all resilvers immediately");
4127 diff --git a/module/zfs/mmp.c b/module/zfs/mmp.c
4128 index f67a4eb22..139bb0acd 100644
4129 --- a/module/zfs/mmp.c
4130 +++ b/module/zfs/mmp.c
4131 @@ -444,7 +444,7 @@ mmp_write_uberblock(spa_t *spa)
4134 hrtime_t lock_acquire_time = gethrtime();
4135 - spa_config_enter(spa, SCL_STATE, mmp_tag, RW_READER);
4136 + spa_config_enter_mmp(spa, SCL_STATE, mmp_tag, RW_READER);
4137 lock_acquire_time = gethrtime() - lock_acquire_time;
4138 if (lock_acquire_time > (MSEC2NSEC(MMP_MIN_INTERVAL) / 10))
4139 zfs_dbgmsg("MMP SCL_STATE acquisition pool '%s' took %llu ns "
4140 diff --git a/module/zfs/spa.c b/module/zfs/spa.c
4141 index 1ed79eed3..5f238e691 100644
4142 --- a/module/zfs/spa.c
4143 +++ b/module/zfs/spa.c
4145 * Copyright 2017 Joyent, Inc.
4146 * Copyright (c) 2017, Intel Corporation.
4147 * Copyright (c) 2021, Colm Buckley <colm@tuatha.org>
4148 + * Copyright (c) 2023 Hewlett Packard Enterprise Development LP.
4152 @@ -6261,6 +6262,16 @@ spa_tryimport(nvlist_t *tryconfig)
4153 spa->spa_config_source = SPA_CONFIG_SRC_SCAN;
4157 + * spa_import() relies on a pool config fetched by spa_tryimport()
4158 + * for spare/cache devices. Import flags are not passed to
4159 + * spa_tryimport(), which makes it return early due to a missing log
4160 + * device, so it never retrieves the cache and spare devices.
4161 + * Passing ZFS_IMPORT_MISSING_LOG to spa_tryimport() makes it fetch
4162 + * the correct configuration regardless of the missing log device.
4164 + spa->spa_import_flags |= ZFS_IMPORT_MISSING_LOG;
4166 error = spa_load(spa, SPA_LOAD_TRYIMPORT, SPA_IMPORT_EXISTING);
4169 @@ -6747,9 +6758,11 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
4170 if (!spa_feature_is_enabled(spa, SPA_FEATURE_DEVICE_REBUILD))
4171 return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
4173 - if (dsl_scan_resilvering(spa_get_dsl(spa)))
4174 + if (dsl_scan_resilvering(spa_get_dsl(spa)) ||
4175 + dsl_scan_resilver_scheduled(spa_get_dsl(spa))) {
4176 return (spa_vdev_exit(spa, NULL, txg,
4177 ZFS_ERR_RESILVER_IN_PROGRESS));
4180 if (vdev_rebuild_active(rvd))
4181 return (spa_vdev_exit(spa, NULL, txg,
4182 @@ -6987,7 +7000,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing,
4183 * Detach a device from a mirror or replacing vdev.
4185 * If 'replace_done' is specified, only detach if the parent
4186 - * is a replacing vdev.
4187 + * is a replacing or a spare vdev.
4190 spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
4191 @@ -7294,6 +7307,10 @@ spa_vdev_initialize_impl(spa_t *spa, uint64_t guid, uint64_t cmd_type,
4192 vd->vdev_initialize_state != VDEV_INITIALIZE_ACTIVE) {
4193 mutex_exit(&vd->vdev_initialize_lock);
4194 return (SET_ERROR(ESRCH));
4195 + } else if (cmd_type == POOL_INITIALIZE_UNINIT &&
4196 + vd->vdev_initialize_thread != NULL) {
4197 + mutex_exit(&vd->vdev_initialize_lock);
4198 + return (SET_ERROR(EBUSY));
4202 @@ -7306,6 +7323,9 @@ spa_vdev_initialize_impl(spa_t *spa, uint64_t guid, uint64_t cmd_type,
4203 case POOL_INITIALIZE_SUSPEND:
4204 vdev_initialize_stop(vd, VDEV_INITIALIZE_SUSPENDED, vd_list);
4206 + case POOL_INITIALIZE_UNINIT:
4207 + vdev_uninitialize(vd);
4210 panic("invalid cmd_type %llu", (unsigned long long)cmd_type);
4212 @@ -8210,7 +8230,8 @@ spa_async_thread(void *arg)
4213 * If any devices are done replacing, detach them.
4215 if (tasks & SPA_ASYNC_RESILVER_DONE ||
4216 - tasks & SPA_ASYNC_REBUILD_DONE) {
4217 + tasks & SPA_ASYNC_REBUILD_DONE ||
4218 + tasks & SPA_ASYNC_DETACH_SPARE) {
4219 spa_vdev_resilver_done(spa);
4222 diff --git a/module/zfs/spa_misc.c b/module/zfs/spa_misc.c
4223 index a57f0727d..113943026 100644
4224 --- a/module/zfs/spa_misc.c
4225 +++ b/module/zfs/spa_misc.c
4226 @@ -494,8 +494,9 @@ spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw)
4231 -spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
4233 +spa_config_enter_impl(spa_t *spa, int locks, const void *tag, krw_t rw,
4237 int wlocks_held = 0;
4238 @@ -510,7 +511,8 @@ spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
4240 mutex_enter(&scl->scl_lock);
4241 if (rw == RW_READER) {
4242 - while (scl->scl_writer || scl->scl_write_wanted) {
4243 + while (scl->scl_writer ||
4244 + (!mmp_flag && scl->scl_write_wanted)) {
4245 cv_wait(&scl->scl_cv, &scl->scl_lock);
4248 @@ -528,6 +530,27 @@ spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
4249 ASSERT3U(wlocks_held, <=, locks);
4253 +spa_config_enter(spa_t *spa, int locks, const void *tag, krw_t rw)
4255 + spa_config_enter_impl(spa, locks, tag, rw, 0);
4259 + * The spa_config_enter_mmp() allows the mmp thread to cut in front of
4260 + * outstanding write lock requests. This is needed since the mmp updates are
4261 + * time sensitive and failure to service them promptly will result in a
4262 + * suspended pool. This pool suspension has been seen in practice when there is
4263 + * a single disk in a pool that is responding slowly and presumably about to
4268 +spa_config_enter_mmp(spa_t *spa, int locks, const void *tag, krw_t rw)
4270 + spa_config_enter_impl(spa, locks, tag, rw, 1);
4274 spa_config_exit(spa_t *spa, int locks, const void *tag)
4276 @@ -2564,7 +2587,6 @@ spa_scan_stat_init(spa_t *spa)
4277 spa->spa_scan_pass_scrub_spent_paused = 0;
4278 spa->spa_scan_pass_exam = 0;
4279 spa->spa_scan_pass_issued = 0;
4280 - vdev_scan_stat_init(spa->spa_root_vdev);
4284 diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c
4285 index 4b9d7e7c0..57259b8ce 100644
4286 --- a/module/zfs/vdev.c
4287 +++ b/module/zfs/vdev.c
4289 * Copyright 2017 Joyent, Inc.
4290 * Copyright (c) 2017, Intel Corporation.
4291 * Copyright (c) 2019, Datto Inc. All rights reserved.
4292 - * Copyright [2021] Hewlett Packard Enterprise Development LP
4293 + * Copyright (c) 2021, 2023 Hewlett Packard Enterprise Development LP.
4296 #include <sys/zfs_context.h>
4297 @@ -2645,6 +2645,17 @@ vdev_reopen(vdev_t *vd)
4298 (void) vdev_validate(vd);
4302 + * Recheck if resilver is still needed and cancel any
4303 + * scheduled resilver if resilver is unneeded.
4305 + if (!vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL) &&
4306 + spa->spa_async_tasks & SPA_ASYNC_RESILVER) {
4307 + mutex_enter(&spa->spa_async_lock);
4308 + spa->spa_async_tasks &= ~SPA_ASYNC_RESILVER;
4309 + mutex_exit(&spa->spa_async_lock);
4313 * Reassess parent vdev's health.
4315 @@ -3983,11 +3994,18 @@ vdev_remove_wanted(spa_t *spa, uint64_t guid)
4316 return (spa_vdev_state_exit(spa, NULL, SET_ERROR(ENODEV)));
4319 - * If the vdev is already removed, then don't do anything.
4320 + * If the vdev is already removed, or expanding, which can trigger
4321 + * repartition add/remove events, then don't do anything.
4323 - if (vd->vdev_removed)
4324 + if (vd->vdev_removed || vd->vdev_expanding)
4325 return (spa_vdev_state_exit(spa, NULL, 0));
4328 + * Confirm the vdev has been removed, otherwise don't do anything.
4330 + if (vd->vdev_ops->vdev_op_leaf && !zio_wait(vdev_probe(vd, NULL)))
4331 + return (spa_vdev_state_exit(spa, NULL, SET_ERROR(EEXIST)));
4333 vd->vdev_remove_wanted = B_TRUE;
4334 spa_async_request(spa, SPA_ASYNC_REMOVE);
4336 @@ -4085,9 +4103,19 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
4339 (oldstate < VDEV_STATE_DEGRADED &&
4340 - vd->vdev_state >= VDEV_STATE_DEGRADED))
4341 + vd->vdev_state >= VDEV_STATE_DEGRADED)) {
4342 spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_ONLINE);
4345 + * Asynchronously detach spare vdev if resilver or
4346 + * rebuild is not required
4348 + if (vd->vdev_unspare &&
4349 + !dsl_scan_resilvering(spa->spa_dsl_pool) &&
4350 + !dsl_scan_resilver_scheduled(spa->spa_dsl_pool) &&
4351 + !vdev_rebuild_active(tvd))
4352 + spa_async_request(spa, SPA_ASYNC_DETACH_SPARE);
4354 return (spa_vdev_state_exit(spa, vd, 0));
4357 diff --git a/module/zfs/vdev_initialize.c b/module/zfs/vdev_initialize.c
4358 index 6ffd0d618..5d90fd67c 100644
4359 --- a/module/zfs/vdev_initialize.c
4360 +++ b/module/zfs/vdev_initialize.c
4361 @@ -100,6 +100,39 @@ vdev_initialize_zap_update_sync(void *arg, dmu_tx_t *tx)
4362 &initialize_state, tx));
4366 +vdev_initialize_zap_remove_sync(void *arg, dmu_tx_t *tx)
4368 + uint64_t guid = *(uint64_t *)arg;
4370 + kmem_free(arg, sizeof (uint64_t));
4372 + vdev_t *vd = spa_lookup_by_guid(tx->tx_pool->dp_spa, guid, B_FALSE);
4373 + if (vd == NULL || vd->vdev_top->vdev_removing || !vdev_is_concrete(vd))
4376 + ASSERT3S(vd->vdev_initialize_state, ==, VDEV_INITIALIZE_NONE);
4377 + ASSERT3U(vd->vdev_leaf_zap, !=, 0);
4379 + vd->vdev_initialize_last_offset = 0;
4380 + vd->vdev_initialize_action_time = 0;
4382 + objset_t *mos = vd->vdev_spa->spa_meta_objset;
4385 + error = zap_remove(mos, vd->vdev_leaf_zap,
4386 + VDEV_LEAF_ZAP_INITIALIZE_LAST_OFFSET, tx);
4387 + VERIFY(error == 0 || error == ENOENT);
4389 + error = zap_remove(mos, vd->vdev_leaf_zap,
4390 + VDEV_LEAF_ZAP_INITIALIZE_STATE, tx);
4391 + VERIFY(error == 0 || error == ENOENT);
4393 + error = zap_remove(mos, vd->vdev_leaf_zap,
4394 + VDEV_LEAF_ZAP_INITIALIZE_ACTION_TIME, tx);
4395 + VERIFY(error == 0 || error == ENOENT);
4399 vdev_initialize_change_state(vdev_t *vd, vdev_initializing_state_t new_state)
4401 @@ -127,8 +160,14 @@ vdev_initialize_change_state(vdev_t *vd, vdev_initializing_state_t new_state)
4403 dmu_tx_t *tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
4404 VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
4405 - dsl_sync_task_nowait(spa_get_dsl(spa), vdev_initialize_zap_update_sync,
4408 + if (new_state != VDEV_INITIALIZE_NONE) {
4409 + dsl_sync_task_nowait(spa_get_dsl(spa),
4410 + vdev_initialize_zap_update_sync, guid, tx);
4412 + dsl_sync_task_nowait(spa_get_dsl(spa),
4413 + vdev_initialize_zap_remove_sync, guid, tx);
4416 switch (new_state) {
4417 case VDEV_INITIALIZE_ACTIVE:
4418 @@ -149,6 +188,10 @@ vdev_initialize_change_state(vdev_t *vd, vdev_initializing_state_t new_state)
4419 spa_history_log_internal(spa, "initialize", tx,
4420 "vdev=%s complete", vd->vdev_path);
4422 + case VDEV_INITIALIZE_NONE:
4423 + spa_history_log_internal(spa, "uninitialize", tx,
4424 + "vdev=%s", vd->vdev_path);
4427 panic("invalid state %llu", (unsigned long long)new_state);
4429 @@ -604,6 +647,24 @@ vdev_initialize(vdev_t *vd)
4430 vdev_initialize_thread, vd, 0, &p0, TS_RUN, maxclsyspri);
4434 + * Uninitializes a device. Caller must hold vdev_initialize_lock.
4435 + * Device must be a leaf and not already be initializing.
4438 +vdev_uninitialize(vdev_t *vd)
4440 + ASSERT(MUTEX_HELD(&vd->vdev_initialize_lock));
4441 + ASSERT(vd->vdev_ops->vdev_op_leaf);
4442 + ASSERT(vdev_is_concrete(vd));
4443 + ASSERT3P(vd->vdev_initialize_thread, ==, NULL);
4444 + ASSERT(!vd->vdev_detached);
4445 + ASSERT(!vd->vdev_initialize_exit_wanted);
4446 + ASSERT(!vd->vdev_top->vdev_removing);
4448 + vdev_initialize_change_state(vd, VDEV_INITIALIZE_NONE);
4452 * Wait for the initialize thread to be terminated (cancelled or stopped).
4454 @@ -760,6 +821,7 @@ vdev_initialize_restart(vdev_t *vd)
4457 EXPORT_SYMBOL(vdev_initialize);
4458 +EXPORT_SYMBOL(vdev_uninitialize);
4459 EXPORT_SYMBOL(vdev_initialize_stop);
4460 EXPORT_SYMBOL(vdev_initialize_stop_all);
4461 EXPORT_SYMBOL(vdev_initialize_stop_wait);
4462 diff --git a/module/zfs/vdev_rebuild.c b/module/zfs/vdev_rebuild.c
4463 index 9dfbe0cf6..b180fa146 100644
4464 --- a/module/zfs/vdev_rebuild.c
4465 +++ b/module/zfs/vdev_rebuild.c
4467 #include <sys/zio.h>
4468 #include <sys/dmu_tx.h>
4469 #include <sys/arc.h>
4470 +#include <sys/arc_impl.h>
4471 #include <sys/zap.h>
4474 @@ -116,13 +117,12 @@ unsigned long zfs_rebuild_max_segment = 1024 * 1024;
4475 * segment size is also large (zfs_rebuild_max_segment=1M). This helps keep
4476 * the queue depth short.
4478 - * 32MB was selected as the default value to achieve good performance with
4479 - * a large 90-drive dRAID HDD configuration (draid2:8d:90c:2s). A sequential
4480 - * rebuild was unable to saturate all of the drives using smaller values.
4481 - * With a value of 32MB the sequential resilver write rate was measured at
4482 - * 800MB/s sustained while rebuilding to a distributed spare.
4483 + * 64MB was observed to deliver the best performance and set as the default.
4484 + * Testing was performed with a 106-drive dRAID HDD pool (draid2:11d:106c)
4485 + * and a rebuild rate of 1.2GB/s was measured to the distributed spare.
4486 + * Smaller values were unable to fully saturate the available pool I/O.
4488 -unsigned long zfs_rebuild_vdev_limit = 32 << 20;
4489 +unsigned long zfs_rebuild_vdev_limit = 64 << 20;
4492 * Automatically start a pool scrub when the last active sequential resilver
4493 @@ -754,6 +754,7 @@ vdev_rebuild_thread(void *arg)
4496 spa_t *spa = vd->vdev_spa;
4497 + vdev_t *rvd = spa->spa_root_vdev;
4501 @@ -786,9 +787,6 @@ vdev_rebuild_thread(void *arg)
4502 vr->vr_pass_bytes_scanned = 0;
4503 vr->vr_pass_bytes_issued = 0;
4505 - vr->vr_bytes_inflight_max = MAX(1ULL << 20,
4506 - zfs_rebuild_vdev_limit * vd->vdev_children);
4508 uint64_t update_est_time = gethrtime();
4509 vdev_rebuild_update_bytes_est(vd, 0);
4511 @@ -804,6 +802,17 @@ vdev_rebuild_thread(void *arg)
4512 metaslab_t *msp = vd->vdev_ms[i];
4513 vr->vr_scan_msp = msp;
4516 + * Calculate the max number of in-flight bytes for top-level
4517 + * vdev scanning operations (minimum 1MB, maximum 1/4 of
4518 + * arc_c_max shared by all top-level vdevs). Limits for the
4519 + * issuing phase are done per top-level vdev and are handled
4522 + uint64_t limit = (arc_c_max / 4) / MAX(rvd->vdev_children, 1);
4523 + vr->vr_bytes_inflight_max = MIN(limit, MAX(1ULL << 20,
4524 + zfs_rebuild_vdev_limit * vd->vdev_children));
4527 * Removal of vdevs from the vdev tree may eliminate the need
4528 * for the rebuild, in which case it should be canceled. The
4529 diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c
4530 index a4b391cbe..f441328f3 100644
4531 --- a/module/zfs/zfs_ioctl.c
4532 +++ b/module/zfs/zfs_ioctl.c
4533 @@ -3985,7 +3985,8 @@ zfs_ioc_pool_initialize(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4535 if (!(cmd_type == POOL_INITIALIZE_CANCEL ||
4536 cmd_type == POOL_INITIALIZE_START ||
4537 - cmd_type == POOL_INITIALIZE_SUSPEND)) {
4538 + cmd_type == POOL_INITIALIZE_SUSPEND ||
4539 + cmd_type == POOL_INITIALIZE_UNINIT)) {
4540 return (SET_ERROR(EINVAL));
4543 diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c
4544 index b9498d17e..0987fd0f7 100644
4545 --- a/module/zfs/zfs_vnops.c
4546 +++ b/module/zfs/zfs_vnops.c
4547 @@ -68,7 +68,9 @@ zfs_fsync(znode_t *zp, int syncflag, cred_t *cr)
4548 if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
4551 + atomic_inc_32(&zp->z_sync_writes_cnt);
4552 zil_commit(zfsvfs->z_log, zp->z_id);
4553 + atomic_dec_32(&zp->z_sync_writes_cnt);
4556 tsd_set(zfs_fsyncer_key, NULL);
4557 @@ -102,7 +104,7 @@ zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off)
4560 /* Flush any mmap()'d data to disk */
4561 - if (zn_has_cached_data(zp))
4562 + if (zn_has_cached_data(zp, 0, file_sz - 1))
4563 zn_flush_cached_data(zp, B_FALSE);
4565 lr = zfs_rangelock_enter(&zp->z_rangelock, 0, UINT64_MAX, RL_READER);
4566 @@ -275,7 +277,8 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
4567 error = mappedread_sf(zp, nbytes, uio);
4570 - if (zn_has_cached_data(zp) && !(ioflag & O_DIRECT)) {
4571 + if (zn_has_cached_data(zp, zfs_uio_offset(uio),
4572 + zfs_uio_offset(uio) + nbytes - 1) && !(ioflag & O_DIRECT)) {
4573 error = mappedread(zp, nbytes, uio);
4575 error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
4576 @@ -686,7 +689,8 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
4577 zfs_uioskip(uio, nbytes);
4580 - if (tx_bytes && zn_has_cached_data(zp) &&
4582 + zn_has_cached_data(zp, woff, woff + tx_bytes - 1) &&
4583 !(ioflag & O_DIRECT)) {
4584 update_pages(zp, woff, tx_bytes, zfsvfs->z_os);
4586 diff --git a/module/zfs/zil.c b/module/zfs/zil.c
4587 index aaf509a2f..f2aaeb550 100644
4588 --- a/module/zfs/zil.c
4589 +++ b/module/zfs/zil.c
4590 @@ -226,11 +226,10 @@ zil_init_log_chain(zilog_t *zilog, blkptr_t *bp)
4593 zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
4594 - blkptr_t *nbp, void *dst, char **end)
4595 + blkptr_t *nbp, char **begin, char **end, arc_buf_t **abuf)
4597 enum zio_flag zio_flags = ZIO_FLAG_CANFAIL;
4598 arc_flags_t aflags = ARC_FLAG_WAIT;
4599 - arc_buf_t *abuf = NULL;
4600 zbookmark_phys_t zb;
4603 @@ -247,7 +246,7 @@ zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
4604 ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, bp->blk_cksum.zc_word[ZIL_ZC_SEQ]);
4606 error = arc_read(NULL, zilog->zl_spa, bp, arc_getbuf_func,
4607 - &abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
4608 + abuf, ZIO_PRIORITY_SYNC_READ, zio_flags, &aflags, &zb);
4611 zio_cksum_t cksum = bp->blk_cksum;
4612 @@ -262,23 +261,23 @@ zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
4614 cksum.zc_word[ZIL_ZC_SEQ]++;
4616 + uint64_t size = BP_GET_LSIZE(bp);
4617 if (BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_ZILOG2) {
4618 - zil_chain_t *zilc = abuf->b_data;
4619 + zil_chain_t *zilc = (*abuf)->b_data;
4620 char *lr = (char *)(zilc + 1);
4621 - uint64_t len = zilc->zc_nused - sizeof (zil_chain_t);
4623 if (bcmp(&cksum, &zilc->zc_next_blk.blk_cksum,
4624 - sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk)) {
4625 + sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk) ||
4626 + zilc->zc_nused < sizeof (*zilc) ||
4627 + zilc->zc_nused > size) {
4628 error = SET_ERROR(ECKSUM);
4630 - ASSERT3U(len, <=, SPA_OLD_MAXBLOCKSIZE);
4631 - bcopy(lr, dst, len);
4632 - *end = (char *)dst + len;
4634 + *end = lr + zilc->zc_nused - sizeof (*zilc);
4635 *nbp = zilc->zc_next_blk;
4638 - char *lr = abuf->b_data;
4639 - uint64_t size = BP_GET_LSIZE(bp);
4640 + char *lr = (*abuf)->b_data;
4641 zil_chain_t *zilc = (zil_chain_t *)(lr + size) - 1;
4643 if (bcmp(&cksum, &zilc->zc_next_blk.blk_cksum,
4644 @@ -286,15 +285,11 @@ zil_read_log_block(zilog_t *zilog, boolean_t decrypt, const blkptr_t *bp,
4645 (zilc->zc_nused > (size - sizeof (*zilc)))) {
4646 error = SET_ERROR(ECKSUM);
4648 - ASSERT3U(zilc->zc_nused, <=,
4649 - SPA_OLD_MAXBLOCKSIZE);
4650 - bcopy(lr, dst, zilc->zc_nused);
4651 - *end = (char *)dst + zilc->zc_nused;
4653 + *end = lr + zilc->zc_nused;
4654 *nbp = zilc->zc_next_blk;
4658 - arc_buf_destroy(abuf, &abuf);
4662 @@ -362,7 +357,6 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
4663 uint64_t blk_count = 0;
4664 uint64_t lr_count = 0;
4665 blkptr_t blk, next_blk;
4666 - char *lrbuf, *lrp;
4669 bzero(&next_blk, sizeof (blkptr_t));
4670 @@ -382,13 +376,13 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
4671 * If the log has been claimed, stop if we encounter a sequence
4672 * number greater than the highest claimed sequence number.
4674 - lrbuf = zio_buf_alloc(SPA_OLD_MAXBLOCKSIZE);
4675 zil_bp_tree_init(zilog);
4677 for (blk = zh->zh_log; !BP_IS_HOLE(&blk); blk = next_blk) {
4678 uint64_t blk_seq = blk.blk_cksum.zc_word[ZIL_ZC_SEQ];
4682 + arc_buf_t *abuf = NULL;
4684 if (blk_seq > claim_blk_seq)
4686 @@ -404,8 +398,10 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
4689 error = zil_read_log_block(zilog, decrypt, &blk, &next_blk,
4691 + &lrp, &end, &abuf);
4694 + arc_buf_destroy(abuf, &abuf);
4696 char name[ZFS_MAX_DATASET_NAME_LEN];
4698 @@ -418,7 +414,7 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
4702 - for (lrp = lrbuf; lrp < end; lrp += reclen) {
4703 + for (; lrp < end; lrp += reclen) {
4704 lr_t *lr = (lr_t *)lrp;
4705 reclen = lr->lrc_reclen;
4706 ASSERT3U(reclen, >=, sizeof (lr_t));
4707 @@ -432,6 +428,7 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
4708 max_lr_seq = lr->lrc_seq;
4711 + arc_buf_destroy(abuf, &abuf);
4714 zilog->zl_parse_error = error;
4715 @@ -441,7 +438,6 @@ done:
4716 zilog->zl_parse_lr_count = lr_count;
4718 zil_bp_tree_fini(zilog);
4719 - zio_buf_free(lrbuf, SPA_OLD_MAXBLOCKSIZE);
4723 @@ -1593,6 +1589,7 @@ zil_lwb_write_issue(zilog_t *zilog, lwb_t *lwb)
4724 wsz = P2ROUNDUP_TYPED(lwb->lwb_nused, ZIL_MIN_BLKSZ, uint64_t);
4725 ASSERT3U(wsz, <=, lwb->lwb_sz);
4726 zio_shrink(lwb->lwb_write_zio, wsz);
4727 + wsz = lwb->lwb_write_zio->io_size;
4731 @@ -2848,7 +2845,14 @@ static void
4732 zil_commit_itx_assign(zilog_t *zilog, zil_commit_waiter_t *zcw)
4734 dmu_tx_t *tx = dmu_tx_create(zilog->zl_os);
4735 - VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
4738 + * Since we are not going to create any new dirty data, and we
4739 + * can even help with clearing the existing dirty data, we
4740 + * should not be subject to the dirty data based delays. We
4741 + * use TXG_NOTHROTTLE to bypass the delay mechanism.
4743 + VERIFY0(dmu_tx_assign(tx, TXG_WAIT | TXG_NOTHROTTLE));
4745 itx_t *itx = zil_itx_create(TX_COMMIT, sizeof (lr_t));
4746 itx->itx_sync = B_TRUE;
4747 diff --git a/module/zfs/zio.c b/module/zfs/zio.c
4748 index 700f87910..c367ef721 100644
4749 --- a/module/zfs/zio.c
4750 +++ b/module/zfs/zio.c
4751 @@ -2287,7 +2287,7 @@ zio_nowait(zio_t *zio)
4752 ASSERT3P(zio->io_executor, ==, NULL);
4754 if (zio->io_child_type == ZIO_CHILD_LOGICAL &&
4755 - zio_unique_parent(zio) == NULL) {
4756 + list_is_empty(&zio->io_parent_list)) {
4760 diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run
4761 index 554cf96f8..6c2296d4c 100644
4762 --- a/tests/runfiles/common.run
4763 +++ b/tests/runfiles/common.run
4764 @@ -37,7 +37,7 @@ tests = ['alloc_class_001_pos', 'alloc_class_002_neg', 'alloc_class_003_pos',
4765 'alloc_class_004_pos', 'alloc_class_005_pos', 'alloc_class_006_pos',
4766 'alloc_class_007_pos', 'alloc_class_008_pos', 'alloc_class_009_pos',
4767 'alloc_class_010_pos', 'alloc_class_011_neg', 'alloc_class_012_pos',
4768 - 'alloc_class_013_pos']
4769 + 'alloc_class_013_pos', 'alloc_class_014_neg', 'alloc_class_015_pos']
4770 tags = ['functional', 'alloc_class']
4772 [tests/functional/arc]
4773 @@ -407,7 +407,7 @@ tests = ['zpool_import_001_pos', 'zpool_import_002_pos',
4774 'import_cachefile_mirror_detached',
4775 'import_cachefile_paths_changed',
4776 'import_cachefile_shared_device',
4777 - 'import_devices_missing',
4778 + 'import_devices_missing', 'import_log_missing',
4779 'import_paths_changed',
4780 'import_rewind_config_changed',
4781 'import_rewind_device_replaced']
4782 @@ -431,6 +431,7 @@ tests = ['zpool_initialize_attach_detach_add_remove',
4783 'zpool_initialize_start_and_cancel_neg',
4784 'zpool_initialize_start_and_cancel_pos',
4785 'zpool_initialize_suspend_resume',
4786 + 'zpool_initialize_uninit',
4787 'zpool_initialize_unsupported_vdevs',
4788 'zpool_initialize_verify_checksums',
4789 'zpool_initialize_verify_initialized']
4790 @@ -456,7 +457,8 @@ tests = ['zpool_replace_001_neg', 'replace-o_ashift', 'replace_prop_ashift']
4791 tags = ['functional', 'cli_root', 'zpool_replace']
4793 [tests/functional/cli_root/zpool_resilver]
4794 -tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart']
4795 +tests = ['zpool_resilver_bad_args', 'zpool_resilver_restart',
4796 + 'zpool_resilver_concurrent']
4797 tags = ['functional', 'cli_root', 'zpool_resilver']
4799 [tests/functional/cli_root/zpool_scrub]
4800 @@ -669,7 +671,8 @@ tests = ['migration_001_pos', 'migration_002_pos', 'migration_003_pos',
4801 tags = ['functional', 'migration']
4803 [tests/functional/mmap]
4804 -tests = ['mmap_write_001_pos', 'mmap_read_001_pos', 'mmap_seek_001_pos']
4805 +tests = ['mmap_mixed', 'mmap_read_001_pos', 'mmap_seek_001_pos',
4806 + 'mmap_write_001_pos', 'mmap_sync_001_pos']
4807 tags = ['functional', 'mmap']
4809 [tests/functional/mount]
4810 @@ -823,9 +826,9 @@ tests = ['recv_dedup', 'recv_dedup_encrypted_zvol', 'rsend_001_pos',
4811 'send-c_mixed_compression', 'send-c_stream_size_estimate',
4812 'send-c_embedded_blocks', 'send-c_resume', 'send-cpL_varied_recsize',
4813 'send-c_recv_dedup', 'send-L_toggle',
4814 - 'send_encrypted_incremental.ksh', 'send_encrypted_hierarchy',
4815 - 'send_encrypted_props', 'send_encrypted_truncated_files',
4816 - 'send_freeobjects', 'send_realloc_files',
4817 + 'send_encrypted_incremental.ksh', 'send_encrypted_freeobjects',
4818 + 'send_encrypted_hierarchy', 'send_encrypted_props',
4819 + 'send_encrypted_truncated_files', 'send_freeobjects', 'send_realloc_files',
4820 'send_realloc_encrypted_files', 'send_spill_block', 'send_holds',
4821 'send_hole_birth', 'send_mixed_raw', 'send-wR_encrypted_zvol',
4822 'send_partial_dataset', 'send_invalid', 'send_doall',
4823 diff --git a/tests/runfiles/sanity.run b/tests/runfiles/sanity.run
4824 index fb39fa54b..0a3d42cb2 100644
4825 --- a/tests/runfiles/sanity.run
4826 +++ b/tests/runfiles/sanity.run
4827 @@ -547,6 +547,7 @@ tests = ['recv_dedup', 'recv_dedup_encrypted_zvol', 'rsend_001_pos',
4828 'rsend_014_pos', 'rsend_016_neg', 'send-c_verify_contents',
4829 'send-c_volume', 'send-c_zstreamdump', 'send-c_recv_dedup',
4830 'send-L_toggle', 'send_encrypted_hierarchy', 'send_encrypted_props',
4831 + 'send_encrypted_freeobjects',
4832 'send_encrypted_truncated_files', 'send_freeobjects', 'send_holds',
4833 'send_mixed_raw', 'send-wR_encrypted_zvol', 'send_partial_dataset',
4835 diff --git a/tests/test-runner/bin/test-runner.py.in b/tests/test-runner/bin/test-runner.py.in
4836 index a652d3d4a..5c868d945 100755
4837 --- a/tests/test-runner/bin/test-runner.py.in
4838 +++ b/tests/test-runner/bin/test-runner.py.in
4839 @@ -33,7 +33,7 @@ from subprocess import PIPE
4840 from subprocess import Popen
4841 from subprocess import check_output
4842 from threading import Timer
4843 -from time import time, CLOCK_MONOTONIC_RAW
4844 +from time import time, CLOCK_MONOTONIC
4845 from os.path import exists
4847 BASEDIR = '/var/tmp/test_results'
4848 @@ -62,7 +62,7 @@ clock_gettime.argtypes = [ctypes.c_int, ctypes.POINTER(timespec)]
4850 def monotonic_time():
4852 - if clock_gettime(CLOCK_MONOTONIC_RAW, ctypes.pointer(t)) != 0:
4853 + if clock_gettime(CLOCK_MONOTONIC, ctypes.pointer(t)) != 0:
4854 errno_ = ctypes.get_errno()
4855 raise OSError(errno_, os.strerror(errno_))
4856 return t.tv_sec + t.tv_nsec * 1e-9
4857 diff --git a/tests/test-runner/bin/zts-report.py.in b/tests/test-runner/bin/zts-report.py.in
4858 index 432899c21..878b30025 100755
4859 --- a/tests/test-runner/bin/zts-report.py.in
4860 +++ b/tests/test-runner/bin/zts-report.py.in
4861 @@ -183,10 +183,13 @@ if sys.platform.startswith('freebsd'):
4863 'cli_root/zfs_receive/receive-o-x_props_override':
4864 ['FAIL', known_reason],
4865 + 'cli_root/zpool_resilver/zpool_resilver_concurrent':
4866 + ['SKIP', na_reason],
4867 'cli_root/zpool_wait/zpool_wait_trim_basic': ['SKIP', trim_reason],
4868 'cli_root/zpool_wait/zpool_wait_trim_cancel': ['SKIP', trim_reason],
4869 'cli_root/zpool_wait/zpool_wait_trim_flag': ['SKIP', trim_reason],
4870 'link_count/link_count_001': ['SKIP', na_reason],
4871 + 'mmap/mmap_sync_001_pos': ['SKIP', na_reason],
4873 elif sys.platform.startswith('linux'):
4875 @@ -210,6 +213,7 @@ elif sys.platform.startswith('linux'):
4876 # reasons listed above can be used.
4879 + 'threadsappend/threadsappend_001_pos': ['FAIL', 6136],
4880 'chattr/setup': ['SKIP', exec_reason],
4881 'crtime/crtime_001_pos': ['SKIP', statx_reason],
4882 'cli_root/zdb/zdb_006_pos': ['FAIL', known_reason],
4883 @@ -243,6 +247,7 @@ maybe = {
4884 'mmp/mmp_on_uberblocks': ['FAIL', known_reason],
4885 'pyzfs/pyzfs_unittest': ['SKIP', python_deps_reason],
4886 'pool_checkpoint/checkpoint_discard_busy': ['FAIL', '11946'],
4887 + 'pam/setup': ['SKIP', "pamtester might be not available"],
4888 'projectquota/setup': ['SKIP', exec_reason],
4889 'removal/removal_condense_export': ['FAIL', known_reason],
4890 'reservation/reservation_008_pos': ['FAIL', '7741'],
4891 @@ -252,14 +257,12 @@ maybe = {
4892 'snapshot/snapshot_010_pos': ['FAIL', '7961'],
4893 'snapused/snapused_004_pos': ['FAIL', '5513'],
4894 'tmpfile/setup': ['SKIP', tmpfile_reason],
4895 - 'threadsappend/threadsappend_001_pos': ['FAIL', '6136'],
4896 'trim/setup': ['SKIP', trim_reason],
4897 'upgrade/upgrade_projectquota_001_pos': ['SKIP', project_id_reason],
4898 'user_namespace/setup': ['SKIP', user_ns_reason],
4899 'userquota/setup': ['SKIP', exec_reason],
4900 - 'vdev_zaps/vdev_zaps_004_pos': ['FAIL', '6935'],
4901 + 'vdev_zaps/vdev_zaps_004_pos': ['FAIL', known_reason],
4902 'zvol/zvol_ENOSPC/zvol_ENOSPC_001_pos': ['FAIL', '5848'],
4903 - 'pam/setup': ['SKIP', "pamtester might be not available"],
4906 if sys.platform.startswith('freebsd'):
4907 @@ -275,12 +278,18 @@ if sys.platform.startswith('freebsd'):
4908 'resilver/resilver_restart_001': ['FAIL', known_reason],
4909 'pool_checkpoint/checkpoint_big_rewind': ['FAIL', '12622'],
4910 'pool_checkpoint/checkpoint_indirect': ['FAIL', '12623'],
4911 + 'snapshot/snapshot_002_pos': ['FAIL', '14831'],
4913 elif sys.platform.startswith('linux'):
4915 'cli_root/zfs_rename/zfs_rename_002_pos': ['FAIL', known_reason],
4916 'cli_root/zpool_reopen/zpool_reopen_003_pos': ['FAIL', known_reason],
4917 - 'fault/auto_spare_shared': ['FAIL', '11889'],
4918 + 'fault/auto_online_002_pos': ['FAIL', 11889],
4919 + 'fault/auto_replace_001_pos': ['FAIL', 14851],
4920 + 'fault/auto_spare_002_pos': ['FAIL', 11889],
4921 + 'fault/auto_spare_multiple': ['FAIL', 11889],
4922 + 'fault/auto_spare_shared': ['FAIL', 11889],
4923 + 'fault/decompress_fault': ['FAIL', 11889],
4924 'io/io_uring': ['SKIP', 'io_uring support required'],
4925 'limits/filesystem_limit': ['SKIP', known_reason],
4926 'limits/snapshot_limit': ['SKIP', known_reason],
4927 diff --git a/tests/zfs-tests/cmd/Makefile.am b/tests/zfs-tests/cmd/Makefile.am
4928 index d1c29fcd1..7ec4cb619 100644
4929 --- a/tests/zfs-tests/cmd/Makefile.am
4930 +++ b/tests/zfs-tests/cmd/Makefile.am
4931 @@ -20,6 +20,7 @@ SUBDIRS = \
4939 diff --git a/tests/zfs-tests/cmd/mmap_sync/.gitignore b/tests/zfs-tests/cmd/mmap_sync/.gitignore
4940 new file mode 100644
4941 index 000000000..c721f472b
4943 +++ b/tests/zfs-tests/cmd/mmap_sync/.gitignore
4946 diff --git a/tests/zfs-tests/cmd/mmap_sync/Makefile.am b/tests/zfs-tests/cmd/mmap_sync/Makefile.am
4947 new file mode 100644
4948 index 000000000..313e8db5c
4950 +++ b/tests/zfs-tests/cmd/mmap_sync/Makefile.am
4952 +include $(top_srcdir)/config/Rules.am
4954 +pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/bin
4956 +pkgexec_PROGRAMS = mmap_sync
4957 +mmap_sync_SOURCES = mmap_sync.c
4958 diff --git a/tests/zfs-tests/cmd/mmap_sync/mmap_sync.c b/tests/zfs-tests/cmd/mmap_sync/mmap_sync.c
4959 new file mode 100644
4960 index 000000000..226e71be2
4962 +++ b/tests/zfs-tests/cmd/mmap_sync/mmap_sync.c
4965 + * CDDL HEADER START
4967 + * The contents of this file are subject to the terms of the
4968 + * Common Development and Distribution License (the "License").
4969 + * You may not use this file except in compliance with the License.
4971 + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
4972 + * or http://opensource.org/licenses/CDDL-1.0.
4973 + * See the License for the specific language governing permissions
4974 + * and limitations under the License.
4976 + * When distributing Covered Code, include this CDDL HEADER in each
4977 + * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
4978 + * If applicable, add the following below this CDDL HEADER, with the
4979 + * fields enclosed by brackets "[]" replaced with your own identifying
4980 + * information: Portions Copyright [yyyy] [name of copyright owner]
4985 +#include <stdlib.h>
4987 +#include <string.h>
4988 +#include <sys/mman.h>
4989 +#include <sys/stat.h>
4990 +#include <sys/time.h>
4992 +#include <unistd.h>
4996 +cleanup(char *file)
4998 + (void) remove(file);
5002 +main(int argc, char *argv[])
5004 + char *testdir = getenv("TESTDIR");
5006 + fprintf(stderr, "environment variable TESTDIR not set\n");
5012 + if (stat(testdir, &st) != 0 &&
5013 + mkdir(testdir, 0777) != 0) {
5019 + fprintf(stderr, "usage: %s "
5020 + "[run time in mins] "
5021 + "[max msync time in ms]\n", argv[0]);
5025 + int run_time_mins = 1;
5027 + run_time_mins = atoi(argv[1]);
5030 + int max_msync_time_ms = 1000;
5032 + max_msync_time_ms = atoi(argv[2]);
5035 + char filepath[512];
5036 + filepath[0] = '\0';
5037 + char *file = &filepath[0];
5039 + (void) snprintf(file, 512, "%s/msync_file", testdir);
5041 + const int LEN = 8;
5044 + int fd = open(file, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR |
5045 + S_IRGRP | S_IROTH);
5048 + (void) fprintf(stderr, "%s: %s: ", argv[0], file);
5053 + if (ftruncate(fd, LEN) != 0) {
5054 + perror("ftruncate");
5059 + void *ptr = mmap(NULL, LEN, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
5061 + if (ptr == MAP_FAILED) {
5067 + struct timeval tstart;
5068 + gettimeofday(&tstart, NULL);
5070 + long long x = 0LL;
5073 + *((long long *)ptr) = x;
5076 + struct timeval t1, t2;
5077 + gettimeofday(&t1, NULL);
5078 + if (msync(ptr, LEN, MS_SYNC|MS_INVALIDATE) != 0) {
5084 + gettimeofday(&t2, NULL);
5086 + double elapsed = (t2.tv_sec - t1.tv_sec) * 1000.0;
5087 + elapsed += ((t2.tv_usec - t1.tv_usec) / 1000.0);
5088 + if (elapsed > max_msync_time_ms) {
5089 + fprintf(stderr, "slow msync: %f ms\n", elapsed);
5090 + if (munmap(ptr, LEN) != 0)
5096 + double elapsed_start = (t2.tv_sec - tstart.tv_sec) * 1000.0;
5097 + elapsed_start += ((t2.tv_usec - tstart.tv_usec) / 1000.0);
5098 + if (elapsed_start > run_time_mins * 60 * 1000) {
5103 + if (munmap(ptr, LEN) != 0) {
5109 + if (close(fd) != 0) {
5116 diff --git a/tests/zfs-tests/include/commands.cfg b/tests/zfs-tests/include/commands.cfg
5117 index 78802c9fb..8ac38dfd8 100644
5118 --- a/tests/zfs-tests/include/commands.cfg
5119 +++ b/tests/zfs-tests/include/commands.cfg
5120 @@ -207,6 +207,7 @@ export ZFSTEST_FILES='badsend
5128 diff --git a/tests/zfs-tests/tests/functional/alloc_class/Makefile.am b/tests/zfs-tests/tests/functional/alloc_class/Makefile.am
5129 index 7cffb2eac..82fd9f340 100644
5130 --- a/tests/zfs-tests/tests/functional/alloc_class/Makefile.am
5131 +++ b/tests/zfs-tests/tests/functional/alloc_class/Makefile.am
5132 @@ -14,7 +14,9 @@ dist_pkgdata_SCRIPTS = \
5133 alloc_class_010_pos.ksh \
5134 alloc_class_011_neg.ksh \
5135 alloc_class_012_pos.ksh \
5136 - alloc_class_013_pos.ksh
5137 + alloc_class_013_pos.ksh \
5138 + alloc_class_014_neg.ksh \
5139 + alloc_class_015_pos.ksh
5141 dist_pkgdata_DATA = \
5143 diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_013_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_013_pos.ksh
5144 index 2ce22a624..790a47f26 100755
5145 --- a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_013_pos.ksh
5146 +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_013_pos.ksh
5147 @@ -42,7 +42,8 @@ log_must display_status "$TESTPOOL"
5149 log_must zfs create -o dedup=on -V 2G $TESTPOOL/$TESTVOL
5151 -log_must eval "new_fs $ZVOL_DEVDIR/$TESTPOOL/$TESTVOL >/dev/null 2>&1"
5152 +block_device_wait "$ZVOL_DEVDIR/$TESTPOOL/$TESTVOL"
5153 +log_must eval "new_fs $ZVOL_DEVDIR/$TESTPOOL/$TESTVOL >/dev/null"
5156 log_must zpool list -v $TESTPOOL
5157 diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_014_neg.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_014_neg.ksh
5158 new file mode 100755
5159 index 000000000..1b52014fd
5161 +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_014_neg.ksh
5166 +# This file and its contents are supplied under the terms of the
5167 +# Common Development and Distribution License ("CDDL"), version 1.0.
5168 +# You may only use this file in accordance with the terms of version
5171 +# A full copy of the text of the CDDL should have accompanied this
5172 +# source. A copy of the CDDL is also available via the Internet at
5173 +# http://www.illumos.org/license/CDDL.
5176 +. $STF_SUITE/tests/functional/alloc_class/alloc_class.kshlib
5180 +# Setting the special_small_blocks property greater than recordsize fails.
5183 +verify_runnable "global"
5185 +claim="Setting the special_small_blocks property greater than recordsize fails"
5189 +log_must disk_setup
5191 +for size in 512 4096 32768 131072 524288 1048576
5193 + let bigger=$size*2
5194 + log_mustnot zpool create -O recordsize=$size \
5195 + -O special_small_blocks=$bigger \
5196 + $TESTPOOL raidz $ZPOOL_DISKS special mirror \
5197 + $CLASS_DISK0 $CLASS_DISK1
5201 diff --git a/tests/zfs-tests/tests/functional/alloc_class/alloc_class_015_pos.ksh b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_015_pos.ksh
5202 new file mode 100755
5203 index 000000000..49c468af6
5205 +++ b/tests/zfs-tests/tests/functional/alloc_class/alloc_class_015_pos.ksh
5210 +# This file and its contents are supplied under the terms of the
5211 +# Common Development and Distribution License ("CDDL"), version 1.0.
5212 +# You may only use this file in accordance with the terms of version
5215 +# A full copy of the text of the CDDL should have accompanied this
5216 +# source. A copy of the CDDL is also available via the Internet at
5217 +# http://www.illumos.org/license/CDDL.
5220 +. $STF_SUITE/tests/functional/alloc_class/alloc_class.kshlib
5224 +# Can set special_small_blocks property less than or equal to recordsize.
5227 +verify_runnable "global"
5229 +claim="Can set special_small_blocks property less than or equal to recordsize"
5233 +log_must disk_setup
5235 +for size in 8192 32768 131072 524288 1048576
5237 + let smaller=$size/2
5238 + log_must zpool create -O recordsize=$size \
5239 + -O special_small_blocks=$smaller \
5240 + $TESTPOOL raidz $ZPOOL_DISKS special mirror \
5241 + $CLASS_DISK0 $CLASS_DISK1
5242 + log_must zpool destroy -f "$TESTPOOL"
5244 + log_must zpool create -O recordsize=$size \
5245 + -O special_small_blocks=$size \
5246 + $TESTPOOL raidz $ZPOOL_DISKS special mirror \
5247 + $CLASS_DISK0 $CLASS_DISK1
5248 + log_must zpool destroy -f "$TESTPOOL"
5252 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am
5253 index a8c9a31dc..4230ec557 100644
5254 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am
5255 +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/Makefile.am
5256 @@ -12,6 +12,7 @@ dist_pkgdata_SCRIPTS = \
5257 import_cachefile_paths_changed.ksh \
5258 import_cachefile_shared_device.ksh \
5259 import_devices_missing.ksh \
5260 + import_log_missing.ksh \
5261 import_paths_changed.ksh \
5262 import_rewind_config_changed.ksh \
5263 import_rewind_device_replaced.ksh \
5264 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_log_missing.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_log_missing.ksh
5265 new file mode 100755
5266 index 000000000..f12cac785
5268 +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_import/import_log_missing.ksh
5273 +# This file and its contents are supplied under the terms of the
5274 +# Common Development and Distribution License ("CDDL"), version 1.0.
5275 +# You may only use this file in accordance with the terms of version
5278 +# A full copy of the text of the CDDL should have accompanied this
5279 +# source. A copy of the CDDL is also available via the Internet at
5280 +# http://www.illumos.org/license/CDDL.
5283 +. $STF_SUITE/tests/functional/cli_root/zpool_import/zpool_import.kshlib
5287 +# Import with missing log device should not remove spare/cache.
5290 +# 1. Create a pool.
5291 +# 2. Add spare, cache and log devices to the pool.
5292 +# 3. Export the pool.
5293 +# 4. Remove the log device.
5294 +# 5. Import the pool with -m flag.
5295 +# 6. Verify that spare and cache are still present in the pool.
5298 +verify_runnable "global"
5302 +function test_missing_log
5304 + typeset poolcreate="$1"
5305 + typeset cachevdev="$2"
5306 + typeset sparevdev="$3"
5307 + typeset logvdev="$4"
5308 + typeset missingvdev="$4"
5310 + log_note "$0: pool '$poolcreate', adding $cachevdev, $sparevdev," \
5311 + "$logvdev then moving away $missingvdev."
5313 + log_must zpool create $TESTPOOL1 $poolcreate
5315 + log_must zpool add $TESTPOOL1 cache $cachevdev spare $sparevdev \
5318 + log_must_busy zpool export $TESTPOOL1
5320 + log_must mv $missingvdev $BACKUP_DEVICE_DIR
5322 + log_must zpool import -m -d $DEVICE_DIR $TESTPOOL1
5324 + CACHE_PRESENT=$(zpool status -v $TESTPOOL1 | grep $cachevdev)
5326 + SPARE_PRESENT=$(zpool status -v $TESTPOOL1 | grep $sparevdev)
5328 + if [ -z "$CACHE_PRESENT" ] || [ -z "$SPARE_PRESENT" ]
5330 + log_fail "cache/spare vdev missing after importing with missing" \
5335 + log_must zpool destroy $TESTPOOL1
5340 +log_must mkdir -p $BACKUP_DEVICE_DIR
5342 +test_missing_log "$VDEV0" "$VDEV1" "$VDEV2" "$VDEV3"
5344 +log_pass "zpool import succeeded with missing log device"
5345 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am
5346 index 3968902ec..483c1c2f5 100644
5347 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am
5348 +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/Makefile.am
5349 @@ -10,6 +10,7 @@ dist_pkgdata_SCRIPTS = \
5350 zpool_initialize_start_and_cancel_neg.ksh \
5351 zpool_initialize_start_and_cancel_pos.ksh \
5352 zpool_initialize_suspend_resume.ksh \
5353 + zpool_initialize_uninit.ksh \
5354 zpool_initialize_unsupported_vdevs.ksh \
5355 zpool_initialize_verify_checksums.ksh \
5356 zpool_initialize_verify_initialized.ksh
5357 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_uninit.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_uninit.ksh
5358 new file mode 100755
5359 index 000000000..17f776cfb
5361 +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_initialize/zpool_initialize_uninit.ksh
5365 +# CDDL HEADER START
5367 +# The contents of this file are subject to the terms of the
5368 +# Common Development and Distribution License (the "License").
5369 +# You may not use this file except in compliance with the License.
5371 +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
5372 +# or https://opensource.org/licenses/CDDL-1.0.
5373 +# See the License for the specific language governing permissions
5374 +# and limitations under the License.
5376 +# When distributing Covered Code, include this CDDL HEADER in each
5377 +# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
5378 +# If applicable, add the following below this CDDL HEADER, with the
5379 +# fields enclosed by brackets "[]" replaced with your own identifying
5380 +# information: Portions Copyright [yyyy] [name of copyright owner]
5386 +# Copyright (c) 2016 by Delphix. All rights reserved.
5387 +# Copyright (C) 2023 Lawrence Livermore National Security, LLC.
5389 +. $STF_SUITE/include/libtest.shlib
5390 +. $STF_SUITE/tests/functional/cli_root/zpool_initialize/zpool_initialize.kshlib
5394 +# Starting, stopping, uninitializing, and restarting an initialize works.
5397 +# 1. Create a three-disk pool.
5398 +# 2. Verify uninitialize succeeds for uninitialized pool.
5399 +# 3. Verify pool wide cancel|suspend + uninit
5400 +# a. Start initializing and verify that initializing is active.
5401 +# b. Verify uninitialize fails when actively initializing.
5402 +# c. Cancel or suspend initializing and verify that initializing is not active.
5403 +# d. Verify uninitialize succeeds after being cancelled.
5404 +# 4. Verify per-disk cancel|suspend + uninit
5407 +DISK1="$(echo $DISKS | cut -d' ' -f1)"
5408 +DISK2="$(echo $DISKS | cut -d' ' -f2)"
5409 +DISK3="$(echo $DISKS | cut -d' ' -f3)"
5411 +function status_check # pool disk1-state disk2-state disk3-state
5414 + typeset disk1_state="$2"
5415 + typeset disk2_state="$3"
5416 + typeset disk3_state="$4"
5418 + state=$(zpool status -i "$pool" | grep "$DISK1" | grep "$disk1_state")
5419 + if [[ -z "$state" ]]; then
5420 + log_fail "DISK1 state; expected='$disk1_state' got '$state'"
5423 + state=$(zpool status -i "$pool" | grep "$DISK2" | grep "$disk2_state")
5424 + if [[ -z "$state" ]]; then
5425 + log_fail "DISK2 state; expected='$disk2_state' got '$state'"
5428 + state=$(zpool status -i "$pool" | grep "$DISK3" | grep "$disk3_state")
5429 + if [[ -z "$state" ]]; then
5430 + log_fail "DISK3 state; expected='$disk3_state' got '$state'"
5434 +function status_check_all # pool disk-state
5437 + typeset disk_state="$2"
5439 + status_check "$pool" "$disk_state" "$disk_state" "$disk_state"
5442 +# 1. Create a three-disk pool.
5443 +log_must zpool create -f $TESTPOOL $DISK1 $DISK2 $DISK3
5444 +status_check_all $TESTPOOL "uninitialized"
5446 +# 2. Verify uninitialize succeeds for uninitialized pool.
5447 +log_must zpool initialize -u $TESTPOOL
5448 +status_check_all $TESTPOOL "uninitialized"
5450 +# 3. Verify pool wide cancel + uninit
5451 +log_must zpool initialize $TESTPOOL
5452 +status_check_all $TESTPOOL "[[:digit:]]* initialized"
5454 +log_mustnot zpool initialize -u $TESTPOOL
5455 +status_check_all $TESTPOOL "[[:digit:]]* initialized"
5457 +log_must zpool initialize -c $TESTPOOL
5458 +status_check_all $TESTPOOL "uninitialized"
5460 +log_must zpool initialize -u $TESTPOOL
5461 +status_check_all $TESTPOOL "uninitialized"
5463 +# 3. Verify pool wide suspend + uninit
5464 +log_must zpool initialize $TESTPOOL
5465 +status_check_all $TESTPOOL "[[:digit:]]* initialized"
5467 +log_mustnot zpool initialize -u $TESTPOOL
5468 +status_check_all $TESTPOOL "[[:digit:]]* initialized"
5470 +log_must zpool initialize -s $TESTPOOL
5471 +status_check_all $TESTPOOL "suspended"
5473 +log_must zpool initialize -u $TESTPOOL
5474 +status_check_all $TESTPOOL "uninitialized"
5476 +# 4. Verify per-disk cancel|suspend + uninit
5477 +log_must zpool initialize $TESTPOOL
5478 +status_check_all $TESTPOOL "[[:digit:]]* initialized"
5480 +log_must zpool initialize -c $TESTPOOL $DISK1
5481 +log_must zpool initialize -s $TESTPOOL $DISK2
5482 +log_mustnot zpool initialize -u $TESTPOOL $DISK3
5483 +status_check $TESTPOOL "uninitialized" "suspended" "[[:digit:]]* initialized"
5485 +log_must zpool initialize -u $TESTPOOL $DISK1
5486 +status_check $TESTPOOL "uninitialized" "suspended" "[[:digit:]]* initialized"
5488 +log_must zpool initialize -u $TESTPOOL $DISK2
5489 +status_check $TESTPOOL "uninitialized" "uninitialized" "[[:digit:]]* initialized"
5491 +log_must zpool initialize $TESTPOOL $DISK1
5492 +status_check $TESTPOOL "[[:digit:]]* initialized" "uninitialized" "[[:digit:]]* initialized"
5494 +log_must zpool initialize $TESTPOOL $DISK2
5495 +status_check_all $TESTPOOL "[[:digit:]]* initialized"
5497 +log_must zpool initialize -s $TESTPOOL
5498 +status_check_all $TESTPOOL "suspended"
5500 +log_must zpool initialize -u $TESTPOOL $DISK1 $DISK2 $DISK3
5501 +status_check_all $TESTPOOL "uninitialized"
5503 +log_pass "Initialize start + cancel/suspend + uninit + start works"
5504 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile.am
5505 index 2cec5335f..7ca9e81c1 100644
5506 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile.am
5507 +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/Makefile.am
5508 @@ -3,7 +3,8 @@ dist_pkgdata_SCRIPTS = \
5511 zpool_resilver_bad_args.ksh \
5512 - zpool_resilver_restart.ksh
5513 + zpool_resilver_restart.ksh \
5514 + zpool_resilver_concurrent.ksh
5516 dist_pkgdata_DATA = \
5518 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_concurrent.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_concurrent.ksh
5519 new file mode 100755
5520 index 000000000..4c3b09796
5522 +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_resilver/zpool_resilver_concurrent.ksh
5526 +# CDDL HEADER START
5528 +# The contents of this file are subject to the terms of the
5529 +# Common Development and Distribution License (the "License").
5530 +# You may not use this file except in compliance with the License.
5532 +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
5533 +# or http://www.opensolaris.org/os/licensing.
5534 +# See the License for the specific language governing permissions
5535 +# and limitations under the License.
5537 +# When distributing Covered Code, include this CDDL HEADER in each
5538 +# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
5539 +# If applicable, add the following below this CDDL HEADER, with the
5540 +# fields enclosed by brackets "[]" replaced with your own identifying
5541 +# information: Portions Copyright [yyyy] [name of copyright owner]
5547 +# Copyright (c) 2023 Hewlett Packard Enterprise Development LP.
5550 +. $STF_SUITE/include/libtest.shlib
5551 +. $STF_SUITE/tests/functional/redundancy/redundancy.kshlib
5555 +# Verify 'zpool clear' doesn't cause concurrent resilvers
5558 +# 1. Create N(10) virtual disk files.
5559 +# 2. Create draid pool based on the virtual disk files.
5560 +# 3. Fill the filesystem with directories and files.
5561 +# 4. Force-fault 2 vdevs and verify distributed spare is kicked in.
5562 +# 5. Free the distributed spare by replacing the faulty drive.
5563 +# 6. Run zpool clear and verify that it does not initiate 2 resilvers
5564 +# concurrently while distributed spare gets kicked in.
5567 +verify_runnable "global"
5569 +typeset -ir devs=10
5570 +typeset -ir nparity=1
5571 +typeset -ir ndata=8
5572 +typeset -ir dspare=1
5576 + poolexists "$TESTPOOL" && destroy_pool "$TESTPOOL"
5578 + for i in {0..$devs}; do
5579 + log_must rm -f "$BASEDIR/vdev$i"
5582 + for dir in $BASEDIR; do
5583 + if [[ -d $dir ]]; then
5584 + log_must rm -rf $dir
5592 +log_assert "Verify zpool clear on draid pool doesn't cause concurrent resilvers"
5595 +setup_test_env $TESTPOOL draid${nparity}:${ndata}d:${dspare}s $devs
5597 +# ZED needed for sequential resilver
5601 +log_must zpool offline -f $TESTPOOL $BASEDIR/vdev5
5602 +log_must wait_vdev_state $TESTPOOL draid1-0-0 "ONLINE" 60
5603 +log_must zpool wait -t resilver $TESTPOOL
5604 +log_must zpool offline -f $TESTPOOL $BASEDIR/vdev6
5606 +log_must zpool labelclear -f $BASEDIR/vdev5
5607 +log_must zpool labelclear -f $BASEDIR/vdev6
5609 +log_must zpool replace -w $TESTPOOL $BASEDIR/vdev5
5610 +sync_pool $TESTPOOL
5612 +log_must zpool events -c
5613 +log_must zpool clear $TESTPOOL
5614 +log_must wait_vdev_state $TESTPOOL draid1-0-0 "ONLINE" 60
5615 +log_must zpool wait -t resilver $TESTPOOL
5616 +log_must zpool wait -t scrub $TESTPOOL
5618 +nof_resilver=$(zpool events | grep -c resilver_start)
5619 +if [ $nof_resilver = 1 ] ; then
5620 + log_must verify_pool $TESTPOOL
5621 + log_pass "zpool clear on draid pool doesn't cause concurrent resilvers"
5623 + log_fail "FAIL: sequential and healing resilver initiated concurrently"
5625 diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_pos.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_pos.ksh
5626 index fbb0c2910..19781137d 100755
5627 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_pos.ksh
5628 +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_trim/zpool_trim_start_and_cancel_pos.ksh
5632 log_must zpool create -f $TESTPOOL $DISK1
5633 -log_must zpool trim $TESTPOOL
5634 +log_must zpool trim -r 1 $TESTPOOL
5636 [[ -z "$(trim_progress $TESTPOOL $DISK1)" ]] && \
5637 log_fail "TRIM did not start"
5638 diff --git a/tests/zfs-tests/tests/functional/mmap/Makefile.am b/tests/zfs-tests/tests/functional/mmap/Makefile.am
5639 index b26791ee7..526405954 100644
5640 --- a/tests/zfs-tests/tests/functional/mmap/Makefile.am
5641 +++ b/tests/zfs-tests/tests/functional/mmap/Makefile.am
5642 @@ -2,10 +2,12 @@ pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/mmap
5643 dist_pkgdata_SCRIPTS = \
5647 mmap_read_001_pos.ksh \
5648 mmap_write_001_pos.ksh \
5649 mmap_libaio_001_pos.ksh \
5650 - mmap_seek_001_pos.ksh
5651 + mmap_seek_001_pos.ksh \
5652 + mmap_sync_001_pos.ksh
5654 dist_pkgdata_DATA = \
5656 diff --git a/tests/zfs-tests/tests/functional/mmap/mmap_mixed.ksh b/tests/zfs-tests/tests/functional/mmap/mmap_mixed.ksh
5657 new file mode 100755
5658 index 000000000..6c8246d48
5660 +++ b/tests/zfs-tests/tests/functional/mmap/mmap_mixed.ksh
5664 +# CDDL HEADER START
5666 +# The contents of this file are subject to the terms of the
5667 +# Common Development and Distribution License (the "License").
5668 +# You may not use this file except in compliance with the License.
5670 +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
5671 +# or https://opensource.org/licenses/CDDL-1.0.
5672 +# See the License for the specific language governing permissions
5673 +# and limitations under the License.
5675 +# When distributing Covered Code, include this CDDL HEADER in each
5676 +# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
5677 +# If applicable, add the following below this CDDL HEADER, with the
5678 +# fields enclosed by brackets "[]" replaced with your own identifying
5679 +# information: Portions Copyright [yyyy] [name of copyright owner]
5685 +# Copyright (c) 2023 by Lawrence Livermore National Security, LLC.
5688 +. $STF_SUITE/include/libtest.shlib
5689 +. $STF_SUITE/tests/functional/mmap/mmap.cfg
5693 +# Verify mixed buffered and mmap IO.
5696 +# 1. Create an empty file.
5697 +# 2. Start a background buffered read/write fio to the file.
5698 +# 3. Start a background mmap read/write fio to the file.
5701 +verify_runnable "global"
5705 + log_must rm -f "$tmp_file"
5708 +log_assert "Verify mixed buffered and mmap IO"
5712 +mntpnt=$(get_prop mountpoint $TESTPOOL/$TESTFS)
5713 +tmp_file=$mntpnt/file
5716 +size=$((bs * blocks))
5719 +log_must dd if=/dev/zero of=$tmp_file bs=$bs count=$blocks
5721 +# Buffered IO writes
5722 +log_must eval "fio --filename=$tmp_file --name=buffer-write \
5723 + --rw=randwrite --size=$size --bs=$bs --direct=0 --numjobs=1 \
5724 + --ioengine=sync --fallocate=none --group_reporting --minimal \
5725 + --runtime=$runtime --time_based --norandommap &"
5727 +# Buffered IO reads
5728 +log_must eval "fio --filename=$tmp_file --name=buffer-read \
5729 + --rw=randread --size=$size --bs=$bs --direct=0 --numjobs=1 \
5730 + --ioengine=sync --fallocate=none --group_reporting --minimal \
5731 + --runtime=$runtime --time_based --norandommap &"
5734 +log_must eval "fio --filename=$tmp_file --name=mmap-write \
5735 + --rw=randwrite --size=$size --bs=$bs --numjobs=1 \
5736 + --ioengine=mmap --fallocate=none --group_reporting --minimal \
5737 + --runtime=$runtime --time_based --norandommap &"
5740 +log_must eval "fio --filename=$tmp_file --name=mmap-read \
5741 + --rw=randread --size=$size --bs=$bs --numjobs=1 \
5742 + --ioengine=mmap --fallocate=none --group_reporting --minimal \
5743 + --runtime=$runtime --time_based --norandommap &"
5747 +log_pass "Verfied mixed buffered and mmap IO"
5748 diff --git a/tests/zfs-tests/tests/functional/mmap/mmap_sync_001_pos.ksh b/tests/zfs-tests/tests/functional/mmap/mmap_sync_001_pos.ksh
5749 new file mode 100755
5750 index 000000000..b764d6607
5752 +++ b/tests/zfs-tests/tests/functional/mmap/mmap_sync_001_pos.ksh
5757 +# This file and its contents are supplied under the terms of the
5758 +# Common Development and Distribution License ("CDDL"), version 1.0.
5759 +# You may only use this file in accordance with the terms of version
5762 +# A full copy of the text of the CDDL should have accompanied this
5763 +# source. A copy of the CDDL is also available via the Internet at
5764 +# http://www.illumos.org/license/CDDL.
5768 +# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
5771 +. $STF_SUITE/include/libtest.shlib
5775 +# msync()s of mmap()'ed file should complete quickly during
5776 +# background dirty page writebacks by the kernel.
5781 + log_must eval "echo $saved_vm_dirty_expire_centisecs > /proc/sys/vm/dirty_expire_centisecs"
5782 + log_must eval "echo $saved_vm_dirty_background_ratio > /proc/sys/vm/dirty_background_ratio"
5783 + log_must eval "echo $saved_vm_dirty_writeback_centisecs > /proc/sys/vm/dirty_writeback_centisecs"
5785 + # revert to some sensible defaults if the values we saved
5786 + # were incorrect due to a previous run being interrupted
5787 + if [ $(</proc/sys/vm/dirty_expire_centisecs) -eq 1 ]; then
5788 + log_must eval "echo 3000 > /proc/sys/vm/dirty_expire_centisecs"
5791 + if [ $(</proc/sys/vm/dirty_background_ratio) -eq 0 ]; then
5792 + log_must eval "echo 10 > /proc/sys/vm/dirty_background_ratio"
5795 + if [ $(</proc/sys/vm/dirty_writeback_centisecs) -eq 1 ]; then
5796 + log_must eval "echo 500 > /proc/sys/vm/dirty_writeback_centisecs"
5800 +if ! is_linux; then
5801 + log_unsupported "Only supported on Linux, requires /proc/sys/vm/ tunables"
5805 +log_assert "Run the tests for mmap_sync"
5807 +read -r saved_vm_dirty_expire_centisecs < /proc/sys/vm/dirty_expire_centisecs  # capture the current tunables before overriding them below
5808 +read -r saved_vm_dirty_background_ratio < /proc/sys/vm/dirty_background_ratio
5809 +read -r saved_vm_dirty_writeback_centisecs < /proc/sys/vm/dirty_writeback_centisecs
5811 +log_must eval "echo 1 > /proc/sys/vm/dirty_expire_centisecs"  # override the writeback tunables with 1 for the duration of the test
5812 +log_must eval "echo 1 > /proc/sys/vm/dirty_background_bytes"  # NOTE(review): writes *_bytes while *_ratio is the value saved/restored; presumably relies on the kernel zeroing dirty_background_ratio when dirty_background_bytes is set -- confirm
5813 +log_must eval "echo 1 > /proc/sys/vm/dirty_writeback_centisecs"
5816 +log_pass "mmap_sync tests passed."
5817 diff --git a/tests/zfs-tests/tests/functional/rsend/Makefile.am b/tests/zfs-tests/tests/functional/rsend/Makefile.am
5818 index d80d2124e..2cedf03d3 100644
5819 --- a/tests/zfs-tests/tests/functional/rsend/Makefile.am
5820 +++ b/tests/zfs-tests/tests/functional/rsend/Makefile.am
5821 @@ -25,6 +25,7 @@ dist_pkgdata_SCRIPTS = \
5824 send_encrypted_files.ksh \
5825 + send_encrypted_freeobjects.ksh \
5826 send_encrypted_hierarchy.ksh \
5827 send_encrypted_props.ksh \
5828 send_encrypted_truncated_files.ksh \
5829 diff --git a/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh b/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh
5830 index 988ed91b9..1bf234823 100755
5831 --- a/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh
5832 +++ b/tests/zfs-tests/tests/functional/rsend/send-c_volume.ksh
5838 log_must_busy zfs destroy -r $vol
5841 @@ -60,7 +61,9 @@ log_must eval "zfs recv -d $POOL2 <$BACKDIR/full"
5843 verify_stream_size $BACKDIR/full $vol
5844 verify_stream_size $BACKDIR/full $vol2
5845 -md5=$(dd if=$voldev2 bs=1024k count=$megs 2>/dev/null | md5digest)
5846 +block_device_wait $voldev2
5847 +log_must dd if=$voldev2 of=$BACKDIR/copy bs=1024k count=$megs
5848 +md5=$(md5digest $BACKDIR/copy)
5849 [[ $md5 = $md5_1 ]] || log_fail "md5 mismatch: $md5 != $md5_1"
5851 # Repeat, for an incremental send
5852 @@ -72,7 +75,9 @@ log_must eval "zfs recv -d $POOL2 <$BACKDIR/inc"
5854 verify_stream_size $BACKDIR/inc $vol 90 $vol@snap
5855 verify_stream_size $BACKDIR/inc $vol2 90 $vol2@snap
5856 -md5=$(dd skip=$megs if=$voldev2 bs=1024k count=$megs 2>/dev/null | md5digest)
5857 +block_device_wait $voldev2
5858 +log_must dd skip=$megs if=$voldev2 of=$BACKDIR/copy bs=1024k count=$megs
5859 +md5=$(md5digest $BACKDIR/copy)
5860 [[ $md5 = $md5_2 ]] || log_fail "md5 mismatch: $md5 != $md5_2"
5862 log_pass "Verify compressed send works with volumes"
5863 diff --git a/tests/zfs-tests/tests/functional/rsend/send_encrypted_freeobjects.ksh b/tests/zfs-tests/tests/functional/rsend/send_encrypted_freeobjects.ksh
5864 new file mode 100755
5865 index 000000000..92451bd1a
5867 +++ b/tests/zfs-tests/tests/functional/rsend/send_encrypted_freeobjects.ksh
5872 +# This file and its contents are supplied under the terms of the
5873 +# Common Development and Distribution License ("CDDL"), version 1.0.
5874 +# You may only use this file in accordance with the terms of version
5877 +# A full copy of the text of the CDDL should have accompanied this
5878 +# source. A copy of the CDDL is also available via the Internet at
5879 +# http://www.illumos.org/license/CDDL.
5883 +# Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
5884 +# Copyright (c) 2023 by Findity AB
5887 +. $STF_SUITE/tests/functional/rsend/rsend.kshlib
5891 +# Verify that receiving a raw encrypted stream, with a FREEOBJECTS
5892 +# removing all existing objects in a block followed by an OBJECT write
5893 +# to the same block, does not result in a panic.
5896 +# 1. Create a new encrypted filesystem
5897 +# 2. Create file f1 as the first object in some block (here object 128)
5898 +# 3. Take snapshot A
5899 +# 4. Create file f2 as the second object in the same block (here object 129)
5901 +# 6. Take snapshot B
5902 +# 7. Receive a full raw encrypted send of A
5903 +# 8. Receive an incremental raw send of B
5905 +verify_runnable "both"
5907 +function create_object_with_num
5913 + for ((i=0; i<$tries; i++)); do
5915 + onum=$(ls -li $file | awk '{print $1}')
5917 + if [[ $onum -ne $num ]] ; then
5923 + if [[ $i -eq $tries ]]; then
5924 + log_fail "Failed to create object with number $num"
5928 +log_assert "FREEOBJECTS followed by OBJECT in encrypted stream does not crash"
5932 +keyfile=/$POOL/keyencfods
5933 +f1=/$POOL/$sendds/f1
5934 +f2=/$POOL/$sendds/f2
5936 +log_must eval "echo 'password' > $keyfile"
5939 +# xattr=sa and dnodesize=legacy for sequential object numbers, see
5940 +# note in send_freeobjects.ksh.
5942 +log_must zfs create -o xattr=sa -o dnodesize=legacy -o encryption=on \
5943 + -o keyformat=passphrase -o keylocation=file://$keyfile $POOL/$sendds
5945 +create_object_with_num $f1 128
5946 +log_must zfs snap $POOL/$sendds@A
5947 +create_object_with_num $f2 129
5949 +log_must zfs snap $POOL/$sendds@B
5951 +log_must eval "zfs send -w $POOL/$sendds@A | zfs recv $POOL/$recvds"
5952 +log_must eval "zfs send -w -i $POOL/$sendds@A $POOL/$sendds@B |" \
5953 + "zfs recv $POOL/$recvds"
5955 +log_pass "FREEOBJECTS followed by OBJECT in encrypted stream did not crash"